In [41]:
import numpy as np
import pandas as pd
import signac
import json
from typing import Tuple
import matplotlib.pyplot as plt

from martignac.workflows.solute_in_solvent_alchemical import project

pd.set_option("display.precision", 2)

In [42]:
# Show the notebook representation of the imported workflow `project` object.
project

In [43]:
# Flatten the project into a DataFrame. As the preview below shows, the index
# holds hash-like identifiers, `sp.*` columns hold state-point fields and
# `doc.*` columns hold the per-flow documents.
df = project.to_dataframe()
df.head()

Unnamed: 0,sp.type,sp.solvent_name,sp.solute_name,sp.lambda_state,doc.SoluteInSolventAlchemicalFlow,doc.SoluteGenFlow,doc.SolventGenFlow,doc.SoluteInSolventGenFlow
d0a06838950a334d442e1cd0130fb155,alchemical_transformation,CLF,P6,8,"{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru..."
480123ac5874ff44b7b0dbe4c145670b,alchemical_transformation,HD,Q1,10,"{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru..."
899b05f6088670fc1111d66c3a879891,alchemical_transformation,OCO,Q1,5,"{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru..."
f6fb8f62c684984898181f51c4f430d6,alchemical_transformation,W,D,3,"{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru..."
b6baf4f686816c57435971b0290c8191,alchemical_transformation,CHEX,D,5,"{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru...","{'files_symlinked': True, 'fetched_nomad': Tru..."


In [44]:
KT_TO_KJ_PER_MOL = 2.479  # kT to kJ/mol at 298K
FLOW_COLUMN = 'doc.SoluteInSolventAlchemicalFlow'
# Non-water solvents, in the order their "<solvent> -> W" columns are displayed.
OIL_SOLVENTS = ["HD", "OCO", "CLF", "ETH", "BENZ", "CHEX"]


def _free_energy_mean(flow_doc):
    """Return ``flow_doc['free_energy']['mean']``, or NaN when that entry is absent."""
    # Guard against cells that are not mappings at all (e.g. a missing document).
    if hasattr(flow_doc, "keys") and 'free_energy' in flow_doc.keys():
        return flow_doc['free_energy']['mean']
    return np.nan


# Extract the value from every job *before* grouping. GroupBy.first() returns the
# first non-null entry per group, so a (solvent, solute) pair is filled as soon as
# any of its jobs reports a free energy, instead of depending on which job
# document happens to come first. The float cast keeps the arithmetic below valid
# even when no job has a value yet.
free_energy_kt = (
    df.assign(free_energy=df[FLOW_COLUMN].map(_free_energy_mean).astype(float))
    .groupby(['sp.solvent_name', 'sp.solute_name'])['free_energy']
    .first()
    .unstack('sp.solvent_name')
)
free_energy_kj = free_energy_kt * KT_TO_KJ_PER_MOL

# Subtract the water column from every other solvent column. "W" is selected by
# label rather than by position, so the result does not rely on "W" sorting last.
transfer_kj = (
    free_energy_kj.drop(columns="W")
    .sub(free_energy_kj["W"], axis=0)
    .rename(columns=lambda solvent: f"{solvent} -> W")
)

# Final table: transfer columns in display order, followed by the raw water column.
df_f = transfer_kj.assign(W=free_energy_kj["W"])
df_f = df_f[[f"{solvent} -> W" for solvent in OIL_SOLVENTS] + ["W"]]
df_f.columns.name = None
df_f.index.name = "bead"
df_f.head()

Unnamed: 0_level_0,HD -> W,OCO -> W,CLF -> W,ETH -> W,BENZ -> W,CHEX -> W,W
bead,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
D,,,,,,,
N3a,,,,,,,
P6,,,,,,,
Q1,,,,,,,


In [10]:
# Verbose headers of the reference CSV mapped onto the short column labels
# that df_f uses, so both tables can be compared column by column.
martini_column_labels = {
    "Bead": "bead",
    'Oil-water transfer ΔG (HD→W)': "HD -> W",
    'Oil-water transfer ΔG (OCO→W)': "OCO -> W",
    'Oil-water transfer ΔG (CLF→W)': "CLF -> W",
    'Oil-water transfer ΔG (ETH→W)': "ETH -> W",
    'Oil-water transfer ΔG (BENZ→W)': "BENZ -> W",
    'Oil-water transfer ΔG (CHEX→W)': "CHEX -> W",
    'Hydration ΔG': "W",
}

# Load the reference table, discard the vaporization column (it has no
# counterpart in df_f) and index the rows by bead name.
martini_raw = pd.read_csv("martini_reg_ref.csv")
martini_trimmed = martini_raw.drop(columns='Vaporization ΔG')
df_martini = martini_trimmed.rename(columns=martini_column_labels).set_index("bead")
df_martini.head()

Unnamed: 0_level_0,HD -> W,OCO -> W,CLF -> W,ETH -> W,BENZ -> W,CHEX -> W,W
bead,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C1,17.2,18.9,17.2,14.5,14.9,20.0,-8.6
C2,15.4,14.8,14.3,12.9,12.1,16.9,-5.7
C3,13.5,13.8,13.3,13.3,15.1,16.3,-4.8
C4,10.6,13.4,13.6,13.7,18.0,16.3,-5.1
C5,8.8,11.2,12.1,11.9,16.7,13.2,-3.6


In [11]:
# Bead names that need relabelling before matching against the reference table
# (key: name used in this project, value: replacement label).
bead_mapping = dict([
    ('N3a', 'N3d/a'),
    ('N3d', 'N3d/a'),
    ('Q2+', 'Q2'),
])


def map_beads(bead):
    """Return the replacement label for ``bead``; names without an entry pass through unchanged."""
    try:
        return bead_mapping[bead]
    except KeyError:
        return bead

# Relabel df_f's rows with the mapped bead names. Assigning a plain list resets
# the index name, so it is restored right after.
df_f.index = list(map(map_beads, df_f.index))
df_f.index.name = "bead"

# Beads present in both tables: an inner merge on the shared "bead" index level
# keeps only the labels found on both sides.
identical_beads = df_f.merge(df_martini, on="bead").index
identical_beads

Index(['Q1', 'Q2'], dtype='object', name='bead')

In [12]:
# Side-by-side table for the shared beads: 'self' holds df_f values, 'other' the
# Martini reference values.
# - The reference is aligned to df_f's column order because DataFrame.compare
#   only accepts identically labelled frames.
# - keep_shape/keep_equal retain every row, column and value; by default compare
#   drops rows/columns that match entirely and blanks matching cells to NaN,
#   which would hide exact agreement with the reference.
df_comp = df_f.loc[identical_beads].compare(
    df_martini.loc[identical_beads, df_f.columns],
    keep_shape=True,
    keep_equal=True,
)
df_comp

Unnamed: 0_level_0,HD -> W,HD -> W,OCO -> W,OCO -> W,CLF -> W,CLF -> W,ETH -> W,ETH -> W,BENZ -> W,BENZ -> W,CHEX -> W,CHEX -> W,W,W
Unnamed: 0_level_1,self,other,self,other,self,other,self,other,self,other,self,other,self,other
bead,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
Q1,-21.75,-24.2,-9.17,-10.9,-8.85,-9.5,-1.02,-2.6,-3.1,-4.7,-11.07,-12.5,21.59,23.2
Q2,-30.33,-32.7,-16.05,-15.1,-13.67,-13.9,-4.85,-5.1,-10.19,-10.4,-17.61,-17.1,25.5,25.5


In [13]:
# Absolute |self - other| deviations above which a cell is highlighted, and the
# background color used for each level. thresholds[i] is paired with colors[i];
# with the ascending order below, a cell gets the color of the largest threshold
# its deviation exceeds.
# NOTE(review): thresholds are presumably in kJ/mol, like the converted free
# energies being compared — confirm.
thresholds = [1.0, 2.0, 3.0]
colors = ["yellow", "orange", "red"]

# Split the comparison DataFrame into its 'self' and 'other' halves by selecting
# on the second column level; both keep the two-level column labels.
df_self = df_comp.loc[:, (slice(None), 'self')]
df_other = df_comp.loc[:, (slice(None), 'other')]

# Function to apply highlighting to the 'self' values
def apply_highlighting(df_self, df_other):
    """Build a styled self/other comparison, shading 'self' cells by their deviation.

    Parameters
    ----------
    df_self, df_other : pandas.DataFrame
        Equally shaped numeric frames sharing the same index; cell (i, j) of
        ``df_self`` is compared with cell (i, j) of ``df_other``.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styler over both frames with their columns interleaved
        (self, other, self, other, ...). Values are rendered with two decimals
        and each ``df_self`` cell whose absolute deviation exceeds a threshold
        gets the background color of the largest threshold exceeded.

    Notes
    -----
    Reads the module-level ``thresholds`` and ``colors`` lists, paired by position.
    """

    def format_value(val):
        # Render numbers with two decimals; leave anything unformattable untouched.
        try:
            return '{:.2f}'.format(val)
        except (TypeError, ValueError):
            return val

    # Rebuild the side-by-side table from the arguments instead of reading a
    # global frame, so the styled data always matches what was passed in.
    interleaved = [label for pair in zip(df_self.columns, df_other.columns) for label in pair]
    combined = pd.concat([df_self, df_other], axis=1)[interleaved]

    # Position-wise absolute deviation; NaN compares False and stays unstyled.
    deviation = np.abs(df_self.to_numpy(dtype=float) - df_other.to_numpy(dtype=float))

    # Visit thresholds in ascending order so the largest exceeded one wins,
    # regardless of the order in which they were listed.
    css = np.full(deviation.shape, '', dtype=object)
    for threshold, color in sorted(zip(thresholds, colors)):
        css[deviation > threshold] = f'background-color: {color}'

    # Only the 'self' columns carry styles; Styler.apply(axis=None) accepts a
    # frame whose labels are a subset of the styled data.
    styles = pd.DataFrame(css, index=df_self.index, columns=df_self.columns)

    return combined.style.format(format_value).apply(lambda _: styles, axis=None)

# Build the highlighted comparison from the 'self'/'other' halves and render it
# as the cell output.
styled_comp = apply_highlighting(df_self, df_other)
styled_comp

Unnamed: 0_level_0,HD -> W,HD -> W,OCO -> W,OCO -> W,CLF -> W,CLF -> W,ETH -> W,ETH -> W,BENZ -> W,BENZ -> W,CHEX -> W,CHEX -> W,W,W
Unnamed: 0_level_1,self,other,self,other,self,other,self,other,self,other,self,other,self,other
bead,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
Q1,-21.75,-24.2,-9.17,-10.9,-8.85,-9.5,-1.02,-2.6,-3.1,-4.7,-11.07,-12.5,21.59,23.2
Q2,-30.33,-32.7,-16.05,-15.1,-13.67,-13.9,-4.85,-5.1,-10.19,-10.4,-17.61,-17.1,25.5,25.5
