In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import pdb
import pickle
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import rc

from polyphase import get_chi_vector, timer

rc('text', usetex=False)

# Final version of HTE data
This version contains `molecular weight` and `density` values for each polymer and small molecule, and a molar volume (`MVol`) for each solvent.

In [3]:
# Every record in both tables follows this column layout.
material_columns = ['name', 'dD', 'dP', 'dH', 'MW', 'rho']

# Small-molecule records.
SM = [
    ['PC61BM', 19.89, 5.68, 3.64, 915.91, 1],
    ['PC71BM', 20.16, 5.37, 4.49, 1030.99, 1],
    ['DPP(TBFu)2', 19.33, 4.78, 6.26, 757, 1],
    ['DPP(PhTT)2', 19.64, 3.54, 6.12, 757, 1],
    ['Y6', 18.98, 3.72, 3.44, 1452, 1.3659],
    ['IDTBR', 19.6, 4.6, 2.9, 1326.03, 1],
    ['IDIC', 18.7, 7.2, 4.5, 1011, 1],
]

# Polymer records.
polymers = [
    ['MDMO-PPV', 19.06, 5.62, 5.28, 12e4, 1],
    ['MEH-PPV', 19.06, 5.38, 5.44, 7e4, 1],
    ['P3HT', 18.56, 2.88, 3.19, 1e4, 1.15],
    ['PFO', 18.55, 2.8, 4.51, 1.01e5, 1.14],
    ['PM6', 18.37, 4.36, 4.19, 1.06e5, 1.2817],
    ['PTB7-Th', 18.56, 2.3, 3.21, 5e4, 1],
]

# Build both frames the same way so their columns cannot drift apart.
SM_df, polymer_df = (
    pd.DataFrame.from_records(records, columns=material_columns)
    for records in (SM, polymers)
)
# Load the solvent table from the 'solvents' sheet of the HSP workbook.
solvents_hspip = pd.read_excel('../data/HSP_Calculations.xlsx',sheet_name='solvents')

In [5]:
# Additional solvent records, each tagged with an '_EXPT' suffix.
# Record layout: [Solvents, dD, dP, dH, MVol].
solvents_gomez_tab = [
    ['DIO_EXPT', 17.6, 4.8, 4.6, 198.9],
    ['CF_EXPT', 17.8, 3.1, 5.7, 80.5],
    ['CB_EXPT', 19, 4.3, 2, 102.1],
    ['DCB_EXPT', 19.2, 6.3, 3.3, 113],
    ['TCB_EXPT', 20.2, 4.2, 3.2, 125.5],
    ['CN_EXPT', 19.9, 4.9, 2.5, 136.3],
    ['Tol_EXPT', 18, 1.4, 2, 106.6],
    ['PDMS_EXPT', 16, 0.1, 4.7, 93.3],
    ['NMP_EXPT', 18, 12.3, 7.2, 96.2],
    ['Ace_EXPT', 15.5, 10.4, 7, 73.8],
]
solvents_gomez = pd.DataFrame.from_records(
    solvents_gomez_tab,
    columns=['Solvents', 'dD', 'dP', 'dH', 'MVol'],
)
# Stack the extra records below the workbook solvents and renumber the rows
# from 0 so that positional and label-based row lookups coincide.
solvents = pd.concat([solvents_hspip,solvents_gomez]).reset_index(drop=True)

In [8]:
# Print the complete solvent table; the row/column display limits are lifted
# only inside this `with` block.
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(solvents)

                                     Solvents    dD    dP    dH   MVol
0                                     Acetone  15.5  10.4   7.0   73.8
1                                Acetonitrile  15.3  18.0   6.1   52.9
2                              n-Amyl Acetate  15.8   3.3   6.1  148.0
3                              n-Amyl Alcohol  15.9   5.9  13.9  108.6
4                                     Benzene  18.4   0.0   2.0   52.9
5                              Benzyl Alcohol  18.4   6.3  13.7  103.8
6                             Benzyl Benzoate  20.0   5.1   5.2  190.3
7                                   1-Butanol  16.0   5.7  15.8   92.0
8                                   2-Butanol  15.8   5.7  14.5   92.0
9                             n-Butyl Acetate  15.8   3.7   6.3  132.6
10                            t-Butyl Acetate  15.0   3.7   6.0  134.8
11                            t-Butyl Alcohol  15.2   5.1  14.7   96.0
12                             Butyl Benzoate  18.3   5.6   5.5  178.1
13    

In [None]:
from itertools import product

def get_system(indx):
    """Assemble one (solvent, small molecule, polymer) record for the HTE table.

    Parameters
    ----------
    indx : sequence
        ``indx[0]``, ``indx[1]`` and ``indx[2]`` are row labels into
        ``solvents``, ``SM_df`` and ``polymer_df`` respectively.

    Returns
    -------
    list
        ``[solvent name, SM name, polymer name, chi[0], chi[1], chi[2], M,
        delta_solvent, delta_sm, delta_polymer, MVol]`` where ``M`` is
        ``[M_polymer, M_sm, 1]``.
    """
    solvent_id, sm_id, polymer_id = indx[0], indx[1], indx[2]
    hsp_cols = ['dD','dP','dH']

    # Solvent: the reference component, so its entry in M is fixed at 1.
    MVol = solvents.loc[solvent_id,'MVol']
    delta_solvent = solvents.loc[solvent_id,hsp_cols].tolist()
    M_solv = 1

    # Small molecule and polymer: (MW / rho) expressed in units of the solvent MVol.
    delta_sm = SM_df.loc[sm_id,hsp_cols].tolist()
    M_sm = (SM_df.loc[sm_id,'MW']/SM_df.loc[sm_id,'rho'])*(1/MVol)

    delta_polymer = polymer_df.loc[polymer_id,hsp_cols].tolist()
    M_polymer = (polymer_df.loc[polymer_id,'MW']/polymer_df.loc[polymer_id,'rho'])*(1/MVol)

    # Components are ordered polymer, small molecule, solvent in both M and the
    # delta list handed to get_chi_vector; only element [0] of its result is used.
    M = [M_polymer, M_sm, M_solv]
    chi = get_chi_vector([delta_polymer,delta_sm,delta_solvent], MVol, 2)[0]

    names = [
        solvents.loc[solvent_id,'Solvents'],
        SM_df.loc[sm_id,'name'],
        polymer_df.loc[polymer_id,'name'],
    ]
    return names + [chi[0], chi[1], chi[2], M,
                    delta_solvent, delta_sm, delta_polymer, MVol]

# Enumerate every (solvent, small molecule, polymer) combination by row number
# and build one record per combination.
axes = [np.arange(0,len(solvents)),np.arange(0,len(SM_df)),np.arange(0,len(polymer_df))]
hte_df = pd.DataFrame(get_system(i) for i in product(*axes))
hte_df.columns =['solvent', 'SM','polymer','chi12','chi13','chi23','dop','delta_solv','delta_SM','delta_polymer','MVol']
print('Total of {} systems'.format(len(hte_df)))
# The preview must be the cell's last expression to be rendered; placed before
# the print it was evaluated and silently discarded.
hte_df.head()

In [None]:
# Persist the three input tables as pickle files under ../data.
solvents.to_pickle('../data/solvents.pkl')
polymer_df.to_pickle('../data/polymers.pkl')
SM_df.to_pickle('../data/SM.pkl')

In [None]:
# Save the assembled systems table as CSV and as a pickle. The list-valued
# columns ('dop', 'delta_*') are written to the CSV as their string form, so
# the pickle is the copy that round-trips them unchanged.
hte_df.to_csv('../data/htev4.csv')
hte_df.to_pickle('../data/htev4.pkl')

In [None]:
# Restrict the systems table to the PM6 (polymer) + Y6 (small molecule) pair.
is_pm6_y6 = (hte_df['polymer'] == 'PM6') & (hte_df['SM'] == 'Y6')
PM6_Y6 = hte_df.loc[is_pm6_y6]

# One row per selected system: the per-row 'dop' lists stacked into a 2-D
# array, and the three chi columns as a matching 2-D array.
dops = np.vstack(PM6_Y6['dop'].to_numpy())
chis = np.vstack(PM6_Y6[['chi12','chi13','chi23']].to_numpy())

In [None]:
# Show the PM6 + Y6 rows whose solvent name is exactly 'Chloroform'.
PM6_Y6[(PM6_Y6['solvent']=='Chloroform')]


In [None]:
# Visualize the solubility space
# NOTE(review): `sm`, `polymer`, `touchup3d` and `SAVE_FIGS` are not defined in
# any cell above this one, so this cell raises NameError on a fresh kernel.
# Presumably `sm` / `polymer` are the (dD, dP, dH) triples of Y6 and PM6 (the
# output file name mentions both) — confirm and define them before this cell.
fig, ax = plt.subplots(figsize=(5,5), subplot_kw={'projection':'3d'})


# Mark the small molecule (square) and the polymer (star) in the 3-D space.
ax.scatter(sm[0], sm[1], sm[2], 
           marker='s',s=100,color='k',label='small molecule')
ax.scatter(polymer[0], polymer[1], polymer[2],
           marker='*',s=100,color='k',label='polymer') 

ax.set_xlabel(r'$\delta_{D}$')
ax.set_ylabel(r'$\delta_{P}$')
ax.set_zlabel(r'$\delta_{H}$')

fig.legend(ncol=2)
touchup3d(ax)
    
# Either write the figure to disk or show it inline, never both.
if SAVE_FIGS:
    plt.savefig('../figures/final/result2_PM6_Y6_DesignSpace.png', 
                dpi=400, bbox_inches='tight')
else:
    plt.show()

In [None]:
# Histogram of the first two entries of every 'dop' row, pooled into a single
# sample; the third entry is left out. With the M = [M_polymer, M_sm, M_solv]
# list built for hte_df these are the polymer and small-molecule values.
fig,ax = plt.subplots(figsize=(4*1.6,4))
ax.hist(dops[:,:2].flatten(),density=False, bins=50)
ax.set_xlabel('Degree of Polymerization')
ax.set_ylabel('#Occurances')
fig.suptitle('Histogram of all DOPs')
# savefig acts on the current figure, so it must run before plt.show().
plt.savefig('../figures/notebooks/4.png', dpi=400, bbox_inches='tight')
plt.show()

In [None]:
# Figure 1: the three chi columns of the PM6 + Y6 subset as a 3-D scatter,
# one point per solvent.
fig, ax = plt.subplots(figsize=(4*1.6, 4), subplot_kw={'projection':'3d'})
ax.scatter(chis[:,0], chis[:,1], chis[:,2])
ax.set_xlabel(r'$\chi_{12}$')
ax.set_ylabel(r'$\chi_{13}$')
ax.set_zlabel(r'$\chi_{23}$')
fig.suptitle('Chi values for all solvents PM6 + Y6')
plt.tight_layout()
# Each savefig below acts on the current figure, so it has to precede plt.show().
plt.savefig('../figures/notebooks/1.png', dpi=400, bbox_inches='tight')
plt.show()

# Figure 2: first 'dop' entry (x axis) against the second (y axis) for the same subset.
fig, ax = plt.subplots(figsize=(4*1.6, 4))
ax.scatter(dops[:,0], dops[:,1])
ax.set_xlabel('Polymer')
ax.set_ylabel('Small molecule')
fig.suptitle('Degree of Polymerization values for all solvents PM6 + Y6')
plt.savefig('../figures/notebooks/2.png', dpi=400, bbox_inches='tight')
plt.show()

# Figure 3: dD / dP / dH of every row in the full solvent table.
fig, ax = plt.subplots(figsize=(4*1.6, 4), subplot_kw={'projection':'3d'})
ax.scatter(solvents['dD'], solvents['dP'], solvents['dH'])
ax.set_xlabel(r'$\delta D$')
ax.set_ylabel(r'$\delta P$')
ax.set_zlabel(r'$\delta H$')
fig.suptitle('Solubility values for all solvents')
plt.tight_layout()
plt.savefig('../figures/notebooks/3.png', dpi=400, bbox_inches='tight')
plt.show()

## Make a dataframe of HTE systems 

In [None]:
# Load the pickled solubility data; it is indexed below by the keys
# 'small molecules', 'polymers' and 'solvents'.
# NOTE: unpickling executes arbitrary code — only load files you trust.
solubs = pd.read_pickle('../expts/data/htpdata/solubility.pkl')


In [None]:
# Keep columns 1-4 of each table (the name column plus three parameter columns).
# .copy() makes these independent frames, so the row assignments below do not
# write into slices of the loaded tables (no SettingWithCopyWarning, and the
# cell can be re-run safely).
solub_sm = solubs['small molecules'].iloc[:,1:5].copy()
solub_polymer = solubs['polymers'].iloc[:,1:5].copy()
# Exchange the 'hydrogen' and 'dispersive' column labels of the solvent table.
# NOTE(review): presumably the stored labels are swapped — confirm against the data.
solub_solvent= solubs['solvents'].rename(columns={"hydrogen": "dispersive", "dispersive": "hydrogen"})
# Append Y6 (small molecule) and PM6 (polymer). The values agree with the SM /
# polymers tables defined at the top of this notebook; previously the two
# parameter triples were attached to the wrong material.
solub_sm.loc[5]=['Y6',18.98,3.72,3.44]
solub_polymer.loc[4] = ['PM6',18.37,4.36,4.19]

In [None]:
# Inspect the first polymer row as a plain list (name followed by its parameters).
solub_polymer.loc[0].tolist()

In [None]:
from itertools import product

def get_system(indx):
    """Assemble one (solvent, small molecule, polymer) record from the solubility tables.

    This definition replaces the ``get_system`` defined earlier in the notebook.

    Parameters
    ----------
    indx : sequence
        ``indx[0]``, ``indx[1]`` and ``indx[2]`` are row labels into
        ``solub_solvent``, ``solub_sm`` and ``solub_polymer`` respectively.

    Returns
    -------
    list
        ``[solvent label, SM name, polymer name, chi[0], chi[1], chi[2],
        delta_solvent, delta_sm, delta_polymer]``.
    """
    solvent_id, sm_id, polymer_id = indx[0], indx[1], indx[2]

    # The whole solvent row is used as-is (assumes it holds only the three
    # parameter columns — TODO confirm); the other two rows drop their
    # leading name entry.
    delta_solvent = solub_solvent.loc[solvent_id].tolist()
    delta_sm = solub_sm.loc[sm_id].tolist()[1:]
    delta_polymer = solub_polymer.loc[polymer_id].tolist()[1:]

    # Component order is polymer, small molecule, solvent; only element [0] of
    # the get_chi_vector result is used.
    chi = get_chi_vector([delta_polymer,delta_sm,delta_solvent], 100, 2)[0]

    labels = [solvent_id, solub_sm.loc[sm_id, 'name'], solub_polymer.loc[polymer_id, 'name']]
    return labels + [chi[0], chi[1], chi[2], delta_solvent, delta_sm, delta_polymer]

# One integer axis per table, then one record per element of their Cartesian product.
axes = [np.arange(0, len(table)) for table in (solub_solvent, solub_sm, solub_polymer)]
system_records = (get_system(combo) for combo in product(*axes))
allsys_df = pd.DataFrame(system_records)
allsys_df.columns = [
    'solvent', 'SM', 'polymer',
    'chi12', 'chi13', 'chi23',
    'delta_solv', 'delta_SM', 'delta_polymer',
]
allsys_df.head()

## Analysing phase diagrams using tSNE

In [None]:
# Load the solubility pickle (indexed below by 'solvents', 'small molecules'
# and 'polymers') and the saved embedding array.
# NOTE: pickle.load executes arbitrary code — only load files you trust.
with open('../expts/data/htpdata/solubility.pkl', 'rb') as handle:
    data = pickle.load(handle)
# Columns 0 and 1 of this array are used below as the t-SNE x / y coordinates.
X_emb = np.load('../figures/notebooks/hte_tsne_Xembed.npy')

In [None]:
# Collect the rendered phase-diagram images; sorting makes the order deterministic.
images_dir = '../figures/hteplots/*.png'
images_list = sorted(glob.glob(images_dir))
print('Total of {} phase diagrams'.format(len(images_list)))

# File names have the form <solvent>_<small molecule>_<polymer>.png.
# Raw string with escaped dots, so '.' matches only a literal dot (previously
# '.png' also matched e.g. 'xpng' and '..' matched any two characters).
tagger = re.compile(r'\.\./figures/hteplots/(.*)_(.*)_(.*)\.png')

tags_list = []
for img in images_list:
    tag = tagger.findall(img)
    if not tag:
        # Fail with the offending path instead of an opaque IndexError.
        raise ValueError(
            'Unexpected phase diagram file name (expected '
            '<solvent>_<small molecule>_<polymer>.png): {}'.format(img)
        )
    tags_list.append(tag[0])

# One row per image, in the same (sorted) order as images_list.
df_tags = pd.DataFrame(tags_list, columns=['solvent','small molecule','polymer'])

In [None]:
# One distribution panel per solvent column, left to right.
fig, axs = plt.subplots(1,3, figsize=(4*3*1.6, 4), dpi=300)
for panel, column in zip(axs, ['hydrogen', 'polar', 'dispersive']):
    sns.distplot(data['solvents'][column], ax=panel)
plt.show()

In [None]:
# Attach the three solvent columns to each tagged image. The 'solvent' tag is
# a string parsed from the file name, so it is cast to int before being used
# as a row label into the solvent table.
for target, source in [('solv_h', 'hydrogen'), ('solv_p', 'polar'), ('solv_d', 'dispersive')]:
    df_tags[target] = df_tags['solvent'].apply(
        lambda x, column=source: data['solvents'][column][int(x)]
    )

# Embedding coordinates; assumes X_emb rows are in the same order as df_tags — TODO confirm.
df_tags['tsne_x'], df_tags['tsne_y'] = X_emb[:,0], X_emb[:,1]
df_tags.head()

It appears that solvents with a `polar` component > 20 are placed closer to one another than to solvents with a `polar` component of ~(0-5). Most of the solvents have a `hydrogen` component near ~(16-18); they are generally spread across the embedding and do not show any interesting trends.
The `dispersive` component shows similar behavior: everything below 20 is more or less separated.

In [None]:
import pdb

def add_chi_columns(row):
    """Compute the three chi values for one row of ``df_tags``.

    Parameters
    ----------
    row : pandas.Series
        Must provide 'solvent' (castable to int), 'small molecule' and 'polymer'.

    Returns
    -------
    pandas.Series
        Element [0] of the ``get_chi_vector`` result, indexed by
        'chi 12', 'chi 13' and 'chi 23'.
    """
    sm_table = data['small molecules']
    polymer_table = data['polymers']

    # Look the materials up by name and take the first matching record.
    sm_record = sm_table.loc[sm_table['name'] == row['small molecule']].values[0]
    polymer_record = polymer_table.loc[polymer_table['name'] == row['polymer']].values[0]

    # Whole solvent row as-is; positions 2-4 of the material records hold the
    # parameters (assumes the same column layout in both tables — TODO confirm).
    delta_solvent = data['solvents'].loc[int(row['solvent'])].tolist()
    delta_sm = sm_record[2:5].tolist()
    delta_polymer = polymer_record[2:5].tolist()

    chi = get_chi_vector([delta_polymer,delta_sm,delta_solvent], 100, 2)[0]
    return pd.Series(chi, index=['chi 12','chi 13', 'chi 23'])

# Smoke test on the first tagged image.
add_chi_columns(df_tags.loc[0])

In [None]:
# Compute the chi values row by row, then attach them as new columns.
df_chis = df_tags.apply(add_chi_columns, axis=1)
# Drop chi columns left over from a previous run first, so re-executing this
# cell replaces them instead of appending duplicate 'chi *' columns (which
# would turn df_tags['chi 12'] into a DataFrame).
df_tags = pd.concat(
    [df_tags.drop(columns=['chi 12', 'chi 13', 'chi 23'], errors='ignore'), df_chis],
    axis=1,
)

In [None]:
# Preview the tag table with the chi columns attached.
df_tags.head()

In [None]:
# Euclidean norm of each row's (solv_h, solv_p, solv_d) vector. The columns are
# selected by name rather than by position 3:6, so the result does not depend
# on the current column order of df_tags, and the norm is computed in one
# vectorized call instead of row by row.
df_tags['solv_t'] = np.linalg.norm(
    df_tags[['solv_h', 'solv_p', 'solv_d']].to_numpy(dtype=float), axis=1
)

In [None]:
# Look at how the t-SNE plot embeds the solubility parameters of the solvent:
# marker size encodes 'solv_h' and color encodes 'solv_t'.
fig, ax = plt.subplots(figsize=(5*1.6, 5),dpi=300)
ax = sns.scatterplot(x='tsne_x', y= 'tsne_y', size='solv_h', hue='solv_t',
                     sizes=(20, 200), data=df_tags, ax=ax, palette="RdBu"
)
sns.despine()
# Place the legend outside the axes, to the right.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=2)
plt.show()

In [None]:
# 3-D scatter of the three chi columns, one point per tagged phase diagram.
fig, ax = plt.subplots(figsize=(5*1.6, 5), dpi=300, subplot_kw={'projection': '3d'})
chi_12, chi_13, chi_23 = (df_tags[column] for column in ['chi 12', 'chi 13', 'chi 23'])
ax.scatter(chi_12, chi_13, chi_23)

for set_label, text in [(ax.set_xlabel, r'$\chi_{12}$'),
                        (ax.set_ylabel, r'$\chi_{13}$'),
                        (ax.set_zlabel, r'$\chi_{23}$')]:
    set_label(text)
plt.show()

## Plot triangulated surfaces

We see a three-phase region in the phase diagram, which means there is a big triangle connecting the three corners. We still want to plot the energy surface as a triangulation (the expensive way), interactively, to verify that.

In [None]:
# Pick a system with a three-phase region from the 1720 phase diagrams. It is actually the very first one.
from polyphase import compute

# Select the chi values by column name. Taking the last three columns is wrong
# once 'solv_t' has been appended to df_tags: the slice would then return
# ['chi 13', 'chi 23', 'solv_t'].
chi = df_tags.iloc[0][['chi 12', 'chi 13', 'chi 23']].to_list()
# Options forwarded unchanged to polyphase.compute.
kwargs = {'flag_lift_label': True,
          'use_weighted_delaunay': False,
          'flag_remove_collinear' : False,
          'beta':1e-4,
          'flag_make_energy_paraboloid': True,
          'flag_lift_purecomp_energy': False}
configuration = {'M':[100,5,1], 'chi':chi}
dx=200
# The result is used below through its 'grid', 'energy' and 'simplices' keys.
outdict = compute(3, configuration,dx, **kwargs)

In [None]:
from polyphase import plot_triangulated_surface

# Output name: the first three columns of the first df_tags row joined with '_'.
fname = '_'.join(i for i in df_tags.iloc[0,:3])
# Rows 0 and 1 of the grid are the two plotted coordinates; energy is the height.
x, y, z = outdict['grid'][0,:], outdict['grid'][1,:], outdict['energy']
# NOTE(review): x and y are each passed twice — confirm this matches the
# signature of plot_triangulated_surface.
fig = plot_triangulated_surface(x,y, x,y,z)
fig.update_layout(title=fname, scene=dict(
    xaxis_title="Phi_1",
    yaxis_title="Phi_2",
    zaxis_title = "Energy"),
    coloraxis_colorbar=dict(title='Energy'),
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="RebeccaPurple")
)
# Write the figure as a standalone HTML file.
fig.write_html('../figures/3dplots/{}.html'.format(fname))

In [None]:
import plotly.figure_factory as ff
# `go` is used for the Scatter3d trace below but was never imported anywhere in
# the notebook, so this cell raised NameError on a fresh kernel.
import plotly.graph_objects as go

# Output name: the first three columns of the first df_tags row joined with '_'.
fname = '_'.join(i for i in df_tags.iloc[0,:3])
x, y, z = outdict['grid'][0,:], outdict['grid'][1,:], outdict['energy']

# Surface built from the simplices returned by compute().
fig = ff.create_trisurf(x=x, y=y, z=z,
                     simplices=outdict['simplices'])

# Overlay the grid points themselves as markers.
fig.add_trace(go.Scatter3d(x=x, y=y, z=z,mode='markers'))

fig.update_layout(title=fname,scene=dict(
    xaxis_title="Phi_1",
    yaxis_title="Phi_2",
    zaxis_title = "Energy"),
    coloraxis_colorbar=dict(title='Energy'),
    font=dict(
        family="Courier New, monospace",
        size=18,
        )
)

# Write the figure as a standalone HTML file.
fig.write_html('../figures/3dplots/{}_convexhull.html'.format(fname))