In [1]:
!pip install -i https://test.pypi.org/simple/ xyz2graph

Looking in indexes: https://test.pypi.org/simple/
Collecting xyz2graph
  Downloading https://test-files.pythonhosted.org/packages/ed/dc/38398b8a06ec96a72da126896f56bab49e388b987f09cd03f0e60b905fd8/xyz2graph-0.1-py3-none-any.whl (5.4 kB)
Installing collected packages: xyz2graph
Successfully installed xyz2graph-0.1


In [5]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from xyz2graph import MolGraph, to_networkx_graph, to_plotly_figure
from plotly.offline import init_notebook_mode, iplot

from plotting import *
from molecule_rot import MoleculeRot

In [3]:
home = os.path.abspath('/mnt/research/RotateDiheds/data')
master_dir = os.path.join(home,'masters/')
plot_dir = os.path.join(home, 'plots/')

all_molecules = make_all_mol_list(master_dir)
print("Done making all_molecules list.") 

Done making all_molecules list.


In [6]:
def boltzman_dist(x, A=1):
    return A*np.exp(-x/(298*8.314))
def get_probability(energy_df):
    if isinstance(energy_df, pd.DataFrame) and len(energy_df.index) > 1:
        energy_df['probability'] = energy_df.energy.apply(lambda x: boltzman_dist(x))
    #     probs.rename(columns={'energy': 'probability'}, inplace=True)
        sum_prob = energy_df.probability.sum()
        energy_df.probability = energy_df.probability.apply(lambda x: x/sum_prob)
        return energy_df
def planarity_func(angle, prob):
    angle_rad = angle * np.pi / 180
    return prob * np.cos(angle_rad)**2
def planarity(energy_df):
    try:
        energy_df['angle'] = energy_df.index
        indiv_planarity = energy_df.apply(lambda x: planarity_func(x.angle, x.probability), axis=1)
        planarity = indiv_planarity.sum()
        energy_df.drop(['angle'], axis=1, inplace=True)
        return planarity
    except: 
        return None
def norm_eng(energy_df):
    if isinstance(energy_df, pd.DataFrame) and len(energy_df.index) > 1:
        if energy_df.energy.iloc[0] < energy_df.energy.iloc[-1]:
            energy_df['norm_eng'] = energy_df.energy.values[::-1]
        else: 
            energy_df['norm_eng'] = energy_df.energy
    return energy_df

In [7]:
data = pd.DataFrame()
for i, mol in enumerate(all_molecules):
    try:
        energy = pd.DataFrame(mol.norm_energy_dict.values(),index=mol.norm_energy_dict.keys(), columns=['energy'])
    except AttributeError: 
        energy = None
    try: 
        homo_lumo_init = pd.DataFrame([mol.homo_dict, mol.lumo_dict], index=['lumo_energy','homo_energy'])
        homo_lumo = homo_lumo_init.T
    except AttributeError: 
        homo_lumo = None
    try:
        cent_bond_length = pd.DataFrame(mol.central_bond_length_dict.values(),index=mol.central_bond_length_dict.keys(), columns=['bond_length'])
    except AttributeError: 
        cent_bond_length = None
    mol_data = {'name': mol.name, 
                'ring_num': mol.ring_num,
                'unit_num': mol.unit_num,
                'polymer_num': mol.polymer_num,
                'chromophore': str(mol.chromophore),
                'substituents': (mol.substituents).upper(),
                'side_chains': mol.side_chains,
                'tuned_omega': mol.tuned_omega,
                'smiles': mol.smiles,
                'central_bond_lengths': cent_bond_length,
                'energy': energy,
                'homo_lumo': homo_lumo, 
                'min_e_angle': mol.unconst_angle
               }
    data = data.append(mol_data, ignore_index=True) 

data.fillna(value=np.nan, inplace=True)
data['side_chains_str'] = data.apply(lambda x: str(x.side_chains), axis=1)
data['molecule'] = data.apply(lambda x: '{:.0f}_{:.0f}'.format(x['ring_num'],x['polymer_num']), axis=1)
# data['tuned_omega'] = data.apply(lambda x: "0."+ str(x.tuned_omega)[1:] if pd.notna(x.tuned_omega) else np.nan, axis=1)
data['adjusted_min_e_angle'] = data.apply(lambda x: abs(90 - x.min_e_angle) if pd.notna(x.min_e_angle) else np.nan, axis=1)
data['molecule_type'] = data.chromophore + data.side_chains_str
data['inv_tuned_omega'] = data.tuned_omega.apply(lambda x: 1/x if pd.notna(x) else np.nan,)
data['energy_info'] = data.apply(lambda x: get_probability(x.energy) if x.energy is not np.nan else np.nan, axis=1)
data['energy_info'] = data.apply(lambda x: norm_eng(x.energy), axis=1)
# data['energy'] = data.apply(lambda x: x.energy_info.energy if x.energy_info is not np.nan else np.nan, axis=1)
data['planarity'] = data.apply(lambda x: planarity(x.energy) if x.energy is not np.nan else np.nan, axis=1)

data.head(5)

Unnamed: 0,central_bond_lengths,chromophore,energy,homo_lumo,min_e_angle,name,polymer_num,ring_num,side_chains,smiles,substituents,tuned_omega,unit_num,side_chains_str,molecule,adjusted_min_e_angle,molecule_type,inv_tuned_omega,energy_info,planarity
0,bond_length 0.0 1.342520 10.0 ...,bithiophene,energy probability norm_eng 0.0 ...,lumo_energy homo_energy 0.0 -9.56...,179.999942,mols_55_1_00_C-C,0.0,55.0,(),[cH]1[cH][s][c](-[c]2[cH][cH][cH][s]2)[cH]1,C-C,0.246111,1.0,(),55_0,89.999942,bithiophene(),4.063212,energy probability norm_eng 0.0 ...,0.526474
1,bond_length 0.0 1.375265 10.0 ...,bithiophene,energy probability norm_eng 0.0 ...,lumo_energy homo_energy 0.0 -9.31...,179.999978,mols_55_1_01_C-CF,1.0,55.0,"(F,)",[F][c]1[cH][cH][s][c]1-[c]1[cH][cH][cH][s]1,C-CF,0.251646,1.0,"('F',)",55_1,89.999978,"bithiophene('F',)",3.973838,energy probability norm_eng 0.0 ...,0.526446
2,bond_length 0.0 1.419547 10.0 ...,thienyl_thiazole,energy probability norm_eng 0.0 ...,lumo_energy homo_energy 0.0 -9.46...,179.999911,mols_55_1_02_C-N,2.0,55.0,(),[cH]1[cH][s][c](-[c]2[n][cH][cH][s]2)[cH]1,C-N,0.262108,1.0,(),55_2,89.999911,thienyl_thiazole(),3.815222,energy probability norm_eng 0.0 ...,0.526586
3,bond_length 0.0 1.291189 10.0 ...,bithiophene,energy probability norm_eng 0.0 ...,lumo_energy homo_energy 0.0 -9.92...,179.999967,mols_55_1_03_C-COC,3.0,55.0,"(OCH3,)",[CH3][O][c]1[cH][cH][s][c]1-[c]1[cH][cH][cH][s]1,C-COC,0.237226,1.0,"('OCH3',)",55_3,89.999967,"bithiophene('OCH3',)",4.215388,energy probability norm_eng 0.0 ...,0.526562
4,bond_length 0.0 1.288862 10.0 ...,bithiophene,energy probability norm_eng 0.0 ...,lumo_energy homo_energy 0.0 -10.84...,180.0,mols_55_1_04_CF-CF,4.0,55.0,"(F, F)",[F][c]1[cH][cH][s][c]1-[c]1[s][cH][cH][c]1[F],CF-CF,0.25268,1.0,"('F', 'F')",55_4,90.0,"bithiophene('F', 'F')",3.957573,energy probability norm_eng 0.0 ...,0.526399


In [20]:
mol = all_molecules[17]
structure = mol.structures_data['0']
temp_file = os.path.join(home,'temp.xyz')
with open(temp_file, 'w') as file: 
    file.write(structure)

# Initiate the Plotly notebook mode
init_notebook_mode(connected=True)
# Create the MolGraph object
mg = MolGraph()
# Read the data from the .xyz file
mg.read_xyz(temp_file)
# Create the Plotly figure object
fig = to_plotly_figure(mg)
# Plot the figure
iplot(fig)