1. This notebook converts molecular conformers into SPMS descriptors.
2. The SPMS of the CPAs, imines and thiols are saved to **"./SPMS/cat.npy"**, **"./SPMS/imine.npy"** and **"./SPMS/thiol.npy"**, respectively.

In [3]:
# Unpack the conformer archive into ./Conformers/.
# -o overwrites existing files without prompting, so re-running this cell
# (e.g. Restart & Run All) does not block on unzip's interactive
# "replace?" question, which a notebook cannot answer.
!unzip -o ./Conformers.zip

Archive:  ./Conformers.zip
   creating: Conformers/
   creating: Conformers/CPA/
  inflating: Conformers/CPA/cat_0_conf_0.sdf  
  inflating: Conformers/CPA/cat_0_conf_12.sdf  
  inflating: Conformers/CPA/cat_0_conf_13.sdf  
  inflating: Conformers/CPA/cat_0_conf_17.sdf  
  inflating: Conformers/CPA/cat_0_conf_18.sdf  
  inflating: Conformers/CPA/cat_0_conf_20.sdf  
  inflating: Conformers/CPA/cat_0_conf_21.sdf  
  inflating: Conformers/CPA/cat_0_conf_23.sdf  
  inflating: Conformers/CPA/cat_0_conf_24.sdf  
  inflating: Conformers/CPA/cat_0_conf_25.sdf  
  inflating: Conformers/CPA/cat_0_conf_26.sdf  
  inflating: Conformers/CPA/cat_0_conf_27.sdf  
  inflating: Conformers/CPA/cat_0_conf_29.sdf  
  inflating: Conformers/CPA/cat_0_conf_3.sdf  
  inflating: Conformers/CPA/cat_0_conf_31.sdf  
  inflating: Conformers/CPA/cat_0_conf_33.sdf  
  inflating: Conformers/CPA/cat_0_conf_36.sdf  
  inflating: Conformers/CPA/cat_0_conf_37.sdf  
  inflating: Conformers/CPA/cat_0_co

  inflating: Conformers/CPA/cat_4_conf_23.sdf  
  inflating: Conformers/CPA/cat_4_conf_25.sdf  
  inflating: Conformers/CPA/cat_4_conf_3.sdf  
  inflating: Conformers/CPA/cat_4_conf_30.sdf  
  inflating: Conformers/CPA/cat_4_conf_31.sdf  
  inflating: Conformers/CPA/cat_4_conf_32.sdf  
  inflating: Conformers/CPA/cat_4_conf_36.sdf  
  inflating: Conformers/CPA/cat_4_conf_37.sdf  
  inflating: Conformers/CPA/cat_4_conf_4.sdf  
  inflating: Conformers/CPA/cat_4_conf_40.sdf  
  inflating: Conformers/CPA/cat_4_conf_45.sdf  
  inflating: Conformers/CPA/cat_4_conf_46.sdf  
  inflating: Conformers/CPA/cat_4_conf_47.sdf  
  inflating: Conformers/CPA/cat_4_conf_48.sdf  
  inflating: Conformers/CPA/cat_4_conf_49.sdf  
  inflating: Conformers/CPA/cat_4_conf_51.sdf  
  inflating: Conformers/CPA/cat_4_conf_7.sdf  
  inflating: Conformers/CPA/cat_5_conf_1.sdf  
  inflating: Conformers/CPA/cat_5_conf_10.sdf  
  inflating: Conformers/CPA/cat_5_conf_11.sdf  
  inflating: Conformers/

In [8]:
# List the top-level folders of the unpacked ./Conformers/ directory.
!ls ./Conformers/

CPA  imine  thiol


In [22]:
# List the files in ./Reaction_Result/ (the reaction CSV is read from here below).
! ls ./Reaction_Result/

Denmark_Reaction_Data.csv  ddG.npy


## Load original files

In [1]:
import glob
import os

import numpy as np
import pandas as pd

from SPMS import SPMS

In [2]:
# Read the reaction table and pull out, row by row, the SMILES of each
# reaction component together with the 'Output' column.
result_df = pd.read_csv('./Reaction_Result/Denmark_Reaction_Data.csv')

cat_smiles, imine_smiles, thiol_smiles, ddG = (
    result_df[column].to_list()
    for column in ('Catalyst', 'Imine', 'Thiol', 'Output')
)

# De-duplicated SMILES per component; these are the keys of the
# per-molecule descriptor dictionaries built further down.
cat_smiles_set, imine_smiles_set, thiol_smiles_set = (
    list(set(smiles))
    for smiles in (cat_smiles, imine_smiles, thiol_smiles)
)

In [3]:
# Collect the conformer .sdf paths of each component folder.
# glob returns them in arbitrary order; they are sorted before use below.
cpa_sdf_files, imine_sdf_files, thiol_sdf_files = map(
    glob.glob,
    (
        './Conformers/CPA/*.sdf',
        './Conformers/imine/*.sdf',
        './Conformers/thiol/*.sdf',
    ),
)

## Calculate SPMS

### CPA

In [4]:
# Group the conformer files by catalyst: the sort key is the integer that
# follows "cat_" in the file name (e.g. "cat_4_conf_23.sdf" -> 4).
# The token is parsed with int() rather than eval(), so text embedded in a
# file name can never be executed as code.
cpa_sdf_files.sort(key=lambda x: int(x.split('/')[-1].split('_')[1]))

# First pass over every CPA conformer: standardise the geometry and record
# the sphere radius that the SPMS object exposes afterwards.
all_radius = []
for tmp_sdf_file in cpa_sdf_files:
    spms = SPMS(tmp_sdf_file,key_atom_num=[3],desc_n=40,desc_m=40)
    spms._Standarlize_Geomertry()
    all_radius.append(spms.sphere_radius)
all_radius = np.array(all_radius)

# The largest radius is passed as `sphere_radius` to every CPA conformer in
# the descriptor pass.
# NOTE: `default_radius` is overwritten by the imine and thiol sections, so
# the cells must be run in order.
default_radius = all_radius.max()

In [5]:
# Display the CPA conformer paths after the in-place sort by catalyst index.
cpa_sdf_files

['./Conformers/CPA/cat_0_conf_33.sdf',
 './Conformers/CPA/cat_0_conf_29.sdf',
 './Conformers/CPA/cat_0_conf_12.sdf',
 './Conformers/CPA/cat_0_conf_23.sdf',
 './Conformers/CPA/cat_0_conf_5.sdf',
 './Conformers/CPA/cat_0_conf_6.sdf',
 './Conformers/CPA/cat_0_conf_18.sdf',
 './Conformers/CPA/cat_0_conf_31.sdf',
 './Conformers/CPA/cat_0_conf_24.sdf',
 './Conformers/CPA/cat_0_conf_27.sdf',
 './Conformers/CPA/cat_0_conf_25.sdf',
 './Conformers/CPA/cat_0_conf_17.sdf',
 './Conformers/CPA/cat_0_conf_3.sdf',
 './Conformers/CPA/cat_0_conf_0.sdf',
 './Conformers/CPA/cat_0_conf_37.sdf',
 './Conformers/CPA/cat_0_conf_26.sdf',
 './Conformers/CPA/cat_0_conf_20.sdf',
 './Conformers/CPA/cat_0_conf_36.sdf',
 './Conformers/CPA/cat_0_conf_13.sdf',
 './Conformers/CPA/cat_0_conf_21.sdf',
 './Conformers/CPA/cat_1_conf_11.sdf',
 './Conformers/CPA/cat_1_conf_12.sdf',
 './Conformers/CPA/cat_1_conf_8.sdf',
 './Conformers/CPA/cat_1_conf_43.sdf',
 './Conformers/CPA/cat_1_conf_47.sdf',
 './Conformers/CPA/cat_1_conf_

In [6]:
# Second pass over the CPA conformers: build each SPMS object on the shared
# sphere radius and collect its sphere descriptor.
cpa_descriptors = []
for sdf_path in cpa_sdf_files:
    projector = SPMS(
        sdf_path,
        key_atom_num=[3],
        desc_n=40,
        desc_m=40,
        sphere_radius=default_radius,
    )
    projector.GetSphereDescriptors()
    cpa_descriptors.append(projector.sphere_descriptors)

# One entry per conformer, in the same order as `cpa_sdf_files`.
all_desc = np.array(cpa_descriptors)

In [7]:
# Bucket the per-conformer descriptors by catalyst SMILES.
cat_desc_dict = {item: [] for item in cat_smiles_set}
for tmp_sdf_file, tmp_desc in zip(cpa_sdf_files, all_desc):
    with open(tmp_sdf_file,'r') as fr:
        # The first line of the SDF file is used as the SMILES key;
        # readline() avoids loading the whole file just for that line.
        tmp_smiles = fr.readline().strip()
    cat_desc_dict[tmp_smiles].append(tmp_desc)

# Fail loudly if a catalyst from the reaction table has no conformer:
# averaging an empty list would otherwise silently produce NaN.
missing_cat = [key for key, descs in cat_desc_dict.items() if not descs]
if missing_cat:
    raise ValueError('No conformer descriptors found for catalysts: %s' % missing_cat)

# Conformer-averaged descriptor for each catalyst.
new_cat_desc_dict = {
    tmp_key: np.average(np.array(cat_desc_dict[tmp_key]),axis=0)
    for tmp_key in cat_smiles_set
}

### imine

In [8]:
# Group the conformer files by imine: the sort key is the integer that
# follows "imine_" in the file name (e.g. "imine_1_conf_5.sdf" -> 1).
# The token is parsed with int() rather than eval(), so text embedded in a
# file name can never be executed as code.
imine_sdf_files.sort(key=lambda x: int(x.split('/')[-1].split('_')[1]))

# First pass over every imine conformer: standardise the geometry and record
# the sphere radius that the SPMS object exposes afterwards.
# NOTE(review): this pass uses key_atom_num=[10], while the descriptor pass
# for the same files uses key_atom_num=[9,10]. Confirm whether both passes
# should use the same key atoms.
all_radius = []
for tmp_sdf_file in imine_sdf_files:
    spms = SPMS(tmp_sdf_file,key_atom_num=[10],desc_n=40,desc_m=40)
    spms._Standarlize_Geomertry()
    all_radius.append(spms.sphere_radius)
all_radius = np.array(all_radius)

# The largest radius is passed as `sphere_radius` to every imine conformer
# in the descriptor pass. This overwrites the value from the CPA section.
default_radius = all_radius.max()

In [9]:
# Display the imine conformer paths after the in-place sort by imine index.
imine_sdf_files

['./Conformers/imine/imine_0_conf_16.sdf',
 './Conformers/imine/imine_0_conf_10.sdf',
 './Conformers/imine/imine_0_conf_7.sdf',
 './Conformers/imine/imine_0_conf_18.sdf',
 './Conformers/imine/imine_0_conf_9.sdf',
 './Conformers/imine/imine_0_conf_19.sdf',
 './Conformers/imine/imine_0_conf_6.sdf',
 './Conformers/imine/imine_0_conf_12.sdf',
 './Conformers/imine/imine_0_conf_1.sdf',
 './Conformers/imine/imine_0_conf_5.sdf',
 './Conformers/imine/imine_0_conf_14.sdf',
 './Conformers/imine/imine_0_conf_3.sdf',
 './Conformers/imine/imine_0_conf_15.sdf',
 './Conformers/imine/imine_0_conf_2.sdf',
 './Conformers/imine/imine_0_conf_8.sdf',
 './Conformers/imine/imine_0_conf_4.sdf',
 './Conformers/imine/imine_0_conf_0.sdf',
 './Conformers/imine/imine_0_conf_17.sdf',
 './Conformers/imine/imine_0_conf_11.sdf',
 './Conformers/imine/imine_0_conf_13.sdf',
 './Conformers/imine/imine_1_conf_12.sdf',
 './Conformers/imine/imine_1_conf_5.sdf',
 './Conformers/imine/imine_1_conf_15.sdf',
 './Conformers/imine/i

In [10]:
# Second pass over the imine conformers: build each SPMS object on the
# shared sphere radius and collect its sphere descriptor.
# NOTE(review): key_atom_num is [9,10] here but [10] in the radius pass for
# the same files. Confirm which is intended.
imine_descriptors = []
for sdf_path in imine_sdf_files:
    projector = SPMS(
        sdf_path,
        key_atom_num=[9,10],
        desc_n=40,
        desc_m=40,
        sphere_radius=default_radius,
    )
    projector.GetSphereDescriptors()
    imine_descriptors.append(projector.sphere_descriptors)

# One entry per conformer, in the same order as `imine_sdf_files`.
all_desc = np.array(imine_descriptors)


In [11]:
# Bucket the per-conformer descriptors by imine SMILES.
imine_desc_dict = {item: [] for item in imine_smiles_set}
for tmp_sdf_file, tmp_desc in zip(imine_sdf_files, all_desc):
    with open(tmp_sdf_file,'r') as fr:
        # The first line of the SDF file is used as the SMILES key;
        # readline() avoids loading the whole file just for that line.
        tmp_smiles = fr.readline().strip()
    imine_desc_dict[tmp_smiles].append(tmp_desc)

# Fail loudly if an imine from the reaction table has no conformer:
# averaging an empty list would otherwise silently produce NaN.
missing_imine = [key for key, descs in imine_desc_dict.items() if not descs]
if missing_imine:
    raise ValueError('No conformer descriptors found for imines: %s' % missing_imine)

# Conformer-averaged descriptor for each imine.
new_imine_desc_dict = {
    tmp_key: np.average(np.array(imine_desc_dict[tmp_key]),axis=0)
    for tmp_key in imine_smiles_set
}

### thiol

In [13]:
# Key atom(s) for each thiol, looked up by the thiol index in the file name
# ("thiol_<k>_conf_<j>.sdf" -> thiol_key[k]).
# Assumes the thiols are numbered 0..4 -- TODO confirm.
thiol_key = [[1],[1],[1],[1],[3]]

# Group the conformer files by thiol index. The token is parsed with int()
# rather than eval(), so text embedded in a file name can never be executed
# as code.
thiol_sdf_files.sort(key=lambda x: int(x.split('/')[-1].split('_')[1]))

# First pass over every thiol conformer: standardise the geometry and record
# the sphere radius that the SPMS object exposes afterwards.
all_radius = []
for tmp_sdf_file in thiol_sdf_files:
    # Take the thiol index from the file name instead of `position // 20`.
    # The positional form only works when every thiol has exactly 20
    # conformers, and picks the wrong key atom silently otherwise.
    tmp_thiol_idx = int(tmp_sdf_file.split('/')[-1].split('_')[1])
    tmp_key_atom = thiol_key[tmp_thiol_idx]
    spms = SPMS(tmp_sdf_file,key_atom_num=tmp_key_atom,desc_n=40,desc_m=40)
    spms._Standarlize_Geomertry()
    all_radius.append(spms.sphere_radius)
all_radius = np.array(all_radius)

# The largest radius is passed as `sphere_radius` to every thiol conformer
# in the descriptor pass. This overwrites the value from the imine section.
default_radius = all_radius.max()

In [14]:
# Display the thiol conformer paths after the in-place sort by thiol index.
thiol_sdf_files

['./Conformers/thiol/thiol_0_conf_10.sdf',
 './Conformers/thiol/thiol_0_conf_3.sdf',
 './Conformers/thiol/thiol_0_conf_11.sdf',
 './Conformers/thiol/thiol_0_conf_2.sdf',
 './Conformers/thiol/thiol_0_conf_9.sdf',
 './Conformers/thiol/thiol_0_conf_1.sdf',
 './Conformers/thiol/thiol_0_conf_16.sdf',
 './Conformers/thiol/thiol_0_conf_7.sdf',
 './Conformers/thiol/thiol_0_conf_18.sdf',
 './Conformers/thiol/thiol_0_conf_8.sdf',
 './Conformers/thiol/thiol_0_conf_13.sdf',
 './Conformers/thiol/thiol_0_conf_19.sdf',
 './Conformers/thiol/thiol_0_conf_4.sdf',
 './Conformers/thiol/thiol_0_conf_12.sdf',
 './Conformers/thiol/thiol_0_conf_5.sdf',
 './Conformers/thiol/thiol_0_conf_15.sdf',
 './Conformers/thiol/thiol_0_conf_6.sdf',
 './Conformers/thiol/thiol_0_conf_0.sdf',
 './Conformers/thiol/thiol_0_conf_17.sdf',
 './Conformers/thiol/thiol_0_conf_14.sdf',
 './Conformers/thiol/thiol_1_conf_11.sdf',
 './Conformers/thiol/thiol_1_conf_16.sdf',
 './Conformers/thiol/thiol_1_conf_19.sdf',
 './Conformers/thiol/

In [15]:
# Display the largest sphere radius found across the thiol conformers.
default_radius

10.0

In [16]:
# Second pass over the thiol conformers: build each SPMS object on the
# shared sphere radius and collect its sphere descriptor.
all_desc = []
for tmp_sdf_file in thiol_sdf_files:
    # Take the thiol index from the file name ("thiol_<k>_conf_<j>.sdf")
    # instead of `position // 20`. The positional form only works when every
    # thiol has exactly 20 conformers and the list is sorted by thiol index.
    tmp_thiol_idx = int(tmp_sdf_file.split('/')[-1].split('_')[1])
    tmp_key_atom = thiol_key[tmp_thiol_idx]
    spms = SPMS(tmp_sdf_file,key_atom_num=tmp_key_atom,desc_n=40,desc_m=40,sphere_radius=default_radius)
    spms.GetSphereDescriptors()
    all_desc.append(spms.sphere_descriptors)

# One entry per conformer, in the same order as `thiol_sdf_files`.
all_desc = np.array(all_desc)

In [17]:
# Bucket the per-conformer descriptors by thiol SMILES.
thiol_desc_dict = {item: [] for item in thiol_smiles_set}
for tmp_sdf_file, tmp_desc in zip(thiol_sdf_files, all_desc):
    with open(tmp_sdf_file,'r') as fr:
        # The first line of the SDF file is used as the SMILES key;
        # readline() avoids loading the whole file just for that line.
        tmp_smiles = fr.readline().strip()
    thiol_desc_dict[tmp_smiles].append(tmp_desc)

# Fail loudly if a thiol from the reaction table has no conformer:
# averaging an empty list would otherwise silently produce NaN.
missing_thiol = [key for key, descs in thiol_desc_dict.items() if not descs]
if missing_thiol:
    raise ValueError('No conformer descriptors found for thiols: %s' % missing_thiol)

# Conformer-averaged descriptor for each thiol.
new_thiol_desc_dict = {
    tmp_key: np.average(np.array(thiol_desc_dict[tmp_key]),axis=0)
    for tmp_key in thiol_smiles_set
}

## Generate reaction SPMS

In [20]:
# For every reaction (in the row order of the reaction table), look up the
# conformer-averaged descriptor of its catalyst, imine and thiol.
react_cat_desc = np.array([new_cat_desc_dict[item] for item in cat_smiles])
react_imine_desc = np.array([new_imine_desc_dict[item] for item in imine_smiles])
react_thiol_desc = np.array([new_thiol_desc_dict[item] for item in thiol_smiles])

# np.save does not create missing directories, so make sure the output
# folder exists before writing.
os.makedirs('./SPMS', exist_ok=True)
np.save('./SPMS/cat.npy',react_cat_desc)
np.save('./SPMS/imine.npy',react_imine_desc)
np.save('./SPMS/thiol.npy',react_thiol_desc)