In [1]:
import pandas as pd
import metrics
import chem_utils.mol as mol_utils

# Silence RDKit log output on the 'rdApp.*' channels (hides RDKit warnings).
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

In [2]:
# Read the generated-SMILES table; `smiles_col` names the column holding the SMILES strings.
generated_smiles_csv_path = "/mnt/ssd2/Chem/photopolymerization_initiator/ML/MolGen/data/results/generated_smiles/20240207132032.csv"
smiles_col = "Smiles"
df = pd.read_csv(filepath_or_buffer=generated_smiles_csv_path)
df.head()

Unnamed: 0,Smiles
0,CC(=O)O\N=C(\C)c1ccc(Sc2ccccc2)cc1
1,CC(=O)ON=Cc1cc([N+](=O)[O-])ccc1O
2,O=Cc1cn(-c2ccccc2)c2ccc(C#Cc3ccc(C=NOC(C)=O)cc...
3,c1cc(NC(C(F)(F)C(C(F)(F)F)(F)C(=O)O)(C(F)(F)F)...
4,CC(=O)Nc1ccc(C#Cc2cc3c(cc2C=NOC(C)=O)cccc3)cc1


In [3]:
# Remove rows whose SMILES cannot be round-tripped through RDKit.
# `series != None` is evaluated element-wise by pandas and yields True for
# every row (even rows that hold None), so it never filtered anything.
# `.notna()` is the missing-value-aware test.
# Assumes mol_utils.smiles2mol2smiles returns None/NaN for an unparsable
# SMILES — TODO confirm against chem_utils.mol.
is_valid_smiles = df[smiles_col].apply(mol_utils.smiles2mol2smiles).notna()
df = df[is_valid_smiles]
# Also drop rows that have a missing value in any column.
df = df.dropna()
df.head()

Unnamed: 0,Smiles
0,CC(=O)O\N=C(\C)c1ccc(Sc2ccccc2)cc1
1,CC(=O)ON=Cc1cc([N+](=O)[O-])ccc1O
2,O=Cc1cn(-c2ccccc2)c2ccc(C#Cc3ccc(C=NOC(C)=O)cc...
3,c1cc(NC(C(F)(F)C(C(F)(F)F)(F)C(=O)O)(C(F)(F)F)...
4,CC(=O)Nc1ccc(C#Cc2cc3c(cc2C=NOC(C)=O)cccc3)cc1


In [4]:
# Score every SMILES with metrics.symmetry and store the result in a new column.
symmetry_scores = df[smiles_col].apply(metrics.symmetry)
df['symmetry'] = symmetry_scores
df.head()

Unnamed: 0,Smiles,symmetry
0,CC(=O)O\N=C(\C)c1ccc(Sc2ccccc2)cc1,0.4
1,CC(=O)ON=Cc1cc([N+](=O)[O-])ccc1O,0.0
2,O=Cc1cn(-c2ccccc2)c2ccc(C#Cc3ccc(C=NOC(C)=O)cc...,0.266667
3,c1cc(NC(C(F)(F)C(C(F)(F)F)(F)C(=O)O)(C(F)(F)F)...,0.384615
4,CC(=O)Nc1ccc(C#Cc2cc3c(cc2C=NOC(C)=O)cccc3)cc1,0.142857


In [5]:
# Apply metrics.num_oxime_activation to every SMILES and store it as 'num_oxime_activation'.
oxime_activation_counts = df[smiles_col].apply(metrics.num_oxime_activation)
df['num_oxime_activation'] = oxime_activation_counts
df.head()

Unnamed: 0,Smiles,symmetry,num_oxime_activation
0,CC(=O)O\N=C(\C)c1ccc(Sc2ccccc2)cc1,0.4,1
1,CC(=O)ON=Cc1cc([N+](=O)[O-])ccc1O,0.0,1
2,O=Cc1cn(-c2ccccc2)c2ccc(C#Cc3ccc(C=NOC(C)=O)cc...,0.266667,1
3,c1cc(NC(C(F)(F)C(C(F)(F)F)(F)C(=O)O)(C(F)(F)F)...,0.384615,0
4,CC(=O)Nc1ccc(C#Cc2cc3c(cc2C=NOC(C)=O)cccc3)cc1,0.142857,1


In [6]:
# Apply metrics.get_max_fused_ring_size to every SMILES and store it as 'max_fused_ring_size'.
fused_ring_sizes = df[smiles_col].apply(metrics.get_max_fused_ring_size)
df['max_fused_ring_size'] = fused_ring_sizes
df.head()

Unnamed: 0,Smiles,symmetry,num_oxime_activation,max_fused_ring_size
0,CC(=O)O\N=C(\C)c1ccc(Sc2ccccc2)cc1,0.4,1,1
1,CC(=O)ON=Cc1cc([N+](=O)[O-])ccc1O,0.0,1,1
2,O=Cc1cn(-c2ccccc2)c2ccc(C#Cc3ccc(C=NOC(C)=O)cc...,0.266667,1,2
3,c1cc(NC(C(F)(F)C(C(F)(F)F)(F)C(=O)O)(C(F)(F)F)...,0.384615,0,1
4,CC(=O)Nc1ccc(C#Cc2cc3c(cc2C=NOC(C)=O)cccc3)cc1,0.142857,1,2


In [7]:
# Checkpoint the annotated table to the working directory (row index is written too, per the pandas default).
df.to_csv(path_or_buf='oxime_filtered_checkpoint.csv')

In [8]:
# Keep only the rows that meet every screening threshold below.
oxime_num_min = 2
max_fused_ring_size_min = 2
min_symmetry = 0.6

# One boolean mask per criterion, combined with a logical AND.
has_enough_oximes = df['num_oxime_activation'] >= oxime_num_min
has_large_fused_ring = df['max_fused_ring_size'] >= max_fused_ring_size_min
is_symmetric_enough = df['symmetry'] >= min_symmetry

df_filter = df[has_enough_oximes & has_large_fused_ring & is_symmetric_enough]
df_filter

Unnamed: 0,Smiles,symmetry,num_oxime_activation,max_fused_ring_size
880,CCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(...,0.869565,2,3
961,CC(=O)O\N=C/c1ccc2cc(/C=N\OC(C)=O)ccc2c1,1.000000,2,2
1315,CCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(...,0.869565,2,3
1546,CCCCCCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c...,0.800000,2,3
1828,O=C(O/N=C/c1cc2ccccc2cc1/C=N/OC(=O)c1ccccc1)c1...,1.000000,2,2
...,...,...,...,...
421697,CC(=O)O\N=C1\c2c(cccc2)/C(=N/OC(C)=O)C1=O,0.900000,2,2
421871,CC(=O)O/N=C\c1ccc2c(c1)Sc1cc(/C=N\OC(C)=O)ccc1N2C,0.923077,2,3
421942,CCCCCCn1c2ccc(/C(C)=N/OC(C)=O)cc2c2cc(/C(C)=N/...,0.812500,2,3
422233,CC(=O)O\N=C\c1ccc2ccc(/C=N/OC(C)=O)cc2c1,0.909091,2,2


In [9]:
# Persist the filtered candidates to the working directory (row index is written too, per the pandas default).
df_filter.to_csv(path_or_buf='oxime_filter.csv')

In [10]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from PIL import Image, ImageDraw, ImageFont

In [11]:
# Load the table of predicted values; note that this rebinds `df` to a different dataset.
df = pd.read_csv(filepath_or_buffer='oxime_pred_vals.csv')
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Smiles,symmetry,num_oxime_activation,max_fused_ring_size,is_able_to_calculate,T1,S1,S1_osc,S2,S2_osc,S3,S3_osc
0,0,592,CC(=O)O\N=C\c1ccc2c(c1)Sc1cc(/C=N/OC(C)=O)ccc1...,0.666667,2,3,True,2.201316,3.358353,0.314739,3.799375,0.095388,3.956727,0.446369
1,1,1190,CCC(CCCC)Cn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c...,0.833333,2,3,True,2.129354,3.494984,2.221143,3.80724,0.860613,3.919722,0.519636
2,2,1725,CCCCCCn1c2ccc(/C(C)=N/OC(C)=O)cc2c2cc(/C(C)=N/...,0.8125,2,3,True,2.673143,3.951552,0.0436,4.160967,0.131715,4.53657,0.252301
3,3,3407,CCCCCCC/C(=N\OC(=O)c1ccccc1)c1ccc2c(c1)Sc1cc(/...,0.928571,2,3,True,2.430663,3.420066,0.328222,4.021405,-0.020506,4.072446,0.202766
4,4,3686,C(CCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2c...,0.833333,2,3,True,2.100914,3.510591,2.140261,3.788762,0.809907,3.919181,0.581851


In [15]:
# Collect the SMILES of every row whose S1 value is greater than 2.5.
s1_above_threshold = df['S1'] > 2.5
smiles = df.loc[s1_above_threshold, 'Smiles'].tolist()
print(smiles)

['CC(=O)O\\N=C\\c1ccc2c(c1)Sc1cc(/C=N/OC(C)=O)ccc1N2c1ccc2c(c1)cccc2', 'CCC(CCCC)Cn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(C#Cc3ccc(C=NOC(C)=O)cc3)ccc21', 'CCCCCCn1c2ccc(/C(C)=N/OC(C)=O)cc2c2cc(/C(C)=N/OC(C)=O)ccc21', 'CCCCCCC/C(=N\\OC(=O)c1ccccc1)c1ccc2c(c1)Sc1cc(/C(CCCCCCC)=N/OC(=O)c3ccccc3)ccc1N2c1ccccc1', 'C(CCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2c1ccc(C#Cc1ccc(C=NOC(C)=O)cc1)c2)C', 'c14c(cccc1/C(C)=N/OC(C)=O)cccc4/C(C)=N/OC(C)=O', 'CCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(C#Cc3ccc(C=NOC(C)=O)cc3)ccc21', 'CCCCCCn1c2ccc(/C=N/OC(C)=O)cc2c2cc(/C=N/OC(C)=O)ccc21', 'O=C(O/N=C\\c1ccc2c(c1)Sc1cc(/C=N\\OC(C)=O)ccc1N2CCCC)C', 'CCCCCC(CCCCC)Cn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(C#Cc3ccc(C=NOC(C)=O)cc3)ccc21', 'CCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(C#Cc3ccc(C=NOC(C)=O)cc3)ccc21', 'CCCCCCn1c2ccc(/C=N/OC(=O)c3ccccc3)cc2c2cc(/C=N/OC(=O)c3ccccc3)ccc21', 'CC(=O)O\\N=C\\c1ccc2c(c1)Sc1cc(/C=N/OC(=O)C)ccc12', 'CCCCCCCCCCn1c2ccc(C#Cc3ccc(C=NOC(C)=O)cc3)cc2c2cc(C#Cc3ccc(C=NOC(C)=O)cc

In [16]:
def mol2img(mol, img_save_path, text=None, size=(300, 300)):
    """Render a molecule to an image file, optionally stamping a caption on it.

    Args:
        mol: Molecule object handed to ``Draw.MolToImage``.
        img_save_path: Destination passed to the rendered image's ``save`` method.
        text: Optional caption drawn in black at the top-left corner with the
            default PIL font. Nothing is drawn when it is ``None``.
        size: ``(width, height)`` forwarded to ``Draw.MolToImage``. Defaults to
            ``(300, 300)``, the value that used to be hard-coded.
    """
    # Generate the molecule image.
    img = Draw.MolToImage(mol, size=size)

    if text is not None:
        # Build the drawing context only when there is a caption to write.
        draw = ImageDraw.Draw(img)
        font = ImageFont.load_default()
        text_position = (10, 10)  # top-left corner
        draw.text(text_position, text, fill=(0, 0, 0), font=font)

    # Save the image to disk.
    img.save(img_save_path)

In [17]:
# Render each selected SMILES to '<position>.png' in the current working directory.
for i, smi in enumerate(smiles):
    mol = Chem.MolFromSmiles(smi)
    img_save_path = '{}.png'.format(i)
    mol2img(mol, img_save_path)