In [35]:
import pandas
import os
import rdkit.Chem as rkc
from rdkit.Chem import Crippen
from rdkit.Chem import Lipinski
from rdkit.Chem import Descriptors
from rdkit.Chem import Draw

In [36]:
# Scaffold used by sub_slice() as the substructure to delete from each molecule.
sca_smi = 'NC1=NC(CC2=CC=CC=C2)=CN3C1=NC4=C3C=CC=C4'
sca_mol = rkc.MolFromSmiles(sca_smi)
# MolFromSmiles returns None instead of raising when parsing fails; stop here
# with a clear message rather than failing later inside DeleteSubstructs.
if sca_mol is None:
    raise ValueError('Scaffold SMILES could not be parsed: ' + sca_smi)

In [37]:
# Load the input structures: one record per line, the first whitespace-delimited
# token of each line is taken as the SMILES string.
# NOTE(review): absolute, machine-specific path -- consider moving it to a
# configurable constant so the notebook can run elsewhere.
with open(r'D:\Research\A2B\subtituents\L6sub-HTVS.smi', 'r') as f:
    # Blank lines (e.g. the empty record after a trailing newline) are skipped so
    # they do not turn into empty molecules further down. split() with no
    # argument handles tab- as well as space-delimited files.
    sta_set = [line.split()[0] for line in f if line.strip()]

In [39]:
def sub_slice(smi, scaffold=None):
    """Remove the scaffold from a molecule and return the remainder as SMILES.

    Parameters
    ----------
    smi : str
        SMILES string of the full molecule.
    scaffold : rdkit.Chem.Mol, optional
        Substructure to delete. When omitted, the module-level ``sca_mol``
        is used, which matches the previous behaviour.

    Returns
    -------
    str
        SMILES of the molecule returned by ``DeleteSubstructs``.

    Raises
    ------
    ValueError
        If ``smi`` cannot be parsed into a molecule.
    """
    mol = rkc.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; passing that
        # on to DeleteSubstructs would only produce an opaque argument error.
        raise ValueError('Could not parse SMILES: {!r}'.format(smi))
    if scaffold is None:
        scaffold = sca_mol
    sliced_mol = rkc.DeleteSubstructs(mol, scaffold)
    return rkc.MolToSmiles(sliced_mol)

def mole_proper(mol):
    """Collect one row of descriptors for a molecule.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Molecule to describe.

    Returns
    -------
    list
        ``[SMILES, H-bond donors, H-bond acceptors, rotatable bonds,
        aromatic rings, molecular weight, LogP, TPSA]`` in that order.
    """
    # Descriptor functions listed in output-column order (after the SMILES).
    calculators = (
        Lipinski.NumHDonors,
        Lipinski.NumHAcceptors,
        Lipinski.NumRotatableBonds,
        Lipinski.NumAromaticRings,
        Descriptors.MolWt,
        Crippen.MolLogP,
        Descriptors.TPSA,
    )
    values = [calculate(mol) for calculate in calculators]
    # The SMILES is generated last, after all descriptors have been computed.
    return [rkc.MolToSmiles(mol)] + values

def save_excel(proper_list, path='SubstituentsProperties.xlsx'):
    """Write the descriptor rows to an Excel workbook.

    Parameters
    ----------
    proper_list : list of list
        Rows with eight values each, in the column order listed below.
    path : str, optional
        Destination workbook. Defaults to ``'SubstituentsProperties.xlsx'``
        in the current working directory.
    """
    columns = [
        'SMILES',
        'HBD',
        'HBA',
        'Rotatable bonds',  # header previously misspelled as "bones"
        'Aromatic Rings',
        'Mol Weight',
        'LogP',
        'TPSA',
    ]
    df = pandas.DataFrame(proper_list, columns=columns)
    # index=False leaves the DataFrame's row index out of the sheet.
    df.to_excel(path, index=False)

In [40]:
# Apply sub_slice to every input SMILES, keeping the input order.
slice_li = list(map(sub_slice, sta_set))


In [25]:
# Build one descriptor row per sliced SMILES, then write the table to Excel.
proper_list = [mole_proper(rkc.MolFromSmiles(fragment)) for fragment in slice_li]
save_excel(proper_list)

In [41]:
# Render a PNG for each of the first 101 sliced SMILES (indices 0 through 100).
file_name = 'R6'
img_dir = file_name + "_substituents_img"
# exist_ok=True lets this cell be re-run; os.mkdir raised FileExistsError
# whenever the directory was already present from an earlier run.
os.makedirs(img_dir, exist_ok=True)
# Slicing up front avoids iterating over the rest of the list only to skip it.
for i, smile in enumerate(slice_li[:101]):
    mol = rkc.MolFromSmiles(smile)
    Draw.MolToFile(mol, os.path.join(img_dir, str(i) + ".png"), size=(600, 600), legend=smile)