Check for matches for product molecule in generated reaction network

In [None]:
from rdkit import Chem
from rdkit.DataStructs import FingerprintSimilarity as tanimoto
from rdkit.DataStructs.cDataStructs import ExplicitBitVect
from rdkit.Chem import Draw
from rdkit.Chem import AllChem,MACCSkeys
import pandas as pd
from ast import literal_eval
from joblib import Parallel, delayed

In [None]:
class Product_Match():
    """Search the products of a generated reaction network for a target.

    The network is read from a CSV file. Every column after the first is
    treated as one generation; each non-empty cell holds the string form of a
    Python list of SMILES (parsed with ``ast.literal_eval``). Entries that
    consist only of digits are placeholders for repeat molecules and are never
    parsed as SMILES.
    """

    # Length of the all-zero fingerprint used for placeholders, chosen to match
    # the MACCS fingerprints it is compared against.
    _MACCS_NBITS = 167
    # Column layout of the CSV files written by MatchFP / MatchSub.
    _COLUMNS = ['Smiles', 'Gen', 'Id', 'Similarity']

    def __init__(self, filepath):
        """Load the network table.

        filepath : path to the file generated from Network_AllCombo.
        """
        self.df = pd.read_csv(filepath, header=0, low_memory=False)
        # The first column is skipped (presumably a saved index -- confirm
        # against the writer); the remaining columns are the generations.
        self.keys = self.df.keys()[1:]

    def _load_generation(self, key):
        """Return the parsed SMILES lists of column ``key``, skipping NaN cells."""
        return [literal_eval(cell) for cell in self.df[key].dropna()]

    def GenMACCS(self, smiles):
        """Return one MACCS fingerprint per entry of ``smiles``.

        smiles : list of SMILES strings.

        Repeat-molecule placeholders (digit strings) and SMILES that RDKit
        cannot parse get an empty bit vector, so the result always has the
        same length and order as the input.
        """
        fp = []
        for x in smiles:
            # Repeat molecules are stored as digits; MolFromSmiles returns
            # None for strings it cannot parse.
            mol = None if x.isdigit() else Chem.MolFromSmiles(x)
            if mol is None:
                fp.append(ExplicitBitVect(Product_Match._MACCS_NBITS))
            else:
                fp.append(MACCSkeys.GenMACCSKeys(mol))
        return fp

    def Similarity(self, smiles, target):
        """Count, for each molecule, how many target patterns it contains.

        smiles : list of SMILES strings.
        target : list of RDKit query molecules (parsed SMARTS patterns).

        Returns a list with one integer per entry of ``smiles``: the number of
        patterns matched, 0 for a repeat-molecule placeholder, or -1 when the
        molecule could not be parsed or matched.
        """
        score = []
        for x in smiles:
            if x.isdigit():
                # Keep the scores aligned with the input, as GenMACCS does.
                score.append(0)
                continue
            try:
                mol = Chem.MolFromSmiles(x)
                if mol is None:
                    score.append(-1)
                    continue
                score.append(sum(1 for t in target if mol.HasSubstructMatch(t)))
            except Exception:
                # Best effort: one bad molecule must not abort the whole scan.
                score.append(-1)
        return score

    def MatchFP(self, target, savepath, cutoff=0.2):
        """Write the entries whose MACCS Tanimoto similarity reaches ``cutoff``.

        target   : pattern to be recognized in products (parsed with
                   ``Chem.MolFromSmarts``).
        savepath : path of the CSV file to write.
        cutoff   : minimum Tanimoto similarity that at least one molecule of
                   an entry must reach for the entry to be reported.

        An entry that qualifies contributes one row per molecule. If the
        target cannot be parsed an error is printed and nothing is written.
        """
        targetmol = Chem.MolFromSmarts(target)
        if targetmol is None:
            print('Error parsing SMILES')
            return
        targetmol.UpdatePropertyCache()
        Chem.GetSymmSSSR(targetmol)
        targetfp = MACCSkeys.GenMACCSKeys(targetmol)

        rows = []
        for key in self.keys:
            newlist = self._load_generation(key)
            if not newlist:
                continue
            # FingerPrints is a list of lists of fingerprints, one per entry.
            FingerPrints = Parallel(n_jobs=-1, verbose=1)(
                delayed(self.GenMACCS)(smiles) for smiles in newlist)
            for i, fp in enumerate(FingerPrints):
                score = [tanimoto(x, targetfp) for x in fp]
                if any(val >= cutoff for val in score):
                    label = str(newlist[i])
                    rows.extend(
                        {'Smiles': label, 'Gen': key, 'Id': i, 'Similarity': val}
                        for val in score)
        # Build the frame once instead of concatenating inside the loop.
        pd.DataFrame(rows, columns=self._COLUMNS).to_csv(savepath)

    def MatchSub(self, target, cutoff, savepath):
        """Write the entries containing at least ``cutoff`` target substructures.

        target   : list of SMARTS patterns to be recognized in products.
        cutoff   : minimum number of patterns that at least one molecule of an
                   entry must match for the entry to be reported.
        savepath : path of the CSV file to write.

        An entry that qualifies contributes one row per molecule. If any
        pattern cannot be parsed an error is printed and nothing is written.
        """
        targetmol = [Chem.MolFromSmarts(t) for t in target]
        bad = [t for t, mol in zip(target, targetmol) if mol is None]
        if bad:
            print('Error parsing SMARTS: ' + ', '.join(str(t) for t in bad))
            return

        rows = []
        for key in self.keys:
            newlist = self._load_generation(key)
            if not newlist:
                continue
            # score is a list of per-molecule match counts, one list per entry.
            score = Parallel(n_jobs=-1, verbose=1)(
                delayed(self.Similarity)(smiles, targetmol) for smiles in newlist)
            for i, counts in enumerate(score):
                if any(val >= cutoff for val in counts):
                    label = str(newlist[i])
                    rows.extend(
                        {'Smiles': label, 'Gen': key, 'Id': i, 'Similarity': val}
                        for val in counts)
        pd.DataFrame(rows, columns=self._COLUMNS).to_csv(savepath)
    

In [None]:
# Example use: scan one generated network for a target by fingerprint
# similarity, then by substructure counts.

# File containing the network generated from Network_Allcombo
network_csv = r'Biomass/HMFRev_NoRepeat_Reduced_Nghb.csv'
fingerprint_csv = r'C:\Users\ks\Desktop\ReactionNetwork\Bit_example.csv'
substructure_csv = r'C:\Users\ks\Desktop\ReactionNetwork\BitSub_example.csv'

obj = Product_Match(network_csv)

# Fingerprint search against a single target pattern.
target = 'OCc1ccco1'
obj.MatchFP(target, fingerprint_csv)

# Substructure search: report entries where a molecule matches both patterns.
targetsmarts = ['[c]', '[$([CX3]=[CX3])]']
obj.MatchSub(targetsmarts, 2, substructure_csv)

In [None]:
#SMARTS Pattern of different functional groups 
#alkane=Chem.MolFromSmarts('[CX4]')
#alkene=Chem.MolFromSmarts('[$([CX3]=[CX3])]')
#alkyne=Chem.MolFromSmarts('[$([CX2]#[CX2])]')
#arene=Chem.MolFromSmarts('[c]')
#ketone=Chem.MolFromSmarts('[#6][CX3](=[O])[#6]') 
#aldehyde=Chem.MolFromSmarts('[CX3H1](=[O])')
#ester=Chem.MolFromSmarts('[#6][CX3](=[O])[OX2H0][#6]')
#acid=Chem.MolFromSmarts('[#6][CX3](=[O])[OX2H1]')
#alcohol=Chem.MolFromSmarts('[CHX4][OX2H]')
#amine=Chem.MolFromSmarts('[NX3;H2,H1;!$(NC=O)]')
#halide=Chem.MolFromSmarts('[CX4][F,Cl,Br,I]')
#ether=Chem.MolFromSmarts('[OD2]([#6])[#6]')
#nitro=Chem.MolFromSmarts('[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]')