In [54]:
import os
import sys

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import PandasTools as pt

In [None]:
def CSVgraphF(self):
    '''Write an adjacency-matrix "feature" column for each molecule in a CSV file.

    Parameters
    ----------
    self : str
        Path to the input CSV file (a plain path, not an instance, despite
        the name). The file must contain a ``smiles`` column.

    Returns
    -------
    None
        The augmented table (original columns plus ``mol`` and ``feature``)
        is written to ``<input path without extension>-features.csv``.

    Notes
    -----
    Rows whose SMILES cannot be parsed get an empty ``feature`` instead of
    aborting the whole run.
    '''

    df = pd.read_csv(self)
    # MolFromSmiles returns None for SMILES it cannot parse. Non-string cells
    # (e.g. NaN from blank rows) are mapped to None as well rather than being
    # handed to RDKit.
    df['mol'] = df['smiles'].map(
        lambda smi: Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    )
    # NOTE(review): 'mol' is written to the CSV as whatever text the RDKit Mol
    # object converts to -- presumably not useful downstream; kept so the
    # output columns stay unchanged.

    def mol_to_nx(mol):
        '''Return the adjacency matrix of ``mol`` as text, or None if ``mol`` is None.'''
        if mol is None:
            return None

        G = nx.Graph()

        for atom in mol.GetAtoms():
            G.add_node(atom.GetIdx(),
                       atomic_num=atom.GetAtomicNum(),
                       is_aromatic=atom.GetIsAromatic(),
                       atom_symbol=atom.GetSymbol())

        for bond in mol.GetBonds():
            G.add_edge(bond.GetBeginAtomIdx(),
                       bond.GetEndAtomIdx(),
                       bond_type=bond.GetBondType())

        # nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is
        # the supported replacement.
        adjacency = nx.to_numpy_array(G)
        # Render the text explicitly with an unlimited threshold: NumPy's
        # default printing abbreviates arrays above 1000 elements with "...",
        # which would silently drop most of the matrix for larger molecules.
        return np.array2string(adjacency, threshold=sys.maxsize)

    df['feature'] = df['mol'].map(mol_to_nx)

    # Build the output name from the full path minus its extension. Joining
    # dirname with "/" turned a bare filename (empty dirname) into a path in
    # the filesystem root.
    root, _ext = os.path.splitext(self)
    df.to_csv(f"{root}-features.csv", index=False)

In [57]:
def CSVgraphFF(self):
    '''Write an adjacency-matrix "feature" column for each molecule in a CSV file.

    Variant of the basic exporter that additionally records bonds, chirality,
    formal charge, hybridization and bond stereo on the intermediate graph.

    Parameters
    ----------
    self : str
        Path to the input CSV file (a plain path, not an instance, despite
        the name). The file must contain a ``smiles`` column.

    Returns
    -------
    None
        The augmented table (original columns plus ``mol`` and ``feature``)
        is written to ``<input path without extension>-full_features.csv``.

    Notes
    -----
    Rows whose SMILES cannot be parsed get an empty ``feature`` instead of
    aborting the whole run.

    NOTE(review): the extra node/edge attributes are stored on the graph only.
    The adjacency matrix is built from connectivity (edge ``weight``
    defaulting to 1), so those attributes do not appear in ``feature`` --
    confirm whether that is intended.
    '''

    df = pd.read_csv(self)
    # MolFromSmiles returns None for SMILES it cannot parse. Non-string cells
    # (e.g. NaN from blank rows) are mapped to None as well rather than being
    # handed to RDKit.
    df['mol'] = df['smiles'].map(
        lambda smi: Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    )
    # NOTE(review): 'mol' is written to the CSV as whatever text the RDKit Mol
    # object converts to -- presumably not useful downstream; kept so the
    # output columns stay unchanged.

    def mol_to_nx(mol):
        '''Return the adjacency matrix of ``mol`` as text, or None if ``mol`` is None.'''
        if mol is None:
            return None

        G = nx.Graph()

        for atom in mol.GetAtoms():
            G.add_node(atom.GetIdx(),
                       atomic_num=atom.GetAtomicNum(),
                       is_aromatic=atom.GetIsAromatic(),
                       atom_symbol=atom.GetSymbol(),
                       atom_bonds=atom.GetBonds(),
                       atom_chiral=atom.GetChiralTag(),
                       atom_charge=atom.GetFormalCharge(),
                       atom_hybridization=atom.GetHybridization())

        for bond in mol.GetBonds():
            G.add_edge(bond.GetBeginAtomIdx(),
                       bond.GetEndAtomIdx(),
                       bond_type=bond.GetBondType(),
                       bond_stereo=bond.GetStereo())

        # nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is
        # the supported replacement.
        adjacency = nx.to_numpy_array(G)
        # Render the text explicitly with an unlimited threshold: NumPy's
        # default printing abbreviates arrays above 1000 elements with "...",
        # which would silently drop most of the matrix for larger molecules.
        return np.array2string(adjacency, threshold=sys.maxsize)

    df['feature'] = df['mol'].map(mol_to_nx)

    # Build the output name from the full path minus its extension. Joining
    # dirname with "/" turned a bare filename (empty dirname) into a path in
    # the filesystem root.
    root, _ext = os.path.splitext(self)
    df.to_csv(f"{root}-full_features.csv", index=False)

In [59]:
# Run the extended-feature export on test.csv; CSVgraphFF writes its result
# next to the input as test-full_features.csv.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
CSVgraphFF('/home/administrator/satvik/ml_dl/oral-bioavailability/test.csv')

In [75]:
def SDFgraphFF(self):
    '''Write SMILES and an adjacency-matrix "feature" for each molecule in an SDF file.

    Parameters
    ----------
    self : str
        Path to the input SDF file (a plain path, not an instance, despite
        the name).

    Returns
    -------
    None
        A two-column table (``smiles``, ``feature``) is written to
        ``<input path without extension>-sdf-full_features.csv``.

    Notes
    -----
    NOTE(review): the node/edge attributes are stored on the intermediate
    graph only. The adjacency matrix is built from connectivity (edge
    ``weight`` defaulting to 1), so those attributes do not appear in
    ``feature`` -- confirm whether that is intended.
    '''

    sdf = pt.LoadSDF(self, includeFingerprints=False)
    sdf['smiles'] = sdf['ROMol'].map(Chem.MolToSmiles)

    def mol_to_nx(mol):
        '''Return the adjacency matrix of ``mol`` rendered as text.'''
        G = nx.Graph()

        for atom in mol.GetAtoms():
            G.add_node(atom.GetIdx(),
                       atomic_num=atom.GetAtomicNum(),
                       is_aromatic=atom.GetIsAromatic(),
                       atom_symbol=atom.GetSymbol(),
                       atom_bonds=atom.GetBonds(),
                       atom_chiral=atom.GetChiralTag(),
                       atom_charge=atom.GetFormalCharge(),
                       atom_hybridization=atom.GetHybridization())

        for bond in mol.GetBonds():
            G.add_edge(bond.GetBeginAtomIdx(),
                       bond.GetEndAtomIdx(),
                       bond_type=bond.GetBondType(),
                       bond_stereo=bond.GetStereo())

        # nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is
        # the supported replacement.
        adjacency = nx.to_numpy_array(G)
        # Render the text explicitly with an unlimited threshold: NumPy's
        # default printing abbreviates arrays above 1000 elements with "...",
        # which would silently drop most of the matrix for larger molecules.
        return np.array2string(adjacency, threshold=sys.maxsize)

    sdf['feature'] = sdf['ROMol'].map(mol_to_nx)
    df = sdf[['smiles', 'feature']]

    # Build the output name from the full path minus its extension. Joining
    # dirname with "/" turned a bare filename (empty dirname) into a path in
    # the filesystem root.
    root, _ext = os.path.splitext(self)
    df.to_csv(f"{root}-sdf-full_features.csv", index=False)

In [76]:
# Run the SDF export on prepared.sdf; SDFgraphFF writes its result next to the
# input as prepared-sdf-full_features.csv.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
SDFgraphFF('/home/administrator/satvik/ml_dl/oral-bioavailability/sch/ligprep_oral_bioavailability_processed/prepared.sdf')

