In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

##    Description    Functions to manage SDFiles, pandas Dataframes ...
##                   Applicability Domain analysis
##                   
##    Authors:       Kevin Pinto Gil (kevin.pinto@upf.edu)
##                   Manuel Pastor (manuel.pastor@upf.edu)
##
##    Copyright 2018 Manuel Pastor
##
##    This file is part of PhiTools
##
##    PhiTools is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation version 3.
##
##    PhiTools is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with PhiTools.  If not, see <http://www.gnu.org/licenses/>

# 1. Importing libraries

In [2]:
### General libraries

import pandas as pd
import numpy as np
from math import * #math commands will be available every time you start an interactive session

## RDkit libraries

from rdkit import Chem
from rdkit.Chem import Draw, PandasTools, AllChem, Descriptors, Crippen, DataStructs

## Dataframe visualization part

# Show up to 4000 rows and 500 columns when a dataframe is displayed.
# (max_rows used to be set to 500 and then immediately overridden to 4000;
# only the effective value is kept.)
pd.set_option('display.max_rows', 4000)
pd.set_option('display.max_columns', 500)

## Ignore Warnings
## NOTE: the 'ignore' filter is installed without any category or module
## restriction, so it applies to every warning raised after this cell runs.

import warnings
warnings.filterwarnings('ignore')


*** Could not find EPA module. Will use only the CACTVS web service to resolve CAS number structures. ***



# 2. Functions for converting SMILES to 2D and 3D coordinates using RDKit

In [None]:
def addMolFromSmiletoPandasDF(df, smiColname, molColname):

    '''
    
    Info
    ----

    Add a molecule column to a pandas dataframe, built from the SMILES
    strings stored in one of its columns.

    The work is delegated to PandasTools.AddMoleculeColumnToFrame, which
    receives df itself (no copy is made here).
    
    Parameters
    ----------
    
    df: DF  
        ## Dataframe containing all information
    smiColname: 'smiles' 
        ## name of the column holding the SMILES strings
    molColname: 'mol1D'
        ## name of the molecule column to be created
       
    Returns
    -------
    
    The dataframe with the molecule column added. This is df itself whenever
    the RDKit helper returns nothing.
    
    Example
    -------        

    df = addMolFromSmiletoPandasDF(df, 'smiles', 'mol1D')
         
    '''

    result = PandasTools.AddMoleculeColumnToFrame(df, smilesCol=smiColname, molCol=molColname)

    # RDKit documents AddMoleculeColumnToFrame as appending the column to the
    # given frame; it has no return value. Returning its result directly made
    # this function return None, so ``df = addMolFromSmiletoPandasDF(...)``
    # silently replaced the dataframe with None. Hand the dataframe back instead.
    return df if result is None else result

In [None]:
def mol1Dto2Drdkit( x, smi = False, inchi = False ):

    '''
    
    Info
    ----

    Build (if needed) an RDKit molecule from a SMILES or InChI string and
    compute its 2D coordinates using RDkit
    
    Parameters
    ----------
    
    x: rdkitmol, or str when smi / inchi is set
        ## molecule in RDkit format, or the SMILES / InChI string to parse
    smi: bool
        ## True if x is a SMILES string, False if not
    inchi: bool
        ## True if x is an InChI string, False if not
        ## (if both smi and inchi are set, x is parsed as InChI)
       
    Returns
    -------
    
    Molecule with 2D coordinates. When x is already a molecule, the same
    object is returned after Compute2DCoords has been called on it.

    Raises
    ------

    ValueError when the SMILES / InChI string cannot be turned into a molecule.
    
    Example
    -------        

    - smile provided:
        df['mol2D'] = df.smi.apply(lambda x: mol1Dto2Drdkit(x, smi=True))
    
    - inchi provided:
         df['mol2D'] = df.inchi.apply(lambda x: mol1Dto2Drdkit(x, inchi= True))
    - mol provided:
        df['mol2D'] = df.mol1D.apply(lambda x: mol1Dto2Drdkit(x))

    '''

    ### Building the molecule: exactly one branch assigns m.
    ### (The previous "if smi ... if inchi ... else m = x" layout overwrote the
    ### molecule parsed from SMILES with the raw string whenever inchi was False.)

    if inchi:
        try:
            m = Chem.MolFromInchi(x, sanitize=False, removeHs=False)
        except Exception:
            raise ValueError('ERROR: no inchi provided')
        # The parser can signal failure by returning None instead of raising
        if m is None:
            raise ValueError('ERROR: no inchi provided')
    elif smi:
        try:
            m = Chem.MolFromSmiles(x)
        except Exception:
            raise ValueError('ERROR: no smile provided')
        # The parser can signal failure by returning None instead of raising
        if m is None:
            raise ValueError('ERROR: no smile provided')
    else:
        # x is already a molecule
        m = x
    
    ### Computing 2D coordinates
    
    AllChem.Compute2DCoords(m)
    
    return m

In [None]:
def mol2Dto3Drdkit(m, rmHs= True):

    '''
    
    Info
    ----

    Convert from 2D molecule to 3D using RDkit (ETKDG embedding)
    
    Parameters
    ----------
    
    m: rdkitmol  
        ## molecule in RDkit format
    rmHs: bool (default True)
        ## True to remove the hydrogens after embedding, False to keep them.
        ## Any other value is interpreted by its truthiness.
       
    Returns
    -------
    
    Molecule with 3D coordinates, with or without hydrogens. The input
    molecule is not returned: hydrogens are added on the object produced by
    Chem.AddHs and the embedding is run on that object.
    
    Example
    -------        

    df['mol3D'] = df.mol2D.apply(lambda x: mol2Dto3Drdkit(x, rmHs = True))

    '''

    # Hydrogens are added before embedding
    m2 = Chem.AddHs(m)

    # NOTE(review): the value returned by EmbedMolecule is ignored. RDKit
    # documents -1 as the failure code, in which case m2 carries no 3D
    # coordinates. Kept as is so that a single failure does not abort a
    # df.apply over many molecules; confirm whether callers need a check.
    AllChem.EmbedMolecule(m2, AllChem.ETKDG())

    # Truthiness test instead of "== True" / "== False": previously any other
    # value (e.g. None) fell through both comparisons and returned None.
    if rmHs:
        return Chem.RemoveHs(m2)
    return m2