In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

##    Description    Functions to manage SDFiles, pandas Dataframes ...
##                   Applicability Domain analysis
##                   
##    Authors:       Kevin Pinto Gil (kevin.pinto@upf.edu)
##                   Manuel Pastor (manuel.pastor@upf.edu)
##
##    Copyright 2018 Manuel Pastor
##
##    This file is part of PhiTools
##
##    PhiTools is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation version 3.
##
##    PhiTools is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with PhiTools.  If not, see <http://www.gnu.org/licenses/>

# 1. Importing libraries

In [1]:
### General libraries

import pandas as pd
import numpy as np
from math import * #math commands will be available every time you start an interactive session

## RDKit libraries

from rdkit import Chem
from rdkit.Chem import Draw, PandasTools, AllChem, Descriptors, Crippen, DataStructs

### Molecule standardisation libraries

from standardiser import process_smiles as ps
from standardiser import neutralise
from molvs import tautomer
from phitools import moleculeHelper as mh

## Dataframe visualization part
## Show up to 4000 rows and 500 columns when a dataframe is displayed.
## (max_rows used to be set to 500 and then overwritten with 4000 right
## after; only the effective value is kept.)

pd.set_option('display.max_rows', 4000)
pd.set_option('display.max_columns', 500)

## Ignore Warnings
## NOTE: this silences every Python warning category for the whole
## session, so deprecation notices will not be shown either.

import warnings
warnings.filterwarnings('ignore')


*** Could not find EPA module. Will use only the CACTVS web service to resolve CAS number structures. ***



# 2. Normalization

## 2.1. Standardize molecules 
- Uses Standardiser (F. Atkinson) with modifications by Bet Gregori

In [2]:
def getNormMol(smiles, neutralize=True):

    '''
    Standardise one SMILES string and return its identifiers.

    Info
    ----
    This function is going to:
       - parse the SMILES with RDKit.
       - use standardiser (ps.std) on the parsed molecule; according to
         the original notes this removes salts, mixtures and metal ions.
       - use the molvs TautomerCanonicalizer to canonicalize the molecule.
       - optionally use standardiser to neutralize the molecule.

    Parameters
    ----------

    smiles : str
        ## SMILES string of the molecule to standardise
    neutralize : bool, default True
        ## any truthy value runs neutralise.run on the canonicalized
        ## molecule; a falsy value skips that step.

    Return
    ------

    pandas Series with three positional values
    ( inchikey, standard smiles, Standardisation information ):

       - ('NA', 'NA', 'Not Passed: Invalid SMILES')
             RDKit could not parse the input SMILES.
       - ('NA', 'NA', 'Not Passed: There is more than 1 molecule')
             ps.std returned more than one entry.
       - ('METAL', 'METAL', 'Not passed: There is Metal Ion')
             ps.std flagged the molecule as metal.
       - (inchikey, std_smiles, 'Smile Not Standardised')
             ps.std reported the molecule as not passed; identifiers are
             still computed from the canonicalized molecule.
       - (inchikey, std_smiles, 'Smile Standardised')
             every step succeeded.

    Example
    -------

    One can run the function like this and join 3 new columns to the
    dataframe containing the standardization results.

           df[['parent_std_inkey', 'std_smiles', 'info_standardization']] = df.apply(
            lambda row: getNormMol(row['SMILES'], neutralize=True), axis=1)
    '''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None instead of
        # raising, so report it as a failed row rather than passing None on.
        return pd.Series(('NA', 'NA', 'Not Passed: Invalid SMILES'))

    stdD = ps.std(mol)
    if len(stdD) > 1:
        return pd.Series(('NA', 'NA', 'Not Passed: There is more than 1 molecule'))

    std_smiles = list(stdD.keys()).pop()
    (mol, ismetal, passed, _errmessage) = stdD[std_smiles]
    if ismetal:
        return pd.Series(('METAL', 'METAL', 'Not passed: There is Metal Ion'))

    # The canonicalizer is only needed once the early exits are behind us.
    mol = tautomer.TautomerCanonicalizer().canonicalize(mol)
    if neutralize:
        mol = neutralise.run(mol)

    std_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    inchikey = Chem.InchiToInchiKey(Chem.MolToInchi(mol))

    # Passed and not-passed molecules get the same identifiers; only the
    # information message tells them apart.
    info = 'Smile Standardised' if passed else 'Smile Not Standardised'
    return pd.Series((inchikey, std_smiles, info))