# In [None]:
# -*- Fingerprinter.py -*-
"""
Created Jan 2019

@author: Elena Gelzintye / Timothy E H Allen
"""
#%%

# Import modules

import pandas as pd 
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import MACCSkeys

# Define paths and variables

'''
chemicals_path= binary activity file (.csv)
fingerprints_path= location for output (.csv)
fingerprint_length = length of generated fingerprint
fingerprint_radius = radius of generated fingerprint
'''


# Receptor label; it selects both the input file and the output file name.
receptor = "AR"

# Morgan fingerprint settings read by get_fingerprint().
# NOTE(review): the output file name below is labelled "ECFP4 10000". ECFP4
# conventionally corresponds to a Morgan radius of 2, so a radius of 4 may not
# match that label -- confirm which one is intended before changing either.
fingerprint_radius = 4
fingerprint_length = 10000

# Input table (must contain a 'SMILES' column) and output table locations.
chemicals_path = f"/content/drive/My Drive/{receptor}.csv"
fingerprints_path = f"/content/drive/My Drive/{receptor} fingerprints ECFP4 10000.csv"

# Load the chemicals table once; the fingerprinting functions take it as input.
smiles = pd.read_csv(chemicals_path)

# Define ECFP fingerprinting procedure

def get_fingerprint(smiles, radius=None, n_bits=None):
    '''Compute Morgan bit-vector fingerprints for a table of SMILES strings.

    Parameters
    ----------
    smiles : pandas.DataFrame
        Table with a 'SMILES' column, one molecule per row.
    radius : int, optional
        Morgan radius passed to RDKit. Defaults to the module-level
        ``fingerprint_radius``.
    n_bits : int, optional
        Length of the fingerprint in bits. Defaults to the module-level
        ``fingerprint_length``.

    Returns
    -------
    fingerprint_df : pandas.DataFrame
        One row per input molecule (input order, default integer index) and
        one integer column per fingerprint bit.
    bit_infos : list of dict
        One dict per molecule, filled in by RDKit through the ``bitInfo``
        argument of ``GetMorganFingerprintAsBitVect``.

    Raises
    ------
    ValueError
        If a 'SMILES' entry is not a string (e.g. an empty CSV cell read as
        NaN) or cannot be parsed by RDKit.
    '''
    if radius is None:
        radius = fingerprint_radius
    if n_bits is None:
        n_bits = fingerprint_length

    bit_infos = []
    bit_rows = []
    for position, smi in enumerate(smiles['SMILES']):
        # Fail early with a readable message; rows must not be skipped because
        # callers join the result back onto the input table row by row.
        if not isinstance(smi, str):
            raise ValueError(
                "SMILES entry at position %d is not a string: %r" % (position, smi))
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError(
                "SMILES entry at position %d could not be parsed: %r" % (position, smi))

        bit_info = {}
        fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
            mol, radius=radius, nBits=n_bits, bitInfo=bit_info)
        bit_infos.append(bit_info)
        # The bit string is a sequence of '0'/'1' characters; turn it into ints.
        bit_rows.append(np.array(list(fp.ToBitString())).astype(int))

    fingerprint_df = pd.DataFrame(bit_rows)

    return fingerprint_df, bit_infos

# Define MACCS fingerprinting procedure

def get_MACCS(smiles):
    '''Compute MACCS-keys fingerprints for a table of SMILES strings.

    Parameters
    ----------
    smiles : pandas.DataFrame
        Table with a 'SMILES' column, one molecule per row.

    Returns
    -------
    fingerprint_df : pandas.DataFrame
        One row per input molecule (input order, default integer index) and
        one integer column per MACCS bit.
    bit_infos : list of dict
        One empty dict per molecule. Nothing is recorded here for MACCS keys;
        the list is returned so the result has the same shape as
        get_fingerprint().

    Raises
    ------
    ValueError
        If a 'SMILES' entry is not a string (e.g. an empty CSV cell read as
        NaN) or cannot be parsed by RDKit.
    '''
    bit_infos = []
    bit_rows = []
    for position, smi in enumerate(smiles['SMILES']):
        # Fail early with a readable message; rows must not be skipped because
        # callers join the result back onto the input table row by row.
        if not isinstance(smi, str):
            raise ValueError(
                "SMILES entry at position %d is not a string: %r" % (position, smi))
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError(
                "SMILES entry at position %d could not be parsed: %r" % (position, smi))

        fp = MACCSkeys.GenMACCSKeys(mol).ToBitString()
        bit_infos.append({})
        # The bit string is a sequence of '0'/'1' characters; turn it into ints.
        bit_rows.append(np.array(list(fp)).astype(int))

    fingerprint_df = pd.DataFrame(bit_rows)

    return fingerprint_df, bit_infos

# Build the Morgan fingerprints for every row of the loaded chemicals table.
print('getting fingerprints')
fingerprints, substruct_lib = get_fingerprint(smiles)

# Put the remaining (non-SMILES) columns of the input table to the right of
# the fingerprint bits; the two frames are joined column-wise on their index.
fingerprints = pd.concat(
    [fingerprints, smiles.drop(columns=['SMILES'])],
    axis=1,
)

# Write the combined table to disk without the index column.
fingerprints.to_csv(fingerprints_path, index=False)

# Signal completion.
print("END")

#%%