# Loading libraries

In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from mordred import Calculator, descriptors

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier




# Training the Machine learning models


## Loading the data for training

In [2]:
# Training feature tables, one CSV per molecular representation, read in the
# order: Mordred descriptors, KRFP, MACCS, PubChem fingerprints.
X_mordred, X_krfp, X_maccs, X_pubchem = (
    pd.read_csv(csv_path)
    for csv_path in (
        'training_data/X_train_mordred.csv',
        'training_data/X_train_krfp.csv',
        'training_data/X_train_maccs.csv',
        'training_data/X_train_pubchem.csv',
    )
)

# Labels shared by all four feature tables, flattened to 1-D for scikit-learn.
y_train = pd.read_csv('training_data/y_train.csv')
y = np.asanyarray(y_train).ravel()

In [3]:
# Mordred descriptors: standardise the features, then fit a random forest.
# `sc` stays at notebook scope because the Mordred prediction cell calls
# sc.transform on the user's molecule.
sc = StandardScaler()
X_mordred_normalized = sc.fit_transform(X_mordred)
model_mordred = RandomForestClassifier(
    n_estimators=500,
    max_depth=10,
    min_samples_split=4,
    bootstrap=True,
    random_state=42,
)
model_mordred.fit(X_mordred_normalized, y)

# KRFP fingerprints: RBF-kernel support vector classifier on the raw features.
model_krfp = SVC(kernel='rbf', C=10, gamma=0.01)
model_krfp.fit(X_krfp, y)

# MACCS fingerprints: random forest on the raw features.
model_maccs = RandomForestClassifier(
    n_estimators=500,
    max_depth=15,
    min_samples_split=5,
    bootstrap=True,
    random_state=42,
)
model_maccs.fit(X_maccs, y)

# PubChem fingerprints: random forest on the raw features.
model_pubchem = RandomForestClassifier(
    n_estimators=800,
    max_depth=25,
    min_samples_split=5,
    bootstrap=True,
    random_state=42,
)
# Left as the last expression so the fitted estimator is shown as the cell output.
model_pubchem.fit(X_pubchem, y)

RandomForestClassifier(max_depth=25, min_samples_split=5, n_estimators=800,
                       random_state=42)

# Predicting the user's data

## Predicting activity from Mordred descriptors computed from a SMILES string

In [6]:
# Ask for the SMILES string of the molecule to score. Surrounding whitespace
# (e.g. a space typed after the prompt, or picked up when pasting) is removed
# so that only the SMILES itself is handed to the parser.
smile = input('Please enter smiles :').strip()

Please enter smiles : c1ccccc1


In [7]:
# Score one molecule with the Mordred model:
# SMILES -> RDKit molecule -> Mordred descriptors -> scaled row -> predicted label.
mol = Chem.MolFromSmiles(smile)
if mol is None:
    # RDKit reports an unparsable SMILES by returning None instead of raising,
    # so stop here with a clear message rather than failing inside Mordred.
    raise ValueError('Could not parse SMILES string: {!r}'.format(smile))

mordred_descriptors = Calculator(descriptors, ignore_3D=True)
# Descriptor names through the public `descriptors` property (str(descriptor)
# is its name) rather than the private `_name_dict` attribute.
descriptor_list = [str(d) for d in mordred_descriptors.descriptors]
calcu = mordred_descriptors(mol)

# Build a one-row frame whose columns are the descriptor names, then keep only
# the columns of the training table, in the training order.
df = pd.DataFrame(calcu, index=descriptor_list)
df_1 = df.T
data = pd.DataFrame(df_1, columns=X_mordred.columns.tolist())
# Apply the scaler that was fitted on the training data.
data = sc.transform(data)
y_pred = model_mordred.predict(data)

# Report each prediction; the two labels are mutually exclusive.
for i in y_pred:
    if i == 0:
        print('Inactive')
    elif i == 1:
        print('Active')
    

## For predicting activity based on KRFP fingerprint data

### Enter the csv file name within ' '.

In [None]:
# Put the path of the CSV file holding the KRFP fingerprints to score between
# the quotes below; the cell cannot load anything while it is left empty.
krfp_csv = ''
data_krfp = pd.read_csv(krfp_csv)

In [None]:
# Keep only the columns of the KRFP training table, in the training order,
# then predict with the KRFP model.
# Fix: the pandas class is `pd.DataFrame`; `pd.Dataframe` raises AttributeError.
data = pd.DataFrame(data_krfp, columns=X_krfp.columns.tolist())
y_pred = model_krfp.predict(data)

# Report each prediction; the two labels are mutually exclusive.
for i in y_pred:
    if i == 0:
        print('Inactive')
    elif i == 1:
        print('Active')
    

## For predicting activity based on MACCS fingerprint data

### Enter the csv file name within ' '.

In [None]:
# Put the path of the CSV file holding the MACCS fingerprints to score between
# the quotes below; the cell cannot load anything while it is left empty.
maccs_csv = ''
data_maccs = pd.read_csv(maccs_csv)

In [None]:
# Keep only the columns of the MACCS training table, in the training order,
# then predict with the MACCS model.
# Fix: the pandas class is `pd.DataFrame`; `pd.Dataframe` raises AttributeError.
data = pd.DataFrame(data_maccs, columns=X_maccs.columns.tolist())
y_pred = model_maccs.predict(data)

# Report each prediction; the two labels are mutually exclusive.
for i in y_pred:
    if i == 0:
        print('Inactive')
    elif i == 1:
        print('Active')
    

## For predicting activity based on PubChem fingerprint data

### Enter the csv file name within ' '.

In [None]:
# Put the path of the CSV file holding the PubChem fingerprints to score
# between the quotes below; the cell cannot load anything while it is left empty.
pubchem_csv = ''
data_pubchem = pd.read_csv(pubchem_csv)

In [None]:
# Keep only the columns of the PubChem training table, in the training order,
# then predict with the PubChem model.
# Fix: the pandas class is `pd.DataFrame`; `pd.Dataframe` raises AttributeError.
data = pd.DataFrame(data_pubchem, columns=X_pubchem.columns.tolist())
y_pred = model_pubchem.predict(data)

# Report each prediction; the two labels are mutually exclusive.
for i in y_pred:
    if i == 0:
        print('Inactive')
    elif i == 1:
        print('Active')
    