### Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import time
import warnings
warnings.filterwarnings('ignore')
from FingerPrint_Encoding import FingerprintGenerator
from Split_data import Split_data
from Classification_Benchmark_ML import Classification_Benchmark
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve ,RocCurveDisplay

## Data Loading

In [3]:
# Read the dataset CSV (resolved relative to the working directory) into a
# pandas DataFrame.
data = pd.read_csv("Cidals_Leishmania_Dataset_final.csv")
# Bare expression: shows the DataFrame when this runs as a notebook cell.
data

In [4]:
# Count how many rows carry each distinct value of the 'Activity' column.
type_counts = data["Activity"].value_counts()
# Put the counts into a one-column table indexed by the activity value.
count = pd.DataFrame({"Activity": type_counts})
# Bare expression: shows the first rows when this runs as a notebook cell.
count.head()

## Machine Learning 

In [3]:
# Shuffle every row (frac=1) and renumber the index from 0. The seed is fixed
# so that the row order is identical on every run; without it each execution
# produced a different permutation and results could not be reproduced.
# NOTE(review): the feature matrix is later read from an .npy file while the
# labels come from this in-memory frame - confirm both always originate from
# the same shuffle.
shuffled_df = data.sample(frac=1, random_state=42).reset_index(drop=True)

### RDK Fingerprint 

In [4]:
# Fingerprint identifier passed to the project's FingerprintGenerator.
# Note: the name `model` is rebound to a Classification_Benchmark object in
# later cells.
model = "rdk_fp"
fingerprint_generator = FingerprintGenerator(model)
# Hand the shuffled DataFrame to the generator before generating fingerprints.
# `mols` is not used again in the visible part of this file.
mols = fingerprint_generator.set_molecules(shuffled_df)
# NOTE(review): presumably this writes the fingerprint array to disk (a later
# cell loads 'rd_fp.npy') - confirm against FingerPrint_Encoding.
fingerprint_generator.generate_fingerprints()

In [5]:
# Target vector: the 'label' column of the shuffled frame as a NumPy array.
# NOTE(review): the class counts earlier are taken from the 'Activity' column;
# confirm 'label' is the intended target column.
data_y = shuffled_df["label"].values

In [6]:
# Load the fingerprint matrix from disk.
# NOTE(review): the file is named 'rd_fp.npy' while the fingerprint type is
# "rdk_fp" - confirm this is the file the generator produces.
x_rd = np.load("rd_fp.npy")
# Project splitter configured with 0.2 (presumably the held-out fraction -
# TODO confirm against Split_data).
split_size = Split_data(0.2)
# Partition features and labels into train / validation / test subsets.
(
    x_train,
    y_train,
    x_val,
    y_val,
    x_test,
    y_test,
) = split_size.split(x_rd, data_y)

#### Random Forest 

In [5]:
# Fit and evaluate the 'RF' benchmark model on the Rdk fingerprints.
model_name = "RF"
fp_name = "Rdk"
data_state = "Original"
model = Classification_Benchmark(model_name, fp_name, data_state)
# export=True with a name asks the benchmark to persist the fitted model
# (exact destination is decided inside Classification_Benchmark).
rf_model, score_train_rf = model.fit(
    x_train, y_train, export=True, name="original_rf_rdk_model"
)
df_rf_rdk, pred_val_rf, pred_test_rf = model.evaluate(
    rf_model, x_val, y_val, x_test, y_test, score_train_rf
)
# classification_report expects (y_true, y_pred). The arguments were passed
# the other way round, which swaps per-class precision and recall and reports
# support counts of the predictions instead of the true labels.
print("classification report RF :", classification_report(y_test, pred_test_rf))

In [6]:
# ROC curve of the RF model on the test set, saved as a PNG.
# Start from a fresh figure so curves drawn by other cells cannot end up in
# this image when the code runs outside an inline notebook backend.
plt.figure()
# NOTE(review): if pred_test_rf holds hard class labels rather than scores or
# probabilities, the curve has a single operating point - confirm what
# Classification_Benchmark.evaluate returns.
fpr, tpr, _ = roc_curve(y_test, pred_test_rf)
plt.plot(fpr, tpr, label='ROC curve')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random classifier
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # small headroom above TPR = 1
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.savefig("roc_auc_curve_RF_test.png", dpi=300)

####  Neural Network 

In [7]:
# Fit and evaluate the 'MLP' benchmark model on the Rdk fingerprints.
model_name = "MLP"
fp_name = "Rdk"
data_state = "Original"
model = Classification_Benchmark(model_name, fp_name, data_state)
# export=True with a name asks the benchmark to persist the fitted model
# (exact destination is decided inside Classification_Benchmark).
NN_model, score_train_nn = model.fit(
    x_train, y_train, export=True, name="original_nn_rdk_model"
)
df_NN_rdk, pred_val_nn, pred_test_nn = model.evaluate(
    NN_model, x_val, y_val, x_test, y_test, score_train_nn
)
# classification_report expects (y_true, y_pred). The arguments were passed
# the other way round, which swaps per-class precision and recall and reports
# support counts of the predictions instead of the true labels.
print("classification report NN :", classification_report(y_test, pred_test_nn))

In [9]:
# ROC curve of the MLP model on the test set, saved as a PNG.
# Start from a fresh figure so curves drawn by other cells cannot end up in
# this image when the code runs outside an inline notebook backend.
plt.figure()
# NOTE(review): if pred_test_nn holds hard class labels rather than scores or
# probabilities, the curve has a single operating point - confirm what
# Classification_Benchmark.evaluate returns.
fpr, tpr, _ = roc_curve(y_test, pred_test_nn)
plt.plot(fpr, tpr, label='ROC curve')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random classifier
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # small headroom above TPR = 1
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.savefig("roc_auc_curve_NN_test.png", dpi=300)

#### Naive Bayes

In [10]:
# Fit and evaluate the 'NB' benchmark model on the Rdk fingerprints.
model_name = "NB"
fp_name = "Rdk"
data_state = "Original"
model = Classification_Benchmark(model_name, fp_name, data_state)
# export=True with a name asks the benchmark to persist the fitted model
# (exact destination is decided inside Classification_Benchmark).
NB_model, score_train_nb = model.fit(
    x_train, y_train, export=True, name="original_nb_rdk_model"
)
df_nb_rdk, pred_val_nb, pred_test_nb = model.evaluate(
    NB_model, x_val, y_val, x_test, y_test, score_train_nb
)
# classification_report expects (y_true, y_pred). The arguments were passed
# the other way round, which swaps per-class precision and recall and reports
# support counts of the predictions instead of the true labels.
print("classification report NB :", classification_report(y_test, pred_test_nb))

In [11]:
# ROC curve of the NB model on the test set, saved as a PNG.
# Start from a fresh figure so curves drawn by other cells cannot end up in
# this image when the code runs outside an inline notebook backend.
plt.figure()
# NOTE(review): if pred_test_nb holds hard class labels rather than scores or
# probabilities, the curve has a single operating point - confirm what
# Classification_Benchmark.evaluate returns.
fpr, tpr, _ = roc_curve(y_test, pred_test_nb)
plt.plot(fpr, tpr, label='ROC curve')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random classifier
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # small headroom above TPR = 1
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.savefig("roc_auc_curve_NB_test.png", dpi=300)

#### XGBoost

In [12]:
# Fit and evaluate the 'GB' benchmark model on the Rdk fingerprints.
model_name = "GB"
fp_name = "Rdk"
data_state = "Original"
model = Classification_Benchmark(model_name, fp_name, data_state)
# export=True with a name asks the benchmark to persist the fitted model
# (exact destination is decided inside Classification_Benchmark).
GB_model, score_train_gb = model.fit(
    x_train, y_train, export=True, name="original_GB_rdk_model"
)
# NOTE(review): `sdf_gb_rdk` breaks the df_<model>_rdk naming used for the
# other models and looks like a typo for `df_gb_rdk`; the name is kept here
# because later cells may refer to it.
sdf_gb_rdk, pred_val_gb, pred_test_gb = model.evaluate(
    GB_model, x_val, y_val, x_test, y_test, score_train_gb
)
# classification_report expects (y_true, y_pred). The arguments were passed
# the other way round, which swaps per-class precision and recall and reports
# support counts of the predictions instead of the true labels.
print("classification report GB :", classification_report(y_test, pred_test_gb))

In [13]:
# ROC curve of the GB model on the test set, saved as a PNG.
# Start from a fresh figure so curves drawn by other cells cannot end up in
# this image when the code runs outside an inline notebook backend.
plt.figure()
# NOTE(review): if pred_test_gb holds hard class labels rather than scores or
# probabilities, the curve has a single operating point - confirm what
# Classification_Benchmark.evaluate returns.
fpr, tpr, _ = roc_curve(y_test, pred_test_gb)
plt.plot(fpr, tpr, label='ROC curve')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random classifier
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # small headroom above TPR = 1
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.savefig("roc_auc_curve_GB_test.png", dpi=300)