In [None]:
from sklearn.linear_model import Lasso
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split


In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Full processed table; in this cell it is only used for its 'label' column.
dataset = pd.read_csv('processed_data.csv')

# Feature subset produced by one feature-selection method. Swap the file name
# to evaluate a different method:
#   'selected_stepwise.csv' | 'selected_l1.csv' | 'selected_genetic.csv'
# The file's own 'label' column is dropped so only features remain.
data_selected = pd.read_csv('selected_stepwise.csv').drop(columns=['label'])

# Target vector, taken from the processed table.
# NOTE(review): assumes both CSVs list the same rows in the same order — confirm.
data_y = dataset['label']

# Show the feature frame as the cell output.
data_selected

In [None]:
# Hold out 30% of the rows as the test set.
# A fixed random_state makes the split reproducible across kernel restarts, so
# the metrics and the hyper-parameters tuned in later cells refer to the same data.
xtrain, xtest, ytrain, ytest = train_test_split(
    data_selected, data_y, test_size=0.3, random_state=42
)

In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score,roc_auc_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

Decision Tree

In [None]:
# Decision tree — hyper-parameter search (5-fold CV, estimator's default score).
# Candidates: max_depth 1,3,5,7,9 · min_samples_split 2,5,...,17 · min_samples_leaf 10,20,30,40
parameters = dict(
    max_depth=range(1, 10, 2),
    min_samples_split=range(2, 20, 3),
    min_samples_leaf=range(10, 50, 10),
)
model = DecisionTreeClassifier()
grid_search = GridSearchCV(estimator=model, param_grid=parameters, cv=5)
grid_search.fit(xtrain, ytrain)

# Best combination found; shown as the cell output.
grid_search.best_params_

In [None]:
# Decision tree — fit on the training split and report training-set metrics.
# NOTE(review): the hyper-parameters are hard-coded (presumably copied from an
# earlier grid-search run) — re-check them against grid_search.best_params_.
model = DecisionTreeClassifier(criterion="gini", max_depth=3, min_samples_split=10, min_samples_leaf=30)
# fit() returns the estimator itself, so `dt` and `model` are the same fitted tree.
dt = model.fit(xtrain, ytrain)

# Hard class predictions for the threshold-based metrics.
ypred_train = dt.predict(xtrain)
# Class probabilities; column 1 is the probability of the positive class (classes_[1]).
ytrain_proba = dt.predict_proba(xtrain)

# recall / precision / F1 use the default average='binary', i.e. they score the positive class.
accuracy_train = accuracy_score(ytrain, ypred_train)
recall_train = recall_score(ytrain, ypred_train)
precision_train = precision_score(ytrain, ypred_train)
f1_train = f1_score(ytrain, ypred_train)
conf_matrix_train = confusion_matrix(ytrain, ypred_train)
# AUC needs continuous scores: feeding hard 0/1 predictions collapses the ROC
# curve to a single operating point and understates the ranking quality.
auc_train = roc_auc_score(ytrain, ytrain_proba[:, 1])

# Print the metrics
print("Training Set Evaluation Metrics:")
print(f"Accuracy: {accuracy_train}")
print(f"Recall: {recall_train}")
print(f"Precision: {precision_train}")
print(f"F1 Score: {f1_train}")
print(f"AUC:{auc_train}")
#print("Confusion Matrix:\n", conf_matrix_train)

In [None]:
# Decision tree — test-set metrics for the tree fitted in the previous cell (`model`).
print("Test Set Evaluation Metrics:")

# Hard class predictions for the threshold-based metrics.
ypred = model.predict(xtest)
# Positive-class probability (column 1) is the score used for AUC.
ytest_proba = model.predict_proba(xtest)

accuracy_test = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy_test}")
recall_test = recall_score(ytest, ypred)
print(f"Recall: {recall_test}")
precision_test = precision_score(ytest, ypred)
print(f"Precision: {precision_test}")
f1_test = f1_score(ytest, ypred)
print(f"F1 Score: {f1_test}")
conf_matrix_test = confusion_matrix(ytest, ypred)
#print("Confusion Matrix:\n", conf_matrix_test)
# AUC is computed from probabilities, not hard labels, so the full ROC curve is used.
auc_test = roc_auc_score(ytest, ytest_proba[:, 1])
print(f"AUC:{auc_test}")

Random Forest

In [None]:
# Random forest — step 1: tune the number of trees on its own
# (5-fold CV, accuracy; candidates 1, 11, 21, ..., 91).
from sklearn import ensemble

parameters = dict(n_estimators=range(1, 101, 10))
rf = ensemble.RandomForestClassifier()
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(xtrain, ytrain)

# Best tree count found; shown as the cell output.
grid_search.best_params_

In [None]:
# Random forest — step 2: with the tree count fixed at 41, tune tree depth,
# features considered per split, and the minimum samples needed to split.
# NOTE(review): 41 is presumably the n_estimators winner of the previous search — confirm.
parameters = dict(
    max_depth=range(1, 7, 1),
    max_features=range(5, 30, 5),
    min_samples_split=range(20, 50, 5),
)
rf = ensemble.RandomForestClassifier(n_estimators=41)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(xtrain, ytrain)

# Best combination found; shown as the cell output.
grid_search.best_params_

In [None]:
# Random forest — fit the final model and report train and test metrics.
# NOTE(review): n_estimators=121 lies outside the grid searched above
# (1..91, with 41 used in the second search) — confirm the intended value.
rf = ensemble.RandomForestClassifier(n_estimators=121, max_depth=5, min_samples_split=45, max_features=15)
# fit() returns the estimator itself.
rf = rf.fit(xtrain, ytrain)

# ---- training set ----
ypred_train = rf.predict(xtrain)
# Positive-class probability (column 1 of predict_proba) is the score used for AUC.
ytrain_proba = rf.predict_proba(xtrain)[:, 1]

# recall / precision / F1 use the default average='binary' (positive class).
accuracy_train = accuracy_score(ytrain, ypred_train)
recall_train = recall_score(ytrain, ypred_train)
precision_train = precision_score(ytrain, ypred_train)
f1_train = f1_score(ytrain, ypred_train)
conf_matrix_train = confusion_matrix(ytrain, ypred_train)
# AUC needs continuous scores; hard 0/1 predictions reduce the ROC curve to one point.
auc_train = roc_auc_score(ytrain, ytrain_proba)

print("Training Set Evaluation Metrics:")
print(f"Accuracy: {accuracy_train}")
print(f"Recall: {recall_train}")
print(f"Precision: {precision_train}")
print(f"F1 Score: {f1_train}")
print(f"AUC:{auc_train}")
#print("Confusion Matrix:\n", conf_matrix_train)

# ---- test set ----
print("Test Set Evaluation Metrics:")
ypred = rf.predict(xtest)
ytest_proba = rf.predict_proba(xtest)[:, 1]

accuracy_test = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy_test}")
recall_test = recall_score(ytest, ypred)
print(f"Recall: {recall_test}")
precision_test = precision_score(ytest, ypred)
print(f"Precision: {precision_test}")
f1_test = f1_score(ytest, ypred)
print(f"F1 Score: {f1_test}")
conf_matrix_test = confusion_matrix(ytest, ypred)
#print("Confusion Matrix:\n", conf_matrix_test)
auc_test = roc_auc_score(ytest, ytest_proba)
print(f"AUC:{auc_test}")

SVM

In [None]:
# SVM — hyper-parameter search over C and gamma (5-fold CV, accuracy).
from sklearn import svm

# A plain dict: the original trailing comma wrapped it in a 1-tuple, which is
# not one of the documented param_grid types (dict or list of dicts).
param_grid = {
    "C": [0.001, 0.01, 0.1, 1, 10],
    "gamma": [0.1, 0.01, 0.001, 0.0001, 0.00001],
}
# Use a separate name for the estimator so the imported `svm` module is not shadowed.
svc = svm.SVC()
grid_search = GridSearchCV(svc, param_grid, scoring='accuracy', cv=5)
grid_search.fit(xtrain, ytrain)

# Best combination found; shown as the cell output.
grid_search.best_params_

In [None]:
# SVM — fit the final RBF model and report train and test metrics.
# Import the class directly so this cell can be re-run without first
# re-importing a module that the `svm` variable would otherwise shadow.
from sklearn.svm import SVC

# NOTE(review): C=15 is not among the values searched in the grid above
# (max 10) — confirm the intended value.
# The fitted model stays bound to `svm`, as in the original cell.
svm = SVC(kernel='rbf', C=15, gamma=0.01)
# fit() returns the estimator itself.
svm = svm.fit(xtrain, ytrain)

# ---- training set ----
ypred_train = svm.predict(xtrain)
# SVC is built without probability=True, so the signed distance to the decision
# boundary is used as the continuous score for AUC.
yscore_train = svm.decision_function(xtrain)

# recall / precision / F1 use the default average='binary' (positive class).
accuracy_train = accuracy_score(ytrain, ypred_train)
recall_train = recall_score(ytrain, ypred_train)
precision_train = precision_score(ytrain, ypred_train)
f1_train = f1_score(ytrain, ypred_train)
conf_matrix_train = confusion_matrix(ytrain, ypred_train)
# AUC needs continuous scores; hard 0/1 predictions reduce the ROC curve to one point.
auc_train = roc_auc_score(ytrain, yscore_train)

print("Training Set Evaluation Metrics:")
print(f"Accuracy: {accuracy_train}")
print(f"Recall: {recall_train}")
print(f"Precision: {precision_train}")
print(f"F1 Score: {f1_train}")
print(f"AUC:{auc_train}")
#print("Confusion Matrix:\n", conf_matrix_train)

# ---- test set ----
# Header added so the two metric listings in this cell's output can be told apart.
print("Test Set Evaluation Metrics:")
ypred = svm.predict(xtest)
yscore_test = svm.decision_function(xtest)

accuracy_test = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy_test}")
recall_test = recall_score(ytest, ypred)
print(f"Recall: {recall_test}")
precision_test = precision_score(ytest, ypred)
print(f"Precision: {precision_test}")
f1_test = f1_score(ytest, ypred)
print(f"F1 Score: {f1_test}")
conf_matrix_test = confusion_matrix(ytest, ypred)
#print("Confusion Matrix:\n", conf_matrix_test)
auc_test = roc_auc_score(ytest, yscore_test)
print(f"AUC:{auc_test}")

XGBoost

In [None]:

# XGBoost — step 1: tune the learning rate with the other settings held fixed.
from xgboost.sklearn import XGBClassifier

cv_params = {'learning_rate': [0.01, 0.015, 0.025, 0.05, 0.1]}
model = XGBClassifier(max_depth=5, min_child_weight=1, seed=0, subsample=0.8, gamma=0, reg_alpha=0, reg_lambda=1)
# Score with accuracy: 'r2' is a regression metric, and accuracy matches the
# scoring used for the random-forest and SVM searches.
grid_search = GridSearchCV(estimator=model, param_grid=cv_params, scoring='accuracy', cv=5, verbose=1, n_jobs=4)
grid_search.fit(xtrain, ytrain)

# Best learning rate found; shown as the cell output.
grid_search.best_params_

In [None]:

# XGBoost — step 2: tune tree depth and minimum child weight with
# learning_rate fixed at 0.025.
cv_params = {'max_depth': range(1, 10, 1), 'min_child_weight': [1, 3, 5, 7]}

model = XGBClassifier(learning_rate=0.025, seed=0, subsample=0.8, gamma=0, reg_alpha=0, reg_lambda=1)
# Score with accuracy: 'r2' is a regression metric, and accuracy matches the
# scoring used for the random-forest and SVM searches.
grid_search = GridSearchCV(estimator=model, param_grid=cv_params, scoring='accuracy', cv=5, verbose=1, n_jobs=4)
grid_search.fit(xtrain, ytrain)

# Best combination found; shown as the cell output.
grid_search.best_params_

In [None]:

# XGBoost — step 3: tune gamma with learning_rate, max_depth and
# min_child_weight held fixed.
cv_params = {'gamma': [0, 0.05, 0.07, 0.1, 0.3, 0.5, 0.7, 0.9, 1]}

model = XGBClassifier(learning_rate=0.025, max_depth=6, min_child_weight=3, seed=0, subsample=0.8, reg_alpha=0, reg_lambda=1)
# Score with accuracy: 'r2' is a regression metric, and accuracy matches the
# scoring used for the random-forest and SVM searches.
grid_search = GridSearchCV(estimator=model, param_grid=cv_params, scoring='accuracy', cv=5, verbose=1, n_jobs=4)
grid_search.fit(xtrain, ytrain)

# Best gamma found; shown as the cell output.
grid_search.best_params_

In [None]:
# XGBoost — step 4: tune the L1 (reg_alpha) and L2 (reg_lambda) regularisation
# strengths with the other settings held fixed.
cv_params = {'reg_alpha': [0, 0.01, 0.05, 0.1, 1], 'reg_lambda': [0, 0.1, 0.5, 1]}
model = XGBClassifier(learning_rate=0.025, max_depth=6, min_child_weight=3, seed=0, subsample=0.8, gamma=0)
# Score with accuracy: 'r2' is a regression metric, and accuracy matches the
# scoring used for the random-forest and SVM searches.
grid_search = GridSearchCV(estimator=model, param_grid=cv_params, scoring='accuracy', cv=5, verbose=1, n_jobs=4)
grid_search.fit(xtrain, ytrain)

# Best combination found; shown as the cell output.
grid_search.best_params_

In [None]:
# XGBoost — fit the final model and report train and test metrics.
from xgboost.sklearn import XGBClassifier

# NOTE(review): hyper-parameters are hard-coded (presumably copied from the
# grid searches above) — re-check them after re-running the searches.
xgbt = XGBClassifier(
    gamma=0,
    learning_rate=0.025,
    max_depth=6,
    subsample=0.8,
    reg_alpha=0.1,
    reg_lambda=1,
    min_child_weight=3
)
# fit() returns the estimator itself.
xgbt = xgbt.fit(xtrain, ytrain)

# ---- training set ----
ypred_train = xgbt.predict(xtrain)
# Positive-class probability (column 1 of predict_proba) is the score used for AUC.
ytrain_proba = xgbt.predict_proba(xtrain)[:, 1]

# recall / precision / F1 use the default average='binary' (positive class).
accuracy_train = accuracy_score(ytrain, ypred_train)
recall_train = recall_score(ytrain, ypred_train)
precision_train = precision_score(ytrain, ypred_train)
f1_train = f1_score(ytrain, ypred_train)
conf_matrix_train = confusion_matrix(ytrain, ypred_train)
# AUC needs continuous scores; hard 0/1 predictions reduce the ROC curve to one point.
auc_train = roc_auc_score(ytrain, ytrain_proba)

print("Training Set Evaluation Metrics:")
print(f"Accuracy: {accuracy_train}")
print(f"Recall: {recall_train}")
print(f"Precision: {precision_train}")
print(f"F1 Score: {f1_train}")
print(f"AUC:{auc_train}")

# ---- test set ----
# Header added so the two metric listings in this cell's output can be told apart.
print("Test Set Evaluation Metrics:")
ypred = xgbt.predict(xtest)
ytest_proba = xgbt.predict_proba(xtest)[:, 1]

accuracy_test = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy_test}")
recall_test = recall_score(ytest, ypred)
print(f"Recall: {recall_test}")
precision_test = precision_score(ytest, ypred)
print(f"Precision: {precision_test}")
f1_test = f1_score(ytest, ypred)
print(f"F1 Score: {f1_test}")
conf_matrix_test = confusion_matrix(ytest, ypred)
#print("Confusion Matrix:\n", conf_matrix_test)
auc_test = roc_auc_score(ytest, ytest_proba)
print(f"AUC:{auc_test}")