In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix


In [2]:
# Read the six pre-split CSV files from the working directory. The names are
# bound in the same order as the files are listed: x_*/y_* pairs for the
# train, test and validation splits.
x_train, y_train, x_test, y_test, x_valid, y_valid = map(
    pd.read_csv,
    (
        "x_train.csv",
        "y_train.csv",
        "x_test.csv",
        "y_test.csv",
        "x_valid.csv",
        "y_valid.csv",
    ),
)


In [3]:
# Model-evaluation helper: derives the usual binary-classification metrics
# from the confusion matrix of a set of predictions.
def model_eval(y_test, y_pred):
    """Compute binary-classification metrics from true and predicted labels.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels. Despite the name, labels of any split may be
        passed (the notebook also calls this with the training labels).
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_test``.

    Returns
    -------
    dict
        Keys ``"Accuracy"``, ``"Precision"``, ``"Recall"``, ``"Specificity"``
        and ``"F1"``, each mapped to a float. A metric whose denominator is
        zero (e.g. precision when nothing was predicted positive) is
        reported as ``0.0`` instead of NaN.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2, i.e. the labels seen in
        ``y_test`` and ``y_pred`` do not form exactly two classes.
    """
    matrix = confusion_matrix(y_test, y_pred)
    if matrix.shape != (2, 2):
        raise ValueError(
            "model_eval expects exactly two classes, got a confusion matrix "
            "of shape {}".format(matrix.shape)
        )

    # scikit-learn documents the flattened binary layout as tn, fp, fn, tp.
    # Casting to plain ints keeps the results as ordinary Python floats.
    tn, fp, fn, tp = (int(count) for count in matrix.ravel())

    def _ratio(numerator, denominator):
        # Guard against empty denominators instead of producing NaN/warnings.
        return numerator / denominator if denominator else 0.0

    accuracy = _ratio(tp + tn, tp + tn + fp + fn)
    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    specificity = _ratio(tn, fp + tn)
    F1_Score = _ratio(2 * (recall * precision), recall + precision)

    result = {"Accuracy": accuracy, "Precision": precision, "Recall": recall, 'Specificity': specificity, 'F1': F1_Score}
    return result

In [4]:
# Fixed seed so the stochastic learners give the same numbers on every
# Restart & Run All.
SEED = 42

# Baseline candidates, all with default hyper-parameters apart from the seed
# and the explicit LogisticRegression solver settings.
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000, solver='liblinear', random_state=SEED),
    "K-Neighbors Classifier": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Random Forest Classifier": RandomForestClassifier(random_state=SEED),
    "XGBClassifier": XGBClassifier(random_state=SEED),
}

model_names = list(models.keys())
model_list = []
accuracy_score_list = []
Recall_score_list = []

# Scaling does not depend on the model: fit the scaler once on the training
# features and reuse the transformed arrays for every candidate.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# The labels are loaded as DataFrames; flatten them once to 1-D arrays.
y_train_flat = y_train.values.ravel()
y_test_flat = y_test.values.ravel()

# NOTE(review): x_valid / y_valid are loaded earlier in this notebook but are
# not used here; the candidates are compared on the test split. Consider
# ranking on the validation split and keeping the test split for the final
# model only.
for model_name, model in models.items():
    model.fit(x_train_scaled, y_train_flat)  # Train model

    # Make predictions
    Y_train_pred = model.predict(x_train_scaled)
    Y_test_pred = model.predict(x_test_scaled)

    # Evaluate Train and Test dataset
    model_train_results = model_eval(y_train_flat, Y_train_pred)
    model_test_results = model_eval(y_test_flat, Y_test_pred)

    model_train_accuracy_score = model_train_results["Accuracy"]
    model_train_recall_score = model_train_results["Recall"]

    model_test_accuracy_score = model_test_results["Accuracy"]
    model_test_recall_score = model_test_results["Recall"]

    print(model_name)
    model_list.append(model_name)

    print('Model performance for Training set')
    print("- accuracy_score: {} ".format(model_train_accuracy_score))
    print("- recall_score: {}".format(model_train_recall_score))
    print('----------------------------------')

    print('Model performance for Test set')
    print("- accuracy_score: {}".format(model_test_accuracy_score))
    print("- recall_score: {}".format(model_test_recall_score))

    # Keep the test-set scores for the comparison table built afterwards.
    # Recall_score_list was previously created but never filled.
    accuracy_score_list.append(model_test_accuracy_score)
    Recall_score_list.append(model_test_recall_score)

    print('=' * 35)
    print('\n')


LogisticRegression
Model performance for Training set
- accuracy_score: 0.7262375087148502 
- recall_score: 0.6925400883104811
----------------------------------
Model performance for Test set
- accuracy_score: 0.7024367385192127
- recall_score: 0.6541705716963448


K-Neighbors Classifier
Model performance for Training set
- accuracy_score: 0.9025098768301185 
- recall_score: 0.9549151754589821
----------------------------------
Model performance for Test set
- accuracy_score: 0.7132146204311153
- recall_score: 0.6419868791002812


Decision Tree
Model performance for Training set
- accuracy_score: 0.9991866139902393 
- recall_score: 1.0
----------------------------------
Model performance for Test set
- accuracy_score: 0.8078725398313027
- recall_score: 0.711340206185567


Random Forest Classifier
Model performance for Training set
- accuracy_score: 0.9990704159888449 
- recall_score: 0.9997676039972112
----------------------------------
Model performance for Test set
- accuracy_score:

In [5]:
# Leaderboard: pair each model name with its test-set accuracy and show the
# best-scoring model first.
score_rows = list(zip(model_list, accuracy_score_list))
score_table = pd.DataFrame(score_rows, columns=['Model Name', 'accuracy_Score'])
score_table.sort_values(by=["accuracy_Score"], ascending=False)

Unnamed: 0,Model Name,accuracy_Score
4,XGBClassifier,0.842549
3,Random Forest Classifier,0.828022
2,Decision Tree,0.807873
1,K-Neighbors Classifier,0.713215
0,LogisticRegression,0.702437
