# Best Model Selection

In [2]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = sns.load_dataset('titanic')  # load the 'titanic' example dataset through seaborn
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]  # feature columns
y = df['survived']  # target label
X = pd.get_dummies(X, columns=['sex'])  # one-hot encode the categorical 'sex' column
# Fill missing ages with the column mean. The result is assigned back to the
# column instead of calling fillna(..., inplace=True) on `X.age`: an in-place
# call on a column accessor is chained assignment, which pandas warns about and
# which leaves X untouched when Copy-on-Write is enabled.
# NOTE(review): the mean is taken over every row of X, so rows that later land
# in a held-out split also contribute to the imputed value.
X['age'] = X['age'].fillna(X['age'].mean())


from sklearn.linear_model import LogisticRegression #Logistic Regression is a type of supervised learning algorithm used for classification
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# features stored in X and labels in y; hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Accuracy score
# The decision tree and the random forest are randomized estimators. Their
# random_state is pinned so this ranking is the same on every run, matching
# the seeded train/test split above.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

# Fit each estimator on the training split and score it on the test split.
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name, accuracy])

# Rank best-first. The print loop unpacks into its own names so it does not
# rebind `model`, which holds an estimator in the loop above.
sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print('Accuracy score: ', f'{ranked_name} : {ranked_score:.2f}')

Accuracy score:  Logistic Regression : 0.81
Accuracy score:  Random Forest : 0.79
Accuracy score:  Decision Tree : 0.77
Accuracy score:  KNN : 0.69
Accuracy score:  SVM : 0.66


In [10]:
# precision score
# Each metric cell refits fresh estimators. The decision tree and the random
# forest are randomized, so their random_state is pinned: the refit is then
# deterministic and the printed ranking is the same on every run.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

# Fit each estimator on the training split and compute precision on the test split.
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precision = precision_score(y_test, y_pred)
    models_scores.append([model_name, Precision])

# Rank best-first. The print loop unpacks into its own names so it does not
# rebind `model`, which holds an estimator in the loop above.
sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print('Precision score: ', f'{ranked_name} : {ranked_score:.2f}')

Precision score:  Logistic Regression : 0.80
Precision score:  Random Forest : 0.79
Precision score:  SVM : 0.76
Precision score:  Decision Tree : 0.71
Precision score:  KNN : 0.66


In [11]:
# Recall score
# Each metric cell refits fresh estimators. The decision tree and the random
# forest are randomized, so their random_state is pinned: the refit is then
# deterministic and the printed ranking is the same on every run.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

# Fit each estimator on the training split and compute recall on the test split.
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall = recall_score(y_test, y_pred)
    models_scores.append([model_name, Recall])

# Rank best-first. The print loop unpacks into its own names so it does not
# rebind `model`, which holds an estimator in the loop above.
sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print('Recall score: ', f'{ranked_name} : {ranked_score:.2f}')

Recall score:  Logistic Regression : 0.72
Recall score:  Random Forest : 0.72
Recall score:  Decision Tree : 0.69
Recall score:  KNN : 0.54
Recall score:  SVM : 0.26


In [12]:
# F1 Score
# Each metric cell refits fresh estimators. The decision tree and the random
# forest are randomized, so their random_state is pinned: the refit is then
# deterministic and the printed ranking is the same on every run.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

# Fit each estimator on the training split and compute F1 on the test split.
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    F1 = f1_score(y_test, y_pred)
    models_scores.append([model_name, F1])

# Rank best-first. The print loop unpacks into its own names so it does not
# rebind `model`, which holds an estimator in the loop above.
sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print('F1 score: ', f'{ranked_name} : {ranked_score:.2f}')

F1 score:  Random Forest : 0.77
F1 score:  Logistic Regression : 0.76
F1 score:  Decision Tree : 0.71
F1 score:  KNN : 0.59
F1 score:  SVM : 0.38


---
# July 15, 2023