## Best Model selection

In [3]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = sns.load_dataset('titanic')
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']

X = pd.get_dummies(X, columns=['sex'])
X.age.fillna(value = X['age'].mean(), inplace=True)

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [LogisticRegression(),SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_score = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_score.append([model_name, accuracy])

sorted_models = sorted(models_score, key=lambda x:x[1], reverse=True)
for model in sorted_models:
    print("Accuracy Score:", f'{model[0]} : {model[1]:.2f}')


Accuracy Score: Logistic Regression : 0.81
Accuracy Score: Random Forest : 0.79
Accuracy Score: Decision Tree : 0.75
Accuracy Score: KNN : 0.69
Accuracy Score: SVM : 0.66


In [5]:
models = [LogisticRegression(),SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_score = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall = accuracy_score(y_test, y_pred)
    models_score.append([model_name, Recall])

sorted_models = sorted(models_score, key=lambda x:x[1], reverse=True)
for model in sorted_models:
    print("Recall Score:", f'{model[0]} : {model[1]:.2f}')

Recall Score: Logistic Regression : 0.81
Recall Score: Random Forest : 0.79
Recall Score: Decision Tree : 0.77
Recall Score: KNN : 0.69
Recall Score: SVM : 0.66
