# Best Model Selection

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
df= sns.load_dataset('titanic')
X= df[['pclass', 'fare', 'sex', 'age','sibsp', 'parch']]
y= df['survived']
X= pd.get_dummies(X, columns=['sex'])
# fill the null values of age col
X['age'].fillna(X['age'].mean(), inplace=True)

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# import the metrics from sk learn
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=42)
models= [LogisticRegression(), DecisionTreeClassifier(), SVC(), KNeighborsClassifier(), RandomForestClassifier()]
model_names= ['logistic Regression', 'svm', 'kNN', 'Random Forest', 'Decision Tree']

models_scores=[]
for model , model_names in zip(models,model_names):
    model= model.fit(X_train, y_train)
    y_pred= model.predict(X_test)
    accuracy= accuracy_score(y_test, y_pred)
    models_scores.append([model_names, accuracy])

sorted_models= sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print('Accuracy Score:',f'{model[0]}: {model[1]: 0.2f}')


Accuracy Score: logistic Regression:  0.81
Accuracy Score: Decision Tree:  0.81
Accuracy Score: svm:  0.75
Accuracy Score: Random Forest:  0.69
Accuracy Score: kNN:  0.66


In [7]:
# to check recall score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# import the metrics from sk learn
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=42)
models= [LogisticRegression(), DecisionTreeClassifier(), SVC(), KNeighborsClassifier(), RandomForestClassifier()]
model_names= ['logistic Regression', 'svm', 'kNN', 'Random Forest', 'Decision Tree']

models_scores=[]
for model , model_names in zip(models,model_names):
    model= model.fit(X_train, y_train)
    y_pred= model.predict(X_test)
    Recall_score= recall_score(y_test, y_pred)
    models_scores.append([model_names, accuracy])

sorted_models= sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print('Recall Score:',f'{model[0]}: {model[1]: 0.2f}')

Recall Score: logistic Regression:  0.81
Recall Score: svm:  0.81
Recall Score: kNN:  0.81
Recall Score: Random Forest:  0.81
Recall Score: Decision Tree:  0.81


In [8]:
# to check precision score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# import the metrics from sk learn
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=42)
models= [LogisticRegression(), DecisionTreeClassifier(), SVC(), KNeighborsClassifier(), RandomForestClassifier()]
model_names= ['logistic Regression', 'svm', 'kNN', 'Random Forest', 'Decision Tree']

models_scores=[]
for model , model_names in zip(models,model_names):
    model= model.fit(X_train, y_train)
    y_pred= model.predict(X_test)
    Preciision= precision_score(y_test, y_pred)
    models_scores.append([model_names, accuracy])

sorted_models= sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print('Precision Score:',f'{model[0]}: {model[1]: 0.2f}')

Precision Score: logistic Regression:  0.81
Precision Score: svm:  0.81
Precision Score: kNN:  0.81
Precision Score: Random Forest:  0.81
Precision Score: Decision Tree:  0.81


In [13]:
# to check f1 score of all models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# import the metrics from sk learn
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2, random_state=42)
models= [LogisticRegression(), DecisionTreeClassifier(), SVC(), KNeighborsClassifier(), RandomForestClassifier()]
model_names= ['logistic Regression', 'svm', 'kNN', 'Random Forest', 'Decision Tree']

models_scores=[]
for model , model_names in zip(models,model_names):
    model= model.fit(X_train, y_train)
    y_pred= model.predict(X_test)
    F1= f1_score(y_test, y_pred)
    models_scores.append([model_names, accuracy])

sorted_models= sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
     print('f Score:',f'{model[0]}: {model[1]: 0.2f}')

f Score: logistic Regression:  0.81
f Score: svm:  0.81
f Score: kNN:  0.81
f Score: Random Forest:  0.81
f Score: Decision Tree:  0.81
