In [17]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('titanic')
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex'])
X['age'].fillna(X['age'].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['age'].fillna(X['age'].mean(), inplace=True)


In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [LogisticRegression(), SVC(), RandomForestClassifier(), DecisionTreeClassifier(), KNeighborsClassifier()]
model_names = ['logisticRegression', 'SVM', 'RandomForestClassifier', 'DecisionTreeClassifier', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name, accuracy])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
    print("Accuracy_score: ", f'{model[0]} : {model[1]:.2f}')


Accuracy_score:  logisticRegression : 0.81
Accuracy_score:  RandomForestClassifier : 0.81
Accuracy_score:  DecisionTreeClassifier : 0.77
Accuracy_score:  KNN : 0.69
Accuracy_score:  SVM : 0.66


In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [LogisticRegression(), SVC(), RandomForestClassifier(), DecisionTreeClassifier(), KNeighborsClassifier()]
model_names = ['logisticRegression', 'SVM', 'RandomForestClassifier', 'DecisionTreeClassifier', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precision = precision_score(y_test, y_pred)
    models_scores.append([model_name, Precision])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
    print("Precision_score: ", f'{model[0]} : {model[1]:.2f}')

Precision_score:  logisticRegression : 0.80
Precision_score:  RandomForestClassifier : 0.77
Precision_score:  SVM : 0.76
Precision_score:  DecisionTreeClassifier : 0.70
Precision_score:  KNN : 0.66


In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [LogisticRegression(), SVC(), RandomForestClassifier(), DecisionTreeClassifier(), KNeighborsClassifier()]
model_names = ['logisticRegression', 'SVM', 'RandomForestClassifier', 'DecisionTreeClassifier', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall_score = recall_score(y_test, y_pred)
    models_scores.append([model_name, Recall_score])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
    print("Recall_score: ", f'{model[0]} : {model[1]:.2f}')

Recall_score:  RandomForestClassifier : 0.76
Recall_score:  logisticRegression : 0.72
Recall_score:  DecisionTreeClassifier : 0.69
Recall_score:  KNN : 0.54
Recall_score:  SVM : 0.26


In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [LogisticRegression(), SVC(), RandomForestClassifier(), DecisionTreeClassifier(), KNeighborsClassifier()]
model_names = ['logisticRegression', 'SVM', 'RandomForestClassifier', 'DecisionTreeClassifier', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    F1_score = f1_score(y_test, y_pred)
    models_scores.append([model_name, F1_score])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
    print("F1_score: ", f'{model[0]} : {model[1]:.2f}')

F1_score:  logisticRegression : 0.76
F1_score:  RandomForestClassifier : 0.76
F1_score:  DecisionTreeClassifier : 0.71
F1_score:  KNN : 0.59
F1_score:  SVM : 0.38
