#Best Model Selection

In [6]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#load dataset
df = sns.load_dataset('titanic')
X = df[["pclass","sex","age","sibsp","parch","fare"]]
y = df["survived"]
#preprocess data
X = pd.get_dummies(X, columns=["sex"])
X.age.fillna(X.age.mean(), inplace=True)

#iimpor sklearn libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

#split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#initialize models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "KNN": KNeighborsClassifier()
}
#train and evaluate models
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"{name} - Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")

#sorted model
sorted_models = sorted(models.items(), key=lambda x: accuracy_score(y_test, x[1].predict(X_test)), reverse=True)
for name, model in sorted_models:
    print(f"{name} - Accuracy: {accuracy_score(y_test, model.predict(X_test)):.2f}")
    

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(X.age.mean(), inplace=True)


Logistic Regression - Accuracy: 0.81, Precision: 0.80, Recall: 0.72, F1 Score: 0.76
SVM - Accuracy: 0.66, Precision: 0.76, Recall: 0.26, F1 Score: 0.38
Decision Tree - Accuracy: 0.77, Precision: 0.72, Recall: 0.70, F1 Score: 0.71
Random Forest - Accuracy: 0.79, Precision: 0.78, Recall: 0.70, F1 Score: 0.74
KNN - Accuracy: 0.69, Precision: 0.66, Recall: 0.54, F1 Score: 0.59
Logistic Regression - Accuracy: 0.81
Random Forest - Accuracy: 0.79
Decision Tree - Accuracy: 0.77
KNN - Accuracy: 0.69
SVM - Accuracy: 0.66


In [8]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#load dataset
df = sns.load_dataset('titanic')
X = df[["pclass","sex","age","sibsp","parch","fare"]]
y = df["survived"]
#preprocess data
X = pd.get_dummies(X, columns=["sex"])
X.age.fillna(X.age.mean(), inplace=True)

#import sklearn libraries
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
#split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#initialize models
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ["Logistic Regression", "SVM", "Decision Tree", "Random Forest", "KNN"]

model_scores = []
#train and evaluate models
for model, name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    model_scores.append({
        "Model": name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    })
    
    sorted_models = sorted(model_scores, key=lambda x: x["Accuracy"], reverse=True)
    for model in sorted_models:
        print(f"{model['Model']} - Accuracy: {model['Accuracy']:.2f}, Precision: {model['Precision']:.2f}, Recall: {model['Recall']:.2f}, F1 Score: {model['F1 Score']:.2f}")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(X.age.mean(), inplace=True)


Logistic Regression - Accuracy: 0.81, Precision: 0.80, Recall: 0.72, F1 Score: 0.76
Logistic Regression - Accuracy: 0.81, Precision: 0.80, Recall: 0.72, F1 Score: 0.76
SVM - Accuracy: 0.66, Precision: 0.76, Recall: 0.26, F1 Score: 0.38
Logistic Regression - Accuracy: 0.81, Precision: 0.80, Recall: 0.72, F1 Score: 0.76
Decision Tree - Accuracy: 0.77, Precision: 0.72, Recall: 0.70, F1 Score: 0.71
SVM - Accuracy: 0.66, Precision: 0.76, Recall: 0.26, F1 Score: 0.38
Logistic Regression - Accuracy: 0.81, Precision: 0.80, Recall: 0.72, F1 Score: 0.76
Random Forest - Accuracy: 0.79, Precision: 0.77, Recall: 0.72, F1 Score: 0.74
Decision Tree - Accuracy: 0.77, Precision: 0.72, Recall: 0.70, F1 Score: 0.71
SVM - Accuracy: 0.66, Precision: 0.76, Recall: 0.26, F1 Score: 0.38
Logistic Regression - Accuracy: 0.81, Precision: 0.80, Recall: 0.72, F1 Score: 0.76
Random Forest - Accuracy: 0.79, Precision: 0.77, Recall: 0.72, F1 Score: 0.74
Decision Tree - Accuracy: 0.77, Precision: 0.72, Recall: 0.70, F

In [13]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#load dataset
df = sns.load_dataset('titanic')
X = df[["pclass","sex","age","sibsp","parch","fare"]]
y = df["survived"]
#preprocess data
X = pd.get_dummies(X, columns=["sex"])
X.age.fillna(X.age.mean(), inplace=True)

#import sklearn libraries
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
#split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#initialize models
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ["Logistic Regression", "SVM", "Decision Tree", "Random Forest", "KNN"]

model_scores = []
#train and evaluate models
for model, name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    model_scores.append([name, accuracy])
    
sorted_models = sorted(model_scores,key=lambda x:x[1],reverse=True)
for model in sorted_models:
        print("Accuracy score:",f'{model[0]}:{model[1]:.2f}')
    

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(X.age.mean(), inplace=True)


Accuracy score: Logistic Regression:0.81
Accuracy score: Random Forest:0.80
Accuracy score: Decision Tree:0.77
Accuracy score: KNN:0.69
Accuracy score: SVM:0.66


#Precision Score

In [15]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#load dataset
df = sns.load_dataset('titanic')
X = df[["pclass","sex","age","sibsp","parch","fare"]]
y = df["survived"]
#preprocess data
X = pd.get_dummies(X, columns=["sex"])
X.age.fillna(X.age.mean(), inplace=True)

#import sklearn libraries
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
#split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#initialize models
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ["Logistic Regression", "SVM", "Decision Tree", "Random Forest", "KNN"]

model_scores = []
#train and evaluate models
for model, name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # accuracy = accuracy_score(y_test, y_pred)
    Precision = precision_score(y_test, y_pred)
    model_scores.append([name, Precision])
    
sorted_models = sorted(model_scores,key=lambda x:x[1],reverse=True)
for model in sorted_models:
        print("Precision_score:",f'{model[0]}:{model[1]:.2f}')
    

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(X.age.mean(), inplace=True)


Precision_score: Logistic Regression:0.80
Precision_score: Random Forest:0.79
Precision_score: SVM:0.76
Precision_score: Decision Tree:0.71
Precision_score: KNN:0.66
