# Best Model Selection

In [5]:
# Importing Libraries
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 

# Loading DataSet 
df = sns.load_dataset('titanic')
x = df[['fare', 'sex', 'age', 'pclass', 'sibsp', 'parch']]
y = df['survived']

# Encoding categorical variable 'sex' and filling missing 'age' values
x = pd.get_dummies(x, columns=['sex'])
x.age.fillna(value=x['age'].mean(), inplace=True)

# Importing Machine Learning Libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Splitting the data
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.85, random_state=42)

# Defining models and their names
models = [LogisticRegression(max_iter=1000), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier(), SVC()]
model_names = ['Logistic Regression', 'Decision Tree', 'Random Forest', 'K-Neighbors Classifier', 'SVC']

# Evaluating each model and storing their accuracy scores
model_score = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    acc = accuracy_score(y_test, pred)
    model_score.append([model_name, acc])

# Sorting models by accuracy score
sorted_model = sorted(model_score, key=lambda x: x[1], reverse=True)
for model in sorted_model:
    print(f"Accuracy Score {model[0]}: {model[1]:.2f}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  x.age.fillna(value=x['age'].mean(), inplace=True)


Accuracy Score Random Forest: 0.84
Accuracy Score Logistic Regression: 0.83
Accuracy Score Decision Tree: 0.76
Accuracy Score K-Neighbors Classifier: 0.74
Accuracy Score SVC: 0.66


## Model Selection based on Precision Score

In [6]:
# Importing Libraries
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 

# Loading DataSet 
df = sns.load_dataset('titanic')
x = df[['fare', 'sex', 'age', 'pclass', 'sibsp', 'parch']]
y = df['survived']

# Encoding categorical variable 'sex' and filling missing 'age' values
x = pd.get_dummies(x, columns=['sex'])
x.age.fillna(value=x['age'].mean(), inplace=True)



# Importing Machine Learning Libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split

# Splitting the data
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=42)

# Defining models and their names
models = [LogisticRegression(max_iter=1000), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier(), SVC()]
model_names = ['Logistic Regression', 'Decision Tree', 'Random Forest', 'K-Neighbors Classifier', 'SVC']

# Evaluating each model and storing their accuracy scores
model_score = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    acc = precision_score(y_test, pred)
    model_score.append([model_name, acc])

# Sorting models by accuracy score
sorted_model = sorted(model_score, key=lambda x: x[1], reverse=True)
for model in sorted_model:
    print(f"Precision Score : {model[0]}: {model[1]:.2f}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  x.age.fillna(value=x['age'].mean(), inplace=True)


Precision Score : Logistic Regression: 0.80
Precision Score : Random Forest: 0.78
Precision Score : SVC: 0.76
Precision Score : Decision Tree: 0.73
Precision Score : K-Neighbors Classifier: 0.66


## Model Selection using F1 Score

In [7]:
# Importing Libraries
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 

# Loading DataSet 
df = sns.load_dataset('titanic')
x = df[['fare', 'sex', 'age', 'pclass', 'sibsp', 'parch']]
y = df['survived']

# Encoding categorical variable 'sex' and filling missing 'age' values
x = pd.get_dummies(x, columns=['sex'])
x.age.fillna(value=x['age'].mean(), inplace=True)



# Importing Machine Learning Libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Splitting the data
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=42)

# Defining models and their names
models = [LogisticRegression(max_iter=1000), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier(), SVC()]
model_names = ['Logistic Regression', 'Decision Tree', 'Random Forest', 'K-Neighbors Classifier', 'SVC']

# Evaluating each model and storing their accuracy scores
model_score = []
for model, model_name in zip(models, model_names):
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    f_one = f1_score(y_test, pred)
    model_score.append([model_name, f_one])

# Sorting models by accuracy score
sorted_model = sorted(model_score, key=lambda x: x[1], reverse=True)
for model in sorted_model:
    print(f"F1 Score : {model[0]}: {model[1]:.2f}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  x.age.fillna(value=x['age'].mean(), inplace=True)


F1 Score : Logistic Regression: 0.76
F1 Score : Random Forest: 0.75
F1 Score : Decision Tree: 0.70
F1 Score : K-Neighbors Classifier: 0.59
F1 Score : SVC: 0.38
