Best Model and best parameters hyper tunining of model parameters

### Accuracy & Precision score

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Load the Titanic dataset from seaborn
df = sns.load_dataset('titanic')
df['age'] = df['age'].fillna(df['age'].mean())

# Define the input features (X) and the output (y)
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex'])

# Create the models
models = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier()
]
model_names = ['KNN', 'Decision Tree', 'Random Forest']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform grid search for each model and print the results
for model, model_name in zip(models, model_names):
    param_grid = {'n_neighbors': np.arange(1, 30, 2), 'weights': ['uniform', 'distance']}
    if model_name == 'Decision Tree':
        param_grid = {'max_depth': [None, 5, 10, 15]}
    elif model_name == 'Random Forest':
        param_grid = {'n_estimators': [100, 200, 300], 'max_depth': [None, 5, 10]}
        
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')
    grid_search.fit(X_train, y_train)
    
    y_pred = grid_search.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    
    print(f"Model: {model_name}")
    print("Best Parameters:", grid_search.best_params_)
    print("Best Score:", grid_search.best_score_)
    print("Accuracy Score:", accuracy)
    print("Precision Score:", precision)
    print("-" * 50)


Model: KNN
Best Parameters: {'n_neighbors': 29, 'weights': 'distance'}
Best Score: 0.6765567765567765
Accuracy Score: 0.7262569832402235
Precision Score: 0.7272727272727273
--------------------------------------------------
Model: Decision Tree
Best Parameters: {'max_depth': 5}
Best Score: 0.8085194279936443
Accuracy Score: 0.8044692737430168
Precision Score: 0.8305084745762712
--------------------------------------------------
Model: Random Forest
Best Parameters: {'max_depth': 5, 'n_estimators': 100}
Best Score: 0.8357601703848718
Accuracy Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------


### Recall score

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Load the Titanic dataset from seaborn
df = sns.load_dataset('titanic')
df['age'] = df['age'].fillna(df['age'].mean())

# Define the input features (X) and the output (y)
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex'])

# Create the models
models = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier()
]
model_names = ['KNN', 'Decision Tree', 'Random Forest']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform grid search for each model and print the results
for model, model_name in zip(models, model_names):
    param_grid = {'n_neighbors': np.arange(1, 30, 2), 'weights': ['uniform', 'distance']}
    if model_name == 'Decision Tree':
        param_grid = {'max_depth': [None, 5, 10, 15]}
    elif model_name == 'Random Forest':
        param_grid = {'n_estimators': [100, 200, 300], 'max_depth': [None, 5, 10]}
        
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')
    grid_search.fit(X_train, y_train)
    
    y_pred = grid_search.predict(X_test)
  
    Recall = recall_score(y_test, y_pred)
    print(f"Model: {model_name}")
    print("Best Parameters:", grid_search.best_params_)
    print("Best Score:", grid_search.best_score_)
    print("Recall Score:", accuracy)
    print("Precision Score:", precision)
    print("-" * 50)


Model: KNN
Best Parameters: {'n_neighbors': 29, 'weights': 'distance'}
Best Score: 0.6765567765567765
Recall Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------
Model: Decision Tree
Best Parameters: {'max_depth': 5}
Best Score: 0.8121739130434783
Recall Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------
Model: Random Forest
Best Parameters: {'max_depth': 5, 'n_estimators': 200}
Best Score: 0.8353620394932639
Recall Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------


### f1_score

In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Load the Titanic dataset from seaborn
df = sns.load_dataset('titanic')
df['age'] = df['age'].fillna(df['age'].mean())

# Define the input features (X) and the output (y)
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex'])

# Create the models
models = [
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    RandomForestClassifier()
]
model_names = ['KNN', 'Decision Tree', 'Random Forest']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform grid search for each model and print the results
for model, model_name in zip(models, model_names):
    param_grid = {'n_neighbors': np.arange(1, 30, 2), 'weights': ['uniform', 'distance']}
    if model_name == 'Decision Tree':
        param_grid = {'max_depth': [None, 5, 10, 15]}
    elif model_name == 'Random Forest':
        param_grid = {'n_estimators': [100, 200, 300], 'max_depth': [None, 5, 10]}
        
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')
    grid_search.fit(X_train, y_train)
    
    y_pred = grid_search.predict(X_test)
  
    F1 = f1_score(y_test, y_pred)
    print(f"Model: {model_name}")
    print("Best Parameters:", grid_search.best_params_)
    print("Best Score:", grid_search.best_score_)
    print("F1 Score:", accuracy)
    print("Precision Score:", precision)
    print("-" * 50)


Model: KNN
Best Parameters: {'n_neighbors': 29, 'weights': 'distance'}
Best Score: 0.6765567765567765
F1 Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------
Model: Decision Tree
Best Parameters: {'max_depth': 5}
Best Score: 0.8046584241326405
F1 Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------
Model: Random Forest
Best Parameters: {'max_depth': 5, 'n_estimators': 300}
Best Score: 0.8315343366199619
F1 Score: 0.7988826815642458
Precision Score: 0.796875
--------------------------------------------------
