In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV

In [2]:
# create a model
model= LinearRegression()

# create the parameters list
print(model.get_params())

{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


In [3]:
from sklearn.ensemble import RandomForestClassifier

# create a model
model= RandomForestClassifier()

# create the parameters list
print(model.get_params())

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


In [4]:
from sklearn.neighbors import KNeighborsClassifier

# create a model
model= KNeighborsClassifier()

# create the parameters list
print(model.get_params())

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


In [5]:
from sklearn.svm import SVC

# create a model
model= SVC()

# create the parameters list
print(model.get_params())

{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}


In [6]:
from sklearn.tree import DecisionTreeClassifier

# create a model
model= DecisionTreeClassifier()

# create the parameters list
print(model.get_params())

{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}


# Lets do hyper parameter tunning for linear regression

In [7]:
# Linear regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [8]:
#load the dataset
import pandas as pd
import numpy as np
import seaborn as sns

df= sns.load_dataset('titanic')

#fill the nan values in the age col
df['age']= df['age'].fillna(df['age'].mean())

In [9]:
X= df['age']
y= df['fare']

# reshape X and y
X= X.values.reshape(-1,1)
y= y.values.reshape(-1,1)


# create a model
model= LinearRegression()
# define parameter grid
param_grid= {'fit_intercept': [True, False]}

# create GridSearchCV object
grid_search= GridSearchCV(model, param_grid, cv=5) #scoring='r2')

# fit the model
grid_search.fit(X, y)

# print best parameters and score
print('Best Parameters:' ,grid_search.best_params_)
print('Best Score:' ,grid_search.best_score_)

Best Parameters: {'fit_intercept': True}
Best Score: -0.0033709951196183806


In [10]:
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [11]:
df['survived'].shape
# to check its null values of survived col
df['survived'].isnull().sum()

0

## KNN Model GridSearch CV

In [12]:
import numpy as np
import pandas as pd
df= sns.load_dataset('titanic')
# fill the nan values of age col
df['age']= df['age'].fillna(df['age'].mean())

X= df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
X.isnull().sum()
y=df['survived']

# get dummies for the 'sex' col in X
X= pd.get_dummies(X, columns=["sex"])
# X= pd.get_dummies(X)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

model= KNeighborsClassifier()

# grid parameters
# param_grid= {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'weights': ['distance', 'uniform']}

# instead writing are the neighbours we can use numpy as well
param_grid ={'n_neighbors': np.arange(1,30,2), 'weights': ['distance','uniform']}  # np.arange mean from 1 to 40 with interval of 2
#create GridSearchCV object 
grid_search= GridSearchCV(model, param_grid, cv=5 , scoring='f1')

# fit the model
grid_search.fit(X, y)
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)



Best Parameters: {'n_neighbors': 7, 'weights': 'distance'}
Best Score: 0.6234282955414142


## We will do the same for Decision Tree Classifier

In [13]:
import numpy as np
import pandas as pd
df= sns.load_dataset('titanic')
# fill the nan values of age col
df['age']= df['age'].fillna(df['age'].mean())

X= df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
X.isnull().sum()
y=df['survived']

# get dummies for the 'sex' col in X
X= pd.get_dummies(X, columns=["sex"])
# X= pd.get_dummies(X)

from sklearn.tree import  DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

model= DecisionTreeClassifier()

# grid parameters
param_grid= {'max_depth': [3,5,7,None], 'min_samples_split': [2,3,4]}

# instead writing are the neighbours we can use numpy as well
# param_grid ={'n_neighbors': np.arange(1,30,2), 'weights': ['distance','uniform']}  # np.arange mean from 1 to 40 with interval of 2
#create GridSearchCV object 
grid_search= GridSearchCV(model, param_grid, cv=5 , scoring='f1')

# fit the model
grid_search.fit(X, y)
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)



Best Parameters: {'max_depth': 5, 'min_samples_split': 2}
Best Score: 0.7375116925970873


# Best Model comparison using Gridsearch CV

In [14]:
# # Import the necessary libraries
# # import libraries
# import pandas as pd
# import numpy as np
# import seaborn as sns
# import matplotlib.pyplot as plt

# df = sns.load_dataset("titanic")
# X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
# y = df['survived']
# X = pd.get_dummies(X, columns=['sex'])
# X.age.fillna(value = X['age'].mean(), inplace=True)


# from sklearn.linear_model import LogisticRegression
# from sklearn.svm import SVC
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
# from sklearn.model_selection import train_test_split

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
# # model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

# # models_scores = []
# # for model, model_name in zip(models, model_names):
# #     model.fit(X_train, y_train)
# #     y_pred = model.predict(X_test)
# #     accuracy = accuracy_score(y_test, y_pred)
# #     models_scores.append([model_name,accuracy])

# # sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
# # for model in sorted_models:
# #     print("Accuracy Score: ",f'{model[0]} : {model[1]:.2f}')


# # Accuracy Score:  Random Forest : 0.81
# # Accuracy Score:  Decision Tree : 0.79
# # Accuracy Score:  KNN : 0.76
# # Accuracy Score:  Logistic Regression : 0.75
# # Accuracy Score:  SVM : 0.74

# models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
# model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']
# models_scores = []

# # define the paramater grid
# param_grid = {
#     'C': [0.1, 1, 10],
#     'gamma': [10,1, 0.1],
#     'kernel': ['linear','rbf']

# }
# # create the grid
# grid = GridSearchCV(SVC(), param_grid, cv=5)
# grid.fit(X, y)
# print("Best Parameters:{}".format(grid.best_params_))
# print("Best Cross_val_score:{:0.2f}".format(grid.best_score_))

# # for model, model_name in zip(models, model_names):
# #     model.fit(X_train, y_train)
# #     y_pred = model.predict(X_test)
# #     Precision = precision_score(y_test, y_pred)
# #     models_scores.append([model_name,Precision])

# sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
# for model in sorted_models:
#     print("Precision Score: ", f'{model[0]} : {model[1]:.2f}')

# # Precision Score:  Random Forest : 0.80
# # Precision Score:  Decision Tree : 0.78
# # Precision Score:  KNN : 0.75
# # Precision Score:  Logistic Regression : 0.74
# # Precision Score:  SVM : 0.73

# # models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
# # model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']
# # models_scores = []
# # for model, model_name in zip(models, model_names):
# #     model.fit(X_train, y_train)
# #     y_pred = model.predict(X_test)
# #     Recall = recall_score(y_test, y_pred)
# #     models_scores.append([model_name,Recall])

# # sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
# # for model in sorted_models:
# #     print("Recall Score: ",f'{model[0]} : {model[1]:.2f}')

# # Recall Score:  Random Forest : 0.74
# # Recall Score:  Decision Tree : 0.72
# # Recall Score:  KNN : 0.68
# # Recall Score:  Logistic Regression : 0.67
# # Recall Score:  SVM : 0.65

# # models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
# # model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']
# # models_scores = []
# # for model, model_name in zip(models, model_names):
# #     model.fit(X_train, y_train)
# #     y_pred = model.predict(X_test)
# #     F1 = f1_score(y_test, y_pred)
# #     models_scores.append([model_name,F1])

# # sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
# # for model in sorted_models:
# #     print("F1 Score: ",f'{model[0]} : {model[1]:.2f}')

# # F1 Score:  Random Forest : 0.77
# # F1 Score:  Decision Tree : 0.75
# # F1 Score:  KNN : 0.71
# # F1 Score:  Logistic Regression : 0.70
# # F1 Score:  SVM : 0.68

In [16]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Load the titanic dataset (you can replace this with your own dataset)
df = sns.load_dataset("titanic")
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex'])
X.age.fillna(value = X['age'].mean(), inplace=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the models and their hyperparameter grids for GridSearchCV
models = {
    'SVM': {
        'model': SVC(),
        'params': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf']
        }
    },
    'Random Forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 5, 10]
        }
    },
    'Logistic Regression': {
        'model': LogisticRegression(),
        'params': {
            'C': [0.1, 1, 10],
            'penalty': ['l1', 'l2']
        }
    }
}

# Perform GridSearchCV for each model and print the results
for models, model_info in models.items():
    model = model_info['model']
    params = model_info['params']
    
    grid_search = GridSearchCV(model, params, cv=5)
    grid_search.fit(X_train, y_train)
    
    print(f"Model: {model}")
    print(f"Best Score: {model.best_score_}")
    print(f"Best Parameters: {model.best_params_}")

Model: SVC()


AttributeError: 'SVC' object has no attribute 'best_score_'