# Grid Search Cross Validation 

In [1]:
from sklearn.linear_model import LinearRegression
# create a model
model= LinearRegression()

#get the parameters list
print(model.get_params())

{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


In [1]:
from sklearn.linear_model import LogisticRegression
# create a model
model= LogisticRegression()

#get the parameters list
print(model.get_params())

{'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}


In [2]:
from sklearn.ensemble import RandomForestClassifier
# create a model
model= RandomForestClassifier()

#get the parameters list
print(model.get_params())

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


In [3]:
from sklearn.svm import SVC
# create a model
model=SVC()

#get the parameters list
print(model.get_params())

{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}


In [4]:
from sklearn.tree import DecisionTreeClassifier
# create a model
model=DecisionTreeClassifier()

#get the parameters list
print(model.get_params())

{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}


In [5]:
from sklearn.neighbors import KNeighborsClassifier
# create a model
model=KNeighborsClassifier()

#get the parameters list
print(model.get_params())

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


# LinearRegression GridSearchCV

In [6]:
import pandas as pd
import seaborn as sns
df= sns.load_dataset('titanic')
df['age']=df.age.fillna(df['age'].mean())
X=df[['age']]
y=df['fare']

# For Linear Regression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

#creating the model
model= LogisticRegression()

# define parameter grid
param_grid = {'fit_intercept': [True, False]}

#object grid search cv (creating the model)
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')  # scoring='r2' for regression

# trainning the model
grid_search.fit(X,y)

# print the best parameters
print("Best parameters: ", grid_search.best_params_)

Best parameters:  {'fit_intercept': True}


# KNN GridSearchCV

In [7]:
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [18]:
import pandas as pd
import numpy as np
import seaborn as sns
df= sns.load_dataset('titanic')
X=df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y=df['survived']
X=pd.get_dummies(X, columns=['sex'])
X['age']=df.age.fillna(X['age'].mean())

# For Knn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

#creating the model
model= KNeighborsClassifier()

# define parameter grid
param_grid = {'n_neighbors': np.arange(1,30,1),'weights': ['uniform', 'distance']}

#object grid search cv (creating the model)
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='f1')

# trainning the model
grid_search.fit(X,y)

# print the best parameters
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

Best parameters:  {'n_neighbors': 8, 'weights': 'distance'}
Best score:  0.6394577789355196


# DecisionTreeClassifier GridSearchCV

In [19]:
import pandas as pd
import numpy as np
import seaborn as sns
df= sns.load_dataset('titanic')
X=df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y=df['survived']
X=pd.get_dummies(X, columns=['sex'])
X['age']=df.age.fillna(X['age'].mean())

# For DTC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

#creating the model
model= DecisionTreeClassifier()

# define parameter grid
param_grid = {'max_depth': [None,3,5,6], 'min_samples_leaf': [1,2,3,4]}

#object grid search cv (creating the model)
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')

# trainning the model
grid_search.fit(X,y)

# print the best parameters
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

Best parameters:  {'max_depth': 6, 'min_samples_leaf': 2}
Best score:  0.8177374987719814


# LogisticRegression GridSearchCV

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
df= sns.load_dataset('titanic')
X=df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y=df['survived']
X=pd.get_dummies(X, columns=['sex'])
X['age']=df.age.fillna(X['age'].mean())

# For Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

#creating the model
model= LogisticRegression()

# define parameter grid
param_grid = {'class_weight': [None,1,2,3], 'dual': [False, True], 'fit_intercept': [True, False]}

#object grid search cv (creating the model)
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')

# trainning the model
grid_search.fit(X,y)

# print the best parameters
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Best parameters:  {'class_weight': None, 'dual': False, 'fit_intercept': False}
Best score:  0.7547782042518885


70 fits failed out of a total of 80.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\linear_model\_logistic.py", line 59, in _

# Random Forest GridsearchCV

In [23]:
import pandas as pd
import numpy as np
import seaborn as sns
df= sns.load_dataset('titanic')
X=df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y=df['survived']
X=pd.get_dummies(X, columns=['sex'])
X['age']=df.age.fillna(X['age'].mean())

# For Random forest classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

#creating the model
model= RandomForestClassifier()

# define parameter grid
param_grid = { 'max_depth': [None,1,5,7], 'max_leaf_nodes': [None,1,5,7], 'max_samples': [None,2,3,7]}

#object grid search cv (creating the model)
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')

# trainning the model
grid_search.fit(X,y)

# print the best parameters
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Best parameters:  {'max_depth': None, 'max_leaf_nodes': 5, 'max_samples': 3}
Best score:  0.9672672672672672


80 fits failed out of a total of 320.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
80 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\ensemble\_forest.py", line 340, in fit
    self._validate_params()
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 600, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\HP\AppData\Local\Programs\Python\Python311\Lib\site-pack

---
# July 16, 2023