# GridSearchCV

Search for the best model hyperparameters.

In [None]:
# from sklearn.datasets import load_digits
# from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


import pandas as pd

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV


from sklearn.model_selection import RepeatedStratifiedKFold

from sklearn.datasets import load_iris
# import seaborn as sn

# Manual without GridSearch

In [None]:
# Load the iris dataset; the search cells further down pass its `data`
# and `target` attributes to `fit` as features and labels.
iris = load_iris()

In [None]:
# Load the COVID dataset and keep one feature ("age") and the label ("covid").
df = pd.read_csv("./data/covid.csv")

# X stays two-dimensional (double brackets -> DataFrame) because
# scikit-learn expects a feature matrix. y is selected as a one-dimensional
# Series so that fitting does not raise a column-vector DataConversionWarning.
X = df[["age"]]
y = df["covid"]

# Hold out 20% of the rows for testing. No random_state is passed, so the
# split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [None]:
# Create an instance of the required model with a hand-picked solver.
# Done manually, only one solver is tried per run: change the argument
# (e.g. to "lbfgs") and re-run the cell to compare another one.
model = LogisticRegression(solver="liblinear")

# Train the model
model.fit(X_train, y_train)

# Using GridSearchCV

In [None]:
# 0. Create an instance of the model
# No hyperparameters are passed here; the search below supplies the
# candidate values.
model = LogisticRegression()

In [None]:
# 1. Define the parameter grid of the algorithm
# Every solver/C combination listed here becomes one candidate. Solver names
# must match scikit-learn's spelling exactly ("lbfgs"), otherwise every fit
# that uses the misspelled value fails.
param_grid = {
    "solver": ["lbfgs", "liblinear"],
    "C": [100, 10, 1.0, 0.1],
}


In [None]:
# 2. Define the K-Folds for Cross Validation
# Stratified 10-fold splitting, repeated 3 times. No random_state is passed,
# so the fold assignment is not fixed between runs.
cv = RepeatedStratifiedKFold(n_splits= 10 , n_repeats= 3)


In [None]:
# 3. Define the GridSearchCV
# Search over `param_grid` for `model`, splitting with `cv` and ranking
# candidates by accuracy.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=cv,
    scoring="accuracy",
)

# Build and Train the GridSearch

In [None]:
# 4. Fit the GridSearchCV on the iris features and labels.
# NOTE(review): scikit-learn's `fit` is documented to return the fitted
# object itself, so `grid_result` and `grid_search` should be the same
# object; later cells use both names.
grid_result = grid_search.fit(iris.data, iris.target)

# Show the best Model score and the best Parameter combination

In [None]:
# Report the best mean cross-validated score, then the parameter
# combination that produced it, one per line.
print(grid_result.best_score_, grid_result.best_params_, sep="\n")

In [None]:
# Put the search's `cv_results_` into a DataFrame for inspection.
results_df = pd.DataFrame(grid_search.cv_results_)
# Bare expression: displayed as the cell output when run in a notebook.
results_df

# Using GridSearchCV to also compare several models

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


In [None]:
# Cross-validation splitter used by the multi-model search below:
# stratified 10-fold splitting, repeated 3 times.
cv = RepeatedStratifiedKFold(n_splits= 10 , n_repeats= 3)

In [None]:
# Map a display name to the estimator to tune ("model") and the
# hyperparameter values to try for it ("param_grid").
model_param_grid = {
    "logisitc_regression": {
        "model": LogisticRegression(),
        "param_grid": {
            # Solver names must match scikit-learn's spelling exactly;
            # a misspelled value makes every fit that uses it fail.
            "solver": ["lbfgs", "liblinear"],
            "C": [100, 10, 1.0, 0.1],
        },
    },
    "randomforest": {
        "model": RandomForestClassifier(),
        "param_grid": {
            "n_estimators": [10, 50, 100, 500],
        },
    },
}

In [None]:
# Collect the best score and best parameters found for every model.
scores = []

for model_name, model_param in model_param_grid.items():
    # Randomized search evaluates n_iter=3 sampled candidates from the
    # model's grid instead of every combination. For an exhaustive search,
    # use GridSearchCV(estimator=..., param_grid=..., cv=cv) here instead.
    grid_search = RandomizedSearchCV(
        estimator=model_param["model"],
        param_distributions=model_param["param_grid"],
        cv=cv,
        n_iter=3,
    )

    grid_search.fit(iris.data, iris.target)

    scores.append(
        {
            "model": model_name,
            "best_score": grid_search.best_score_,
            "best_param": grid_search.best_params_,
        }
    )




In [None]:
# One row per model: its name, best cross-validated score and the
# parameters that achieved it.
df_multi_grid = pd.DataFrame(scores, columns= ["model", "best_score", "best_param"])

# Bare expression: displayed as the cell output when run in a notebook.
df_multi_grid