SIB - P7
A Jupyter Notebook with examples of how to use k-fold cross-validation and grid search. Date: 30/10/2023

In [16]:
# imports
import os

from si.io.csv_file import read_csv
from si.models.logistic_regression import LogisticRegression
from si.model_selection.cross_validate import k_fold_cross_validation
from si.model_selection.grid_search import grid_search_cv

In [17]:
# datasets
# Root folder that holds the datasets. Set the SI_DATASETS_DIR environment
# variable to run this notebook on another machine; the fallback is the
# original hard-coded location, so the default behaviour is unchanged.
DATASETS_DIR = os.environ.get(
    'SI_DATASETS_DIR',
    'C:/Users/luis-/Documents/GitHub/Sistemas_inteligentes/datasets',
)

# Load the breast-bin dataset used by every cell below.
# NOTE(review): features=False / label=True are forwarded to read_csv as-is;
# presumably "no header row with feature names, last column is the label" —
# confirm against si.io.csv_file.read_csv.
breast_bin_dataset = read_csv(f'{DATASETS_DIR}/breast-bin/breast-bin.csv', features=False, label=True)


In [18]:
# cross validation
# Evaluate a LogisticRegression built with its default constructor arguments
# using 5 folds; the call returns one score per fold.
lg = LogisticRegression()

# Stored under its own name: the grid-search cell rebinds `scores` to a dict,
# so sharing the name would make this list unreachable and mix two result types.
# NOTE(review): no seed is passed, so the fold split may differ between runs —
# confirm whether k_fold_cross_validation accepts a seed and pass one if so.
cv_scores = k_fold_cross_validation(lg, breast_bin_dataset, cv=5)
cv_scores

[0.9712230215827338,
 0.9784172661870504,
 0.9712230215827338,
 0.9640287769784173,
 0.9496402877697842]

In [19]:
# grid search cv

# new model instance for the search (replaces the one used for cross validation)
lg = LogisticRegression()

# candidate values for each hyperparameter: 2 * 3 * 6 = 36 combinations
parameter_grid = dict(
    l2_penalty=(1, 10),
    alpha=(1e-3, 1e-4, 1e-5),
    max_iter=tuple(range(1000, 7000, 1000)),
)

# run the search over the grid with cv=3; the result is a dict that the
# following cells index by 'scores', 'best_hyperparameters' and 'best_score'
scores = grid_search_cv(
    lg,
    breast_bin_dataset,
    hyperparameter_grid=parameter_grid,
    cv=3,
)

scores

{'scores': [0.9669540229885056,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9683908045977011,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885056,
  0.9683908045977011,
  0.9669540229885057,
  0.9669540229885056,
  0.9655172413793104,
  0.9669540229885057,
  0.9669540229885057,
  0.9655172413793104,
  0.9655172413793104,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885056,
  0.9669540229885057,
  0.9655172413793104,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885056,
  0.9669540229885057,
  0.9669540229885057,
  0.9669540229885057,
  0.9683908045977012,
  0.9669540229885056,
  0.9669540229885057,
  0.9669540229885056,
  0.9669540229885057],
 'hyperparameters': [{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000},
  {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000},
  {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 3000},
  {'l2_penalt

In [20]:
# scores: one cross-validation score per hyperparameter combination tried by
# the grid search (36 entries for the 2 x 3 x 6 grid).
# Presumably aligned index-by-index with scores['hyperparameters'] — confirm
# against grid_search_cv.
scores['scores']

[0.9669540229885056,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9683908045977011,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885056,
 0.9683908045977011,
 0.9669540229885057,
 0.9669540229885056,
 0.9655172413793104,
 0.9669540229885057,
 0.9669540229885057,
 0.9655172413793104,
 0.9655172413793104,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885056,
 0.9669540229885057,
 0.9655172413793104,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885056,
 0.9669540229885057,
 0.9669540229885057,
 0.9669540229885057,
 0.9683908045977012,
 0.9669540229885056,
 0.9669540229885057,
 0.9669540229885056,
 0.9669540229885057]

In [21]:
# best hyperparameters: the single combination from the grid that the search
# result reports as best (one value for each of l2_penalty, alpha, max_iter)
scores['best_hyperparameters']

{'l2_penalty': 10, 'alpha': 1e-05, 'max_iter': 2000}

In [22]:
# best score: the score reported alongside the best hyperparameters; in this
# run it equals the largest value in scores['scores']
scores['best_score']

0.9683908045977012