# Cross Validate Test

In [3]:
from src.si.IO.csv_file import read_csv
from sklearn.preprocessing import StandardScaler
from src.si.model_selection.cross_validate import cross_validate
from src.si.model_selection.grid_search import grid_search_cv
from src.si.model_selection.randomized_grid_search import randomized_search_cv
from src.si.linear_model.logistic_regression import LogisticRegression
import numpy as np

In [4]:
breast_bin_path = "/home/rui/Desktop/universidade/SIB/si/datasets/breast-bin.csv"
dataset = read_csv(breast_bin_path, label=True)

In [5]:
dataset.X = StandardScaler().fit_transform(dataset.X)

In [7]:
lg_model = LogisticRegression()
score = cross_validate(lg_model, dataset=dataset, cv=5)
score

{'seed': [741, 150, 881, 874, 663],
 'train': [0.9660107334525939,
  0.962432915921288,
  0.962432915921288,
  0.9677996422182469,
  0.9677996422182469],
 'test': [0.9712230215827338,
  0.9856115107913669,
  0.9856115107913669,
  0.9640287769784173,
  0.9640287769784173]}

# Grid Search test

In [8]:
lg_model = LogisticRegression()
lg_model_parameters = {'l2_penalty': [1, 10],
             'alpha': [0.001, 0.0001],
            'max_iter': [1000, 2000]}

scores = grid_search_cv(lg_model, dataset, lg_model_parameters, cv=3)
scores

[{'seed': [751, 321, 589],
  'train': [0.9660107334525939, 0.9713774597495528, 0.9660107334525939],
  'test': [0.9712230215827338, 0.9496402877697842, 0.9712230215827338],
  'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}},
 {'seed': [815, 164, 421],
  'train': [0.9713774597495528, 0.964221824686941, 0.9731663685152058],
  'test': [0.9496402877697842, 0.9784172661870504, 0.9424460431654677],
  'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}},
 {'seed': [473, 56, 567],
  'train': [0.9677996422182469, 0.9660107334525939, 0.9695885509838998],
  'test': [0.9640287769784173, 0.9712230215827338, 0.9568345323741008],
  'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}},
 {'seed': [364, 961, 592],
  'train': [0.960644007155635, 0.960644007155635, 0.964221824686941],
  'test': [0.9928057553956835, 0.9928057553956835, 0.9784172661870504],
  'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}},
 {'seed': [631, 587, 772],
  'train

# Randomized Search test

In [9]:
lg_model = LogisticRegression()

lg_model_parameters = {'l2_penalty': np.linspace(1, 10, 10),
             'alpha': np.linspace(0.001, 0.0001, 100),
            'max_iter': np.linspace(1000, 2000, 200)}

scores = randomized_search_cv(lg_model, dataset, lg_model_parameters, cv=3)
scores

{'parameters': [{'l2_penalty': 7.0,
   'alpha': 0.0009454545454545454,
   'max_iter': 1170.854271356784},
  {'l2_penalty': 7.0,
   'alpha': 0.0008636363636363636,
   'max_iter': 1326.6331658291458},
  {'l2_penalty': 2.0,
   'alpha': 0.0008181818181818182,
   'max_iter': 1572.8643216080402},
  {'l2_penalty': 10.0,
   'alpha': 0.00025454545454545456,
   'max_iter': 1984.924623115578},
  {'l2_penalty': 2.0,
   'alpha': 0.0001454545454545455,
   'max_iter': 1256.281407035176},
  {'l2_penalty': 9.0,
   'alpha': 0.00038181818181818184,
   'max_iter': 1231.1557788944724},
  {'l2_penalty': 8.0,
   'alpha': 0.0002090909090909091,
   'max_iter': 1216.0804020100502},
  {'l2_penalty': 9.0,
   'alpha': 0.00023636363636363633,
   'max_iter': 1854.2713567839196},
  {'l2_penalty': 6.0,
   'alpha': 0.0005363636363636364,
   'max_iter': 1135.678391959799},
  {'l2_penalty': 8.0,
   'alpha': 0.0002181818181818182,
   'max_iter': 1376.8844221105528}],
 'seeds': [881, 413, 957, 838, 805, 480, 828, 884, 287,