In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from si.data.dataset import Dataset
from si.io.CSV import read_csv
from si.model_selection.cross_validate import cross_validate
from si.model_selection.grid_search import grid_search_cv
from si.model_selection.randomized_search import randomized_search_cv

#Models
from si.linear_model.logistic_regression import LogisticRegression

# Cross validation

In [2]:
#Importing data
# Location of the breast-bin CSV. Set the BREAST_BIN_CSV environment variable
# to run the notebook on another machine; when it is unset, the original
# local path is used unchanged.
DATASET_PATH = os.environ.get(
    "BREAST_BIN_CSV",
    "C:/Users/rober/si/datasets/breast/breast-bin.csv",
)

# The positional arguments after the path are passed through exactly as before.
# NOTE(review): presumably separator, "has feature names" flag and label column
# index -- confirm against the read_csv signature.
data = read_csv(DATASET_PATH, ",", False, -1)

# Standardize the feature matrix in place on the loaded dataset.
# NOTE(review): the scaler is fitted on ALL rows before the later cells split
# the data (they pass test_size=0.3), so held-out rows influence the scaling
# statistics -- confirm this leakage is acceptable for the analysis.
data.X = StandardScaler().fit_transform(data.X)

In [3]:
# Score a default-configured logistic regression with cross_validate,
# requesting 5 repetitions and test_size=0.3.
model = LogisticRegression()
scores = cross_validate(
    model,
    data,
    cv=5,
    test_size=0.3,
)

# Tabulate the returned scores and print them as plain text.
scores_dataframe = pd.DataFrame(scores)
print(scores_dataframe)

1000
1000
1000
1000
1000
   seed     train      test
0   373  0.967347  0.966507
1   379  0.969388  0.961722
2   839  0.967347  0.971292
3   352  0.963265  0.976077
4   373  0.967347  0.966507


# Grid search

In [4]:
model = LogisticRegression()

# Candidate values for each hyperparameter handed to the grid search.
parameters = dict(
    l2_penalty=[1, 10],
    alpha=[0.001, 0.0001],
    max_iter=[1000, 2000],
)

scores = grid_search_cv(model, data, parameters, cv=3, test_size=0.3)

# Each result is read as a mapping with a "parameters" entry; every other
# entry is shown as a column of the per-result score table.
for result in scores:
    print("\n|\n")
    print("Parameters:\n-----------")
    print(result["parameters"])
    print("\nScores:\n-------")
    score_columns = {
        key: value
        for key, value in result.items()
        if key != "parameters"
    }
    print(pd.DataFrame(score_columns))

1000
1000
1000
2000
2000
2000
1000
1000
1000
2000
2000
2000
1000
1000
1000
2000
2000
2000
1000
1000
1000
2000
2000
2000

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}

Scores:
-------
   seed     train      test
0   379  0.969388  0.961722
1   839  0.967347  0.971292
2   352  0.963265  0.976077

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}

Scores:
-------
   seed     train      test
0   373  0.969388  0.966507
1   379  0.969388  0.961722
2   839  0.967347  0.971292

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}

Scores:
-------
   seed     train      test
0   352  0.963265  0.976077
1   373  0.967347  0.966507
2   379  0.969388  0.961722

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}

Scores:
-------
   seed     train      test
0   839  0.965306  0.971292
1   352  0.963265  0.976077
2   373  0.967347  0.966507

|

Parameters:
-----------
{'l2_penalty':

# Randomized search

In [5]:
model = LogisticRegression()

# Candidate values for each hyperparameter handed to the randomized search.
l2_candidates = np.linspace(1, 10, 10).astype(int)
alpha_candidates = np.linspace(0.001, 0.0001, 100)
max_iter_candidates = np.linspace(1000, 2000, 200).astype(int)
parameters = {
    "l2_penalty": l2_candidates,
    "alpha": alpha_candidates,
    "max_iter": max_iter_candidates,
}

scores = randomized_search_cv(model, data, parameters, cv=3, n_iter=10, test_size=0.3)

# Each outcome is read as a mapping with a "parameters" entry; every other
# entry is shown as a column of the per-outcome score table.
for outcome in scores:
    print("\n|\n")
    print("Parameters:\n-----------")
    print(outcome["parameters"])
    print("\nScores:\n-------")
    score_columns = {
        name: values
        for name, values in outcome.items()
        if name != "parameters"
    }
    print(pd.DataFrame(score_columns))

1150
1150
1150
1859
1859
1859
1246
1246
1246
1010
1010
1010
1859
1859
1859
1246
1246
1246
1010
1010
1010
1859
1859
1859
1246
1246
1246
1010
1010
1010

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.0008181818181818182, 'max_iter': 1150}

Scores:
-------
   seed     train      test
0   997  0.973469  0.952153
1    39  0.969388  0.961722
2   589  0.965306  0.971292

|

Parameters:
-----------
{'l2_penalty': 8, 'alpha': 0.0008181818181818182, 'max_iter': 1859}

Scores:
-------
   seed     train      test
0   966  0.961224  0.980861
1   858  0.969388  0.961722
2    57  0.977551  0.942584

|

Parameters:
-----------
{'l2_penalty': 9, 'alpha': 0.0009272727272727273, 'max_iter': 1246}

Scores:
-------
   seed     train      test
0   670  0.967347  0.966507
1   356  0.971429  0.956938
2   982  0.967347  0.971292

|

Parameters:
-----------
{'l2_penalty': 6, 'alpha': 0.0004727272727272728, 'max_iter': 1010}

Scores:
-------
   seed     train      test
0   571  0.965306  0.971292
1    3