# Hyper-parameter tuning

In [23]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import warnings
warnings.filterwarnings('ignore')

In [24]:
iris = load_iris()
df = pd.DataFrame(iris['data'], columns=['SL', 'SW', 'PL', 'PW'])
df['target'] = iris['target']
df.head()

Unnamed: 0,SL,SW,PL,PW,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [25]:
from sklearn.model_selection import train_test_split
xtr, xts, ytr, yts = train_test_split(
    df[['SL', 'SW', 'PL', 'PW']], df['target']
)
xtr.head()

Unnamed: 0,SL,SW,PL,PW
92,5.8,2.6,4.0,1.2
117,7.7,3.8,6.7,2.2
116,6.5,3.0,5.5,1.8
109,7.2,3.6,6.1,2.5
46,5.1,3.8,1.6,0.2


In [26]:
from sklearn.linear_model import LogisticRegression
modelAsli = LogisticRegression()
modelAsli.fit(xtr, ytr)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [27]:
from sklearn.model_selection import cross_val_score

print(cross_val_score(modelAsli, xtr, ytr))
print(np.mean(cross_val_score(modelAsli, xtr, ytr)))

[0.95652174 0.95652174 1.         1.         0.86363636]
0.9553359683794467


In [28]:
# param yang akan dituned + nilai yg mungkin
penalty = ['l1', 'l2', 'elasticnet', 'none']
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
max_iter = [1, 10, 100, 1000, 10000]

param = {
    'penalty': penalty, 'solver': solver, 'max_iter': max_iter
}
param

{'penalty': ['l1', 'l2', 'elasticnet', 'none'],
 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
 'max_iter': [1, 10, 100, 1000, 10000]}

Hyperparameter Tuning:
- Randomized Search Cross Validation = diambil sample masing2 param
- Grid Search Cross Validation = diambil semua kombinasi param

### Randomized Search CV

In [29]:
from sklearn.model_selection import RandomizedSearchCV
model = LogisticRegression()
modelrs = RandomizedSearchCV(
    estimator= model, param_distributions= param, cv= 5
)

In [30]:
modelrs.fit(xtr, ytr)
modelrs.best_params_

{'solver': 'lbfgs', 'penalty': 'none', 'max_iter': 10}

In [31]:
modelAsli.score(xts, yts)

0.9736842105263158

In [33]:
modelBaru = LogisticRegression(
    solver='lbfgs', penalty='none', max_iter=10
)
modelBaru.fit(xtr, ytr)
modelBaru.score(xts, yts)

1.0

### Grid Search CV

In [34]:
from sklearn.model_selection import GridSearchCV
model = LogisticRegression()
modelgs = GridSearchCV(
    model, param, cv= 5
)

In [35]:
modelgs.fit(xtr, ytr)
modelgs.best_params_

{'max_iter': 100, 'penalty': 'l2', 'solver': 'sag'}

In [31]:
modelAsli.score(xts, yts)

0.9736842105263158

In [36]:
modelBaru = LogisticRegression(
    solver='sag', penalty='l2', max_iter=100
)
modelBaru.fit(xtr, ytr)
modelBaru.score(xts, yts)

1.0