In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Hyperparameter Tuning

- Memanfaatkan Cross validation untuk mencari best value dari parameter model
- Ada 2 cara melakukan hyperparameter tuning:
    - __Randomized Search CV__ : nilai & parameter diacak random
    - __Grid Search CV__ : 
    
Langkah secara umum :
1. Lakukan Cross Validation
2. Berikan Opsi untuk parameter yang akan diatur, misal:
    
    ``` penalty = ['l1','l2','ealsticnet','none']```
    
    ``` max_iter = [10, 100, 1000, 10000]```



<hr>

### Hyperparameter tuning example : Iris Dataset, Logistic Regression()

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [4]:
data = load_iris()

dfIris = pd.DataFrame(
    data['data'],
    columns = ['SL','SW','PL','PW']
)
dfIris['target'] = data['target']
dfIris.head()

Unnamed: 0,SL,SW,PL,PW,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [7]:
xtr,xts,ytr,yts = train_test_split(dfIris[['SL','SW','PL','PW']], dfIris['target'], train_size=.8)

In [21]:
modelin = LogisticRegression()
modelin.fit(xtr,ytr)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [22]:
from sklearn.model_selection import cross_val_score
cross_val_score (LogisticRegression(), xtr, ytr, cv=5)
print(np.mean(cross_val_score (LogisticRegression(), xtr, ytr, cv=5)))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.9666666666666668


In [43]:
print(modelin.score(xts,yts))

0.9666666666666667


- Meningkatkan akurasi model dengan mencari best value untuk parameter:
    - ```penalty = [‘l1’, ‘l2’, ‘elasticnet’, ‘none’]```
    - ``` solver = [‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’]```
    - ``` max_iter = [10,100,1000,10000]```

In [16]:
penalty =['l1','l2','elasticnet','none']
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
max_iter = [10,100,1000,10000]

param = {'penalty' : penalty, 'solver' : solver, 'max_iter' : max_iter}

{'penalty': ['l1', 'l2', 'elasticnet', 'none'],
 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
 'max_iter': [10, 100, 1000, 10000]}

<hr>

### 1. Randomized Search CV

In [14]:
from sklearn.model_selection import RandomizedSearchCV

In [15]:
model = LogisticRegression()

In [18]:
modelrs = RandomizedSearchCV(
    estimator = model,
    param_distributions = param,
    cv = 5
)

In [19]:
modelrs.fit(xtr,ytr)

ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

ValueError: penalty='none' is not supported for the liblinear solver

ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.



RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=LogisticRegression(C=1.0, class_weight=None,
                                                dual=False, fit_intercept=True,
                                                intercept_scaling=1,
                                                l1_ratio=None, max_iter=100,
                                                multi_class='auto', n_jobs=None,
                                                penalty='l2', random_state=None,
                                                solver='lbfgs', tol=0.0001,
                                                verbose=0, warm_start=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'max_iter': [10, 100, 1000, 10000],
                                        'penalty': ['l1', 'l2', 'elasticnet',
                                                    'none'],
                                        'solver': ['newton-cg', 'l

In [20]:
modelrs.best_params_

{'solver': 'sag', 'penalty': 'none', 'max_iter': 10000}

In [24]:
modelRSbest = LogisticRegression(solver='sag', penalty='none', max_iter=10000)

In [26]:
modelRSbest.fit(xtr,ytr)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=10000,
                   multi_class='auto', n_jobs=None, penalty='none',
                   random_state=None, solver='sag', tol=0.0001, verbose=0,
                   warm_start=False)

In [42]:
modelRSbest.score(xts,yts)

1.0

<hr>

### 1. Grid Cross Validation

In [29]:
from sklearn.model_selection import GridSearchCV

In [30]:
model = LogisticRegression()

In [33]:
modelgs = GridSearchCV(
    model,
    param,
    cv = 5
)

In [34]:
modelgs.fit(xtr,ytr)

ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
ST

GridSearchCV(cv=5, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'max_iter': [10, 100, 1000, 10000],
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                         'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag',
                                    'saga']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scorin

In [36]:
print(modelgs.best_params_)

{'max_iter': 10, 'penalty': 'none', 'solver': 'newton-cg'}


In [38]:
modelGSbest = LogisticRegression(max_iter = 10, penalty = 'none', solver = 'newton-cg')

In [40]:
modelGSbest.fit(xtr,ytr)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=10,
                   multi_class='auto', n_jobs=None, penalty='none',
                   random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
                   warm_start=False)

In [41]:
modelGSbest.score(xts,yts)

1.0