# 4.1.4 Búsqueda aleatoria de hiperparámetros usando RandomizedSearchCV

In [7]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so edits to source files are picked up without
# restarting the kernel.
# NOTE(review): re-running this cell in a live kernel only prints the
# "already loaded" notice seen in the saved output.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
import warnings

import scipy
from scipy import stats
from sklearn.datasets import load_digits
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# Suppress every Python warning for the rest of the session.
# NOTE(review): presumably done to keep cell outputs clean, but it also hides
# any warning raised while the models are being fitted — consider a narrower
# filter.
warnings.filterwarnings("ignore")

## 4.1.4.1 Parametrización de la búsqueda

In [9]:
# Search space for the random search, given as a list of two dictionaries so
# that every sampled candidate belongs to exactly one kernel family and only
# carries the hyperparameters listed for that family.
#
# The distributions come from the explicitly imported `stats` submodule: a bare
# `import scipy` does not guarantee that `scipy.stats` is loaded on every SciPy
# version, so `scipy.stats.expon` could only work by accident.
param_distributions = [
    # -------------------------------------------------------------------------
    # First model (RBF kernel): `gamma` and `C` are drawn from continuous
    # exponential distributions with scale (mean) 0.1 and 100 respectively.
    {
        "kernel": ["rbf"],
        "gamma": stats.expon(scale=0.1),
        "C": stats.expon(scale=100),
    },
    # -------------------------------------------------------------------------
    # Second model (linear kernel): `C` is picked from a short list of fixed
    # candidate values; no `gamma` entry is defined for this family.
    {
        "kernel": ["linear"],
        "C": [1, 10, 100],
    },
]

# Randomized hyperparameter search over SVC models. The estimator and the
# search space are passed positionally; every other option is given by keyword.
randomizedSearchCV = RandomizedSearchCV(
    # Base model: any object implementing the scikit-learn estimator interface.
    SVC(),
    # Parameter names mapped to the distributions or candidate lists to sample.
    param_distributions,
    # Seed for the sampler, so the same candidates are drawn on every run.
    random_state=12345,
    # Number of parameter settings drawn from the search space.
    n_iter=10,
    # Metric used to evaluate each cross-validated candidate.
    scoring="accuracy",
    # Cross-validation splitting strategy (an integer sets the number of folds).
    cv=5,
    # Once the search ends, refit the best configuration on the whole dataset
    # passed to fit().
    refit=True,
)

## 4.1.4.2 Preparación de los datos  

In [10]:
# Load the digits dataset bundled with scikit-learn.
digits = load_digits()

# Flatten each image into one feature row: the first axis (one entry per
# sample) is kept and the remaining axes are collapsed into columns.
n_samples = digits.images.shape[0]
X = digits.images.reshape(n_samples, -1)
y = digits.target

# Hold out half of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.5
)

## 4.1.4.3 Realización de la búsqueda

In [11]:
# Run the randomized search on the training split only; X_test / y_test are
# not passed to the search.
randomizedSearchCV.fit(X_train, y_train)

## 4.1.4.4 Valores retornados

In [13]:
# Dictionary with the per-candidate results of the search (fit/score timings,
# sampled parameter values, ...), as shown in the output below.
randomizedSearchCV.cv_results_

{'mean_fit_time': array([0.1315886 , 0.01823463, 0.11291604, 0.10881763, 0.01460547,
        0.07392693, 0.16717968, 0.01346097, 0.13881478, 0.01471157]),
 'std_fit_time': array([0.01397431, 0.0071627 , 0.00611838, 0.00909313, 0.00161114,
        0.0031472 , 0.02025033, 0.00088228, 0.06217673, 0.00059682]),
 'mean_score_time': array([0.02008986, 0.00449457, 0.01933832, 0.02372823, 0.00394163,
        0.01258116, 0.01796761, 0.00400629, 0.02312775, 0.00359197]),
 'std_score_time': array([0.00350255, 0.00210363, 0.00492275, 0.0028163 , 0.00099783,
        0.00107992, 0.00287154, 0.00136548, 0.00670272, 0.0003321 ]),
 'param_C': masked_array(data=[220.8682396496381, 100, 83.86933864671792,
                    61.97597554687909, 100, 222.88118074128667,
                    11.254669304411362, 10, 81.28259452454746, 1],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_ga

In [14]:
#
# Estimator selected by the search, i.e. the one that obtained the highest
# score (or the smallest loss, if a loss was specified) on the left-out data.
# The search above was created with refit=True, which is what makes this
# attribute available.
#
randomizedSearchCV.best_estimator_

In [15]:
# Mean cross-validated score of the best candidate; the search was configured
# with scoring="accuracy", so this is an accuracy value.
randomizedSearchCV.best_score_

0.9833022967101179

In [16]:
# Parameter setting that achieved the best score during the search.
randomizedSearchCV.best_params_

{'C': 222.88118074128667, 'gamma': 0.0027155110061623483, 'kernel': 'rbf'}

## 4.1.4.5 Pronóstico con el mejor modelo

In [17]:
# Predict digit labels with the best model found by the search.
# NOTE(review): the predictions are made on X_train, the same data used to run
# the search; the held-out X_test split is never used in the visible cells —
# confirm whether X_test was intended here.
randomizedSearchCV.predict(X_train)

array([1, 4, 9, 0, 4, 1, 1, 5, 9, 1, 4, 2, 6, 3, 9, 7, 6, 4, 8, 6, 8, 7,
       6, 0, 5, 9, 4, 7, 3, 4, 9, 4, 9, 7, 9, 1, 5, 6, 0, 0, 4, 3, 6, 1,
       0, 9, 4, 8, 7, 5, 9, 8, 4, 5, 0, 1, 6, 0, 5, 5, 0, 4, 3, 2, 8, 7,
       6, 3, 4, 2, 5, 8, 0, 6, 9, 4, 5, 4, 9, 7, 3, 3, 1, 4, 4, 2, 6, 8,
       1, 1, 0, 3, 7, 4, 6, 7, 4, 0, 5, 2, 9, 2, 1, 9, 2, 3, 1, 7, 7, 4,
       5, 6, 5, 6, 7, 8, 1, 4, 3, 4, 4, 3, 5, 3, 3, 4, 7, 9, 8, 0, 6, 1,
       9, 0, 8, 4, 1, 2, 3, 9, 7, 8, 8, 8, 3, 7, 5, 7, 0, 1, 7, 8, 3, 8,
       0, 4, 8, 6, 2, 3, 6, 7, 3, 7, 7, 1, 3, 5, 0, 9, 8, 5, 3, 1, 2, 0,
       3, 6, 0, 3, 4, 1, 2, 3, 1, 0, 5, 8, 9, 3, 9, 6, 6, 8, 9, 0, 7, 8,
       2, 0, 0, 7, 7, 4, 5, 3, 1, 8, 5, 9, 6, 2, 9, 7, 7, 9, 5, 4, 2, 6,
       6, 1, 3, 4, 7, 2, 8, 0, 6, 1, 6, 6, 5, 8, 4, 3, 0, 5, 2, 9, 9, 7,
       8, 0, 5, 0, 6, 3, 3, 5, 1, 5, 1, 7, 9, 6, 4, 5, 0, 1, 8, 7, 8, 8,
       8, 9, 8, 7, 7, 2, 2, 2, 8, 0, 7, 8, 6, 8, 0, 4, 2, 2, 3, 7, 9, 0,
       2, 0, 0, 2, 7, 1, 5, 6, 4, 0, 0, 5, 5, 3, 9,

In [12]:
# NOTE(review): looks like a leftover sanity-check cell — it carries execution
# count 12 although it is placed after cell 17; consider removing it.
print('ok_')

ok_
