# 4.1.4 Búsqueda aleatoria de hiperparámetros usando RandomizedSearchCV

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import warnings

import scipy
from scipy.stats import expon
from sklearn.datasets import load_digits
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

## 4.1.4.1 Parametrización de la búsqueda

In [None]:
# Search space handed to RandomizedSearchCV: a list of dictionaries, one per
# candidate model family. Each value is either a list of discrete choices or a
# frozen scipy.stats distribution from which values are drawn.
#
# `expon` is imported explicitly from scipy.stats so that this cell does not
# depend on the submodule having been loaded as a side effect of another
# import.
param_distributions = [
    # -------------------------------------------------------------------------
    # First model: RBF kernel, with gamma and C drawn at random from
    # exponential distributions (scale=0.1 and scale=100 respectively).
    {
        "kernel": ["rbf"],
        "gamma": expon(scale=0.1),
        "C": expon(scale=100),
    },
    # -------------------------------------------------------------------------
    # Second model: linear kernel, with C picked from a fixed list of values.
    {
        "kernel": ["linear"],
        "C": [1, 10, 100],
    },
]

# Configure the randomized hyperparameter search. Nothing is trained here; the
# search only runs when `fit` is called on this object.
randomizedSearchCV = RandomizedSearchCV(
    # --- What to tune --------------------------------------------------------
    # Base estimator whose hyperparameters are searched.
    estimator=SVC(),
    # Search space: the list of dictionaries built above, mapping parameter
    # names to lists of values or to distributions to sample from.
    param_distributions=param_distributions,
    # --- How much to sample --------------------------------------------------
    # Number of parameter settings drawn from the search space.
    n_iter=10,
    # Seed for the sampling, so repeated runs draw the same candidates.
    random_state=12345,
    # --- How to evaluate -----------------------------------------------------
    # Cross-validation splitting strategy (an integer number of folds).
    cv=5,
    # Metric used to score each candidate on the held-out folds.
    scoring="accuracy",
    # --- What to keep --------------------------------------------------------
    # Refit an estimator with the best parameters found, using all of the
    # data passed to `fit`.
    refit=True,
)

## 4.1.4.2 Preparación de los datos  

In [None]:
# Load the digits dataset through scikit-learn's loader.
digits = load_digits()

# Flatten every image into a single feature row: the first axis stays as the
# sample index and the remaining axes are collapsed by the -1 placeholder.
n_samples = digits.images.shape[0]
X = digits.images.reshape(n_samples, -1)
y = digits.target

# Hold out half of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0
)

## 4.1.4.3 Realización de la búsqueda

In [None]:
# Run the randomized search on the training split only: each sampled
# parameter setting is cross-validated and, because refit=True was set, the
# best one is then refit on the data passed here.
randomizedSearchCV.fit(X_train, y_train)

## 4.1.4.4 Valores retornados

In [None]:
# Cross-validation results recorded by the search for the sampled parameter
# settings (displayed as the cell output).
randomizedSearchCV.cv_results_

In [None]:
#
# Estimator that was chosen by the search, i.e. estimator which gave highest
# score (or smallest loss if specified) on the left out data. It is exposed
# here because the search was configured with refit=True.
#
randomizedSearchCV.best_estimator_

In [None]:
# Mean cross-validated score (accuracy, per the `scoring` setting) obtained by
# the best parameter setting.
randomizedSearchCV.best_score_

In [None]:
# Parameter setting that achieved the best cross-validated score.
randomizedSearchCV.best_params_

## 4.1.4.5 Pronóstico con el mejor modelo

In [None]:
# Predict with the refitted best estimator.
# NOTE(review): predictions are made on the training split, while X_test and
# y_test are created above but not used in the visible cells. Confirm whether
# predicting on the held-out split was intended.
randomizedSearchCV.predict(X_train)

In [None]:
# Presumably an end-of-notebook marker showing every cell ran; verify.
print("ok_")