# 0.3.9 Búsqueda aleatoria de hiperparámetros usando RandomizedSearchCV

In [1]:
#
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to local .py files take effect
# without restarting the kernel.
#
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

import math
import matplotlib.pyplot as plt

import scipy
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

#
# Session-wide display configuration: passing None removes the limit on how
# many columns / rows pandas renders for a DataFrame.
#
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
#
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so any warning raised by the
# libraries used below is hidden as well.
#
warnings.filterwarnings("ignore")

In [3]:
import sys
#
# Append the directory four levels up to the module search path so that the
# project-level `utils` package imported below can be resolved.
# NOTE(review): the entry is a relative path, so it presumably only resolves
# as intended when the kernel's working directory is this notebook's folder
# -- confirm.
#
sys.path.append("../../../../") 

#
# Project-local path helpers.
# NOTE(review): neither `path` nor `direcciones` is referenced in the cells
# shown here.
#
import utils.paths as path
from utils.paths2 import direcciones

## 0.3.9.1 Parametrización de la búsqueda

In [4]:
#
# Search space for an SVM classifier. The hyperparameters that can be tuned
# depend on the kernel, so `param_distributions` is a list holding one
# dictionary per kernel. Each dictionary maps a parameter name to either a
# list of candidate values or a scipy.stats distribution to sample from.
#
param_distributions = [
    # First model -- RBF kernel: gamma and C are drawn from exponential
    # distributions with scales 0.1 and 100 respectively.
    dict(
        kernel=["rbf"],
        gamma=scipy.stats.expon(scale=0.1),
        C=scipy.stats.expon(scale=100),
    ),
    # Second model -- linear kernel: C is picked from a fixed list of values.
    dict(
        kernel=["linear"],
        C=[1, 10, 100],
    ),
]

#
# Randomized search over the space above:
#
#   estimator (SVC())      object implementing the scikit-learn estimator
#                          interface.
#   param_distributions    parameter names mapped to distributions or lists
#                          of values to try.
#   n_iter=10              number of parameter settings that are sampled.
#   scoring="accuracy"     metric used to evaluate each cross-validated
#                          candidate.
#   refit=True             refit an estimator with the best parameters found
#                          on the whole dataset passed to fit().
#   cv=5                   cross-validation splitting strategy (5 folds).
#   random_state=12345     fixed seed, so the sampled settings are repeatable.
#
randomizedSearchCV = RandomizedSearchCV(
    SVC(),
    param_distributions,
    n_iter=10,
    scoring="accuracy",
    refit=True,
    cv=5,
    random_state=12345,
)

## 0.3.9.2 Preparación de los datos

In [5]:
#
# Load the digits dataset and flatten every image into a single feature row,
# giving a 2-D matrix with one row per sample; `y` holds the labels.
#
digits = load_digits()
n_samples = digits.images.shape[0]
X = digits.images.reshape(n_samples, -1)
y = digits.target

#
# Hold out half of the samples as a test split; the fixed seed makes the
# partition repeatable.
#
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

## 0.3.9.3 Realización de la búsqueda

In [6]:
#
# Run the search on the training split: n_iter=10 parameter settings are
# sampled and scored with cv=5 cross-validation and, because refit=True, the
# best setting is then refitted on all of X_train.
#
randomizedSearchCV.fit(X_train, y_train)

RandomizedSearchCV(cv=5, estimator=SVC(),
                   param_distributions=[{'C': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001E5FB301280>,
                                         'gamma': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001E58C6667F0>,
                                         'kernel': ['rbf']},
                                        {'C': [1, 10, 100],
                                         'kernel': ['linear']}],
                   random_state=12345, scoring='accuracy')

## 0.3.9.4 Resultados obtenidos

In [7]:
#
# Per-candidate results of the search, returned as a dictionary of arrays
# (fit / score timings, the sampled parameter values, ...).
# NOTE(review): the raw dictionary is long; wrapping it in pd.DataFrame(...)
# would presumably give a more readable table -- confirm before changing.
#
randomizedSearchCV.cv_results_

{'mean_fit_time': array([0.09388723, 0.01599998, 0.1239018 , 0.10306625, 0.01272025,
        0.0712852 , 0.13566866, 0.01279969, 0.09760122, 0.01500173]),
 'std_fit_time': array([0.02272329, 0.00506041, 0.00726316, 0.01058656, 0.00636577,
        0.00829494, 0.00954427, 0.00391983, 0.00932225, 0.00214582]),
 'mean_score_time': array([0.0285162 , 0.00480065, 0.02208571, 0.02904925, 0.00312557,
        0.01776066, 0.0237021 , 0.0047996 , 0.02560062, 0.        ]),
 'std_score_time': array([0.0039724 , 0.00391972, 0.00592975, 0.00686235, 0.00625114,
        0.00385174, 0.0071616 , 0.00391886, 0.00319853, 0.        ]),
 'param_C': masked_array(data=[220.8682396496381, 100, 83.86933864671792,
                    61.97597554687909, 100, 222.88118074128667,
                    11.254669304411362, 10, 81.28259452454746, 1],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_ga

In [8]:
#
# Estimator that was chosen by the search, i.e. estimator which gave highest
# score (or smallest loss if specified) on the left out data.
# The search above was configured with refit=True, which is what makes the
# refitted estimator available here.
#
randomizedSearchCV.best_estimator_

SVC(C=222.88118074128667, gamma=0.0027155110061623483)

In [9]:
#
# Mean cross-validated score (accuracy, per the `scoring` setting of the
# search) obtained by the best parameter setting.
#
randomizedSearchCV.best_score_

0.9833022967101179

In [10]:
#
# Parameter setting that gave the best cross-validated score.
#
randomizedSearchCV.best_params_

{'C': 222.88118074128667, 'gamma': 0.0027155110061623483, 'kernel': 'rbf'}

## 0.3.9.5 Pronóstico con el mejor modelo

In [11]:
#
# Predict labels with the refitted best estimator.
# NOTE(review): the predictions are made on X_train, the same split the
# search was fitted on; the held-out X_test / y_test are not used in the
# cells shown here, so this is not an out-of-sample evaluation.
#
randomizedSearchCV.predict(X_train)

array([1, 4, 9, 0, 4, 1, 1, 5, 9, 1, 4, 2, 6, 3, 9, 7, 6, 4, 8, 6, 8, 7,
       6, 0, 5, 9, 4, 7, 3, 4, 9, 4, 9, 7, 9, 1, 5, 6, 0, 0, 4, 3, 6, 1,
       0, 9, 4, 8, 7, 5, 9, 8, 4, 5, 0, 1, 6, 0, 5, 5, 0, 4, 3, 2, 8, 7,
       6, 3, 4, 2, 5, 8, 0, 6, 9, 4, 5, 4, 9, 7, 3, 3, 1, 4, 4, 2, 6, 8,
       1, 1, 0, 3, 7, 4, 6, 7, 4, 0, 5, 2, 9, 2, 1, 9, 2, 3, 1, 7, 7, 4,
       5, 6, 5, 6, 7, 8, 1, 4, 3, 4, 4, 3, 5, 3, 3, 4, 7, 9, 8, 0, 6, 1,
       9, 0, 8, 4, 1, 2, 3, 9, 7, 8, 8, 8, 3, 7, 5, 7, 0, 1, 7, 8, 3, 8,
       0, 4, 8, 6, 2, 3, 6, 7, 3, 7, 7, 1, 3, 5, 0, 9, 8, 5, 3, 1, 2, 0,
       3, 6, 0, 3, 4, 1, 2, 3, 1, 0, 5, 8, 9, 3, 9, 6, 6, 8, 9, 0, 7, 8,
       2, 0, 0, 7, 7, 4, 5, 3, 1, 8, 5, 9, 6, 2, 9, 7, 7, 9, 5, 4, 2, 6,
       6, 1, 3, 4, 7, 2, 8, 0, 6, 1, 6, 6, 5, 8, 4, 3, 0, 5, 2, 9, 9, 7,
       8, 0, 5, 0, 6, 3, 3, 5, 1, 5, 1, 7, 9, 6, 4, 5, 0, 1, 8, 7, 8, 8,
       8, 9, 8, 7, 7, 2, 2, 2, 8, 0, 7, 8, 6, 8, 0, 4, 2, 2, 3, 7, 9, 0,
       2, 0, 0, 2, 7, 1, 5, 6, 4, 0, 0, 5, 5, 3, 9,

In [12]:
# Completion marker: prints a fixed string once execution reaches this cell.
print('ok_')

ok_
