In [222]:
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

In [223]:
X, y = load_digits(return_X_y=True)

In [224]:
kf = KFold(n_splits=10,
            shuffle=True)

In [225]:
# for tr, vs in kf.split(X):
#     print(tr)
#     print("============")
#     print(vs)

In [226]:
# Guardamos los datos en este arreglo
hy = np.empty_like(y)

# Dividimos en datos de entrenamiento y validacion
for tr, vs in kf.split(X):

    # Entrenamos el modelo
    m = LinearSVC().fit(X[tr], y[tr])
    
    # predecimos con los datos de validación
    hy[vs] = m.predict(X[vs])

In [227]:
# Ya estimado el hy podemos medir el score
lsvc_perf = f1_score(y,
                     hy, 
                    average = 'macro')
lsvc_perf

0.9488970819786806

## Bagging

La idea es dividir el conjunto de entrenamiento en dos y solamente entrenariamos con la mitad, la otra mitad se desperdicia, y entrenando solo con la mitad se entrenara varias veces y vamos a predecir.

In [228]:
# hy para guardar los datos
hy = np.empty_like(y)

# Usamos el mismo paso de Kfold
for tr, vs in kf.split(X):



    # Vamos a guardar varios modelos
    models = []

    for _ in range(2):

        # Queremos que el conjunto de entrenamiento representado por esos indices se divida en dos.
        tr1, tr2 = train_test_split(tr, test_size = 0.5, shuffle = True)

        # Entrenamos el modelo    
        models.append(LinearSVC().fit(X[tr1], y[tr1]))
    
    break


Con estas queremos predecir algo, en este caso la función de decision

In [229]:
[m.decision_function(X[vs]) for m in models]

[array([[ -4.52512067,  -6.30528015,   3.17808847, ...,  -1.87452706,
          -5.99978196, -13.87283432],
        [ -2.78361605,  -2.99962673,  -5.34668638, ...,  -2.47969511,
           0.1582429 ,  -8.85912341],
        [ -3.44275125,  -2.63252628,  -1.20454676, ...,  -5.50650528,
          -8.89202403,  -0.13648195],
        ...,
        [ -5.32667284,   5.37940141,  -3.52889899, ...,  -3.8689471 ,
          -2.69698001, -17.13935684],
        [ -4.00375742,  -2.08661971,  -4.15922224, ...,  -5.33232426,
          -1.57605559, -21.72345483],
        [ -4.32293902, -11.96444263,  -3.33036513, ...,  -6.08993937,
           0.6101741 ,  -4.67219149]]),
 array([[ -5.21545418,  -2.74472788,   3.54468466, ...,  -3.38732653,
          -5.20966728, -12.27945069],
        [ -2.97721413,   2.19513692,  -7.52423507, ...,  -2.39701563,
          -3.09262368, -16.24255578],
        [ -3.83704702,  -0.70934341,  -1.62900401, ...,  -7.71693552,
          -6.11236157,  -1.2625697 ],
        ...,


In [230]:
np.array([m.decision_function(X[vs]) for m in models]).shape

(2, 180, 10)

Lo que acabamos de hacer es preguntarle a cada modelo por la función de desición, podemos tomar la media de la función de desicion y con esa media utilizar.

In [231]:
np.array([m.decision_function(X[vs]) for m in models]).mean(axis = 0)[0]

array([ -4.87028742,  -4.52500402,   3.36138656,  -7.79453951,
        -5.81042726,  -5.50223757,  -4.1456292 ,  -2.63092679,
        -5.60472462, -13.07614251])

Aqui indica a que clase pertence

In [232]:
np.array([m.decision_function(X[vs]) for m in models]).mean(axis = 0).argmax(axis = 1)

array([2, 4, 9, 5, 2, 6, 1, 2, 4, 8, 1, 8, 7, 5, 4, 0, 5, 8, 1, 7, 7, 1,
       4, 2, 8, 5, 6, 3, 5, 1, 5, 9, 7, 0, 2, 2, 6, 6, 1, 9, 3, 3, 8, 4,
       5, 9, 0, 7, 1, 0, 2, 2, 8, 1, 4, 8, 0, 7, 4, 6, 1, 9, 6, 4, 4, 7,
       8, 3, 4, 0, 1, 6, 6, 8, 1, 6, 1, 8, 4, 8, 1, 4, 3, 0, 2, 8, 5, 7,
       2, 6, 6, 7, 1, 2, 2, 3, 5, 6, 3, 0, 7, 5, 5, 2, 4, 9, 2, 4, 9, 9,
       7, 3, 4, 8, 0, 0, 7, 8, 2, 7, 7, 0, 5, 1, 4, 6, 4, 7, 8, 1, 2, 0,
       7, 5, 8, 0, 7, 3, 1, 2, 8, 8, 2, 6, 1, 8, 5, 4, 0, 1, 0, 0, 1, 6,
       9, 6, 2, 2, 5, 7, 3, 7, 2, 0, 9, 5, 4, 1, 8, 3, 7, 1, 6, 5, 0, 4,
       3, 1, 6, 8], dtype=int64)

Esto es nuestra predicción:

In [233]:
hy = np.empty_like(y)

for tr, vs in kf.split(X):
    
    models = []
    
    for _ in range(2):

        tr1, tr2 = train_test_split(tr, test_size = 0.5, shuffle=True)
        
        models.append(LinearSVC().fit(X[tr1], y[tr1]))

    hy[vs] = np.array([m.decision_function(X[vs]) for m in models]).mean(axis = 0).argmax(axis = 1)

In [234]:
lsvc_perf, f1_score(y, hy, average='macro')

(0.9488970819786806, 0.9448802969924662)

In [235]:
hy = np.empty_like(y)

for tr, vs in kf.split(X):
    models = []
    for _ in range(4):
        tr1, tr2 = train_test_split(tr, test_size = 0.5, shuffle=True)
        models.append(LinearSVC().fit(X[tr1], y[tr1]))

    hy[vs] = np.array([m.decision_function(X[vs]) for m in models]).mean(axis = 0).argmax(axis = 1)

In [236]:
lsvc_perf, f1_score(y, hy, average='macro')

(0.9488970819786806, 0.9504063071225038)

In [239]:
hy = np.empty_like(y)

for tr, vs in kf.split(X):
    models = []
    for _ in range(10):
        tr1, tr2 = train_test_split(tr, test_size = 0.5, shuffle=True)
        models.append(LinearSVC().fit(X[tr1], y[tr1]))

    hy[vs] = np.array([m.decision_function(X[vs]) for m in models]).mean(axis = 0).argmax(axis = 1)

In [240]:
lsvc_perf, f1_score(y, hy, average='macro')

(0.9488970819786806, 0.9578552980917902)