In [44]:
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

In [45]:
# Load the digits dataset as a feature matrix X and a label vector y.
X, y = load_digits(return_X_y=True)

In [46]:
# Shared 10-fold splitter used by every experiment below.
# A fixed random_state makes each kf.split(X) call yield the same shuffled
# folds, so the scores of the different models are computed on identical
# partitions and are reproducible after a kernel restart.
kf = KFold(n_splits=10,
           shuffle=True,
           random_state=0)

In [47]:
# for tr, vs in kf.split(X):
#     print(tr)
#     print("============")
#     print(vs)

In [7]:
# Buffer for the out-of-fold predictions, one entry per sample.
hy = np.empty_like(y)

# For every fold: fit on the training indices, predict the held-out indices.
for train_idx, val_idx in kf.split(X):
    clf = LinearSVC()
    clf.fit(X[train_idx], y[train_idx])
    hy[val_idx] = clf.predict(X[val_idx])

In [8]:
# With the out-of-fold predictions in hy, compute the macro-averaged F1 score.
lsvc_perf = f1_score(y_true=y, y_pred=hy, average="macro")
lsvc_perf

0.9549083785164385

## Bagging

La idea es dividir el conjunto de entrenamiento en dos mitades y entrenar únicamente con una de ellas; la otra mitad se descarta. Este proceso se repite varias veces, cada vez con una partición aleatoria distinta, y con los modelos así entrenados vamos a predecir.

In [9]:
# Prediction buffer (allocated here, not filled in this cell).
hy = np.empty_like(y)

# Same K-fold splitter as before, but only its first fold is used here.
tr, vs = next(iter(kf.split(X)))

# Train two models, each on a random half of the fold's training indices;
# the other half (tr2) is left unused.
models = []
for _ in range(2):
    tr1, tr2 = train_test_split(tr, test_size=0.5, shuffle=True)
    half_model = LinearSVC()
    half_model.fit(X[tr1], y[tr1])
    models.append(half_model)


Con estos modelos queremos predecir algo; en este caso, obtenemos la función de decisión de cada uno.

In [10]:
# Decision-function values of every model on the held-out fold (one array per model).
[clf.decision_function(X[vs]) for clf in models]

[array([[ -2.12445068,  -5.93718672,  -7.74544556, ...,  -4.3562226 ,
          -4.30754313, -13.21845487],
        [ -4.17089044,  -8.81813547,  -6.44839305, ...,   1.45127921,
          -8.28757488, -11.38042965],
        [  2.07860833,  -7.23271644,  -3.2924245 , ...,  -4.52039032,
          -4.82231668,  -7.07341577],
        ...,
        [ -4.17203622,   1.31162681,  -4.48875473, ...,  -4.51415254,
          -0.26118532, -15.57601527],
        [ -4.72193489, -10.71101545,  -4.73121478, ...,   2.40249412,
          -9.77514184, -12.73014012],
        [ -2.74316636,  -5.93792165,  -2.21921854, ...,  -4.90490675,
          -4.42744896,  -0.96720084]]),
 array([[ -1.16639683,  -5.15471703,  -6.48087045, ...,  -3.30088257,
          -3.31340822, -15.02169741],
        [ -4.32008327, -10.0056712 ,  -5.02837755, ...,   2.3733056 ,
          -8.08660989,  -8.70002946],
        [  1.61383095,  -8.98331207,  -2.9170566 , ...,  -3.21983017,
          -7.69095318,  -6.77777557],
        ...,


In [11]:
# Stack the per-model scores into one array and inspect its shape.
np.array([clf.decision_function(X[vs]) for clf in models]).shape

(2, 180, 10)

Lo que acabamos de hacer es preguntarle a cada modelo por su función de decisión. Podemos tomar la media de las funciones de decisión de todos los modelos y utilizar esa media para predecir.

In [12]:
# Average the decision scores over the models and show the first held-out sample.
per_model_scores = np.array([clf.decision_function(X[vs]) for clf in models])
per_model_scores.mean(axis=0)[0]

array([ -1.64542376,  -5.54595187,  -7.113158  , -19.85286328,
         2.08607398, -11.54495512,  -5.5441065 ,  -3.82855259,
        -3.81047568, -14.12007614])

Aquí se indica a qué clase pertenece cada ejemplo: la clase con el mayor valor medio de la función de decisión.

In [13]:
# Predicted class per held-out sample: index of the largest model-averaged score.
per_model_scores = np.array([clf.decision_function(X[vs]) for clf in models])
mean_scores = per_model_scores.mean(axis=0)
mean_scores.argmax(axis=1)

array([4, 7, 0, 8, 0, 6, 0, 3, 5, 0, 3, 3, 6, 0, 8, 1, 0, 0, 3, 6, 1, 3,
       1, 1, 8, 7, 2, 1, 9, 6, 1, 8, 2, 6, 9, 8, 9, 0, 7, 1, 1, 4, 7, 4,
       9, 4, 5, 1, 6, 7, 3, 2, 0, 7, 4, 1, 4, 4, 7, 1, 2, 9, 8, 3, 6, 2,
       0, 4, 9, 3, 4, 5, 0, 5, 1, 5, 0, 1, 4, 8, 8, 5, 8, 7, 5, 9, 6, 4,
       5, 4, 6, 7, 3, 6, 2, 2, 1, 6, 7, 5, 4, 2, 7, 0, 5, 8, 6, 3, 1, 3,
       3, 2, 3, 1, 9, 9, 0, 2, 4, 9, 0, 6, 9, 8, 1, 1, 6, 0, 7, 0, 4, 2,
       3, 4, 1, 7, 8, 3, 5, 0, 3, 3, 1, 1, 4, 0, 4, 8, 1, 3, 9, 2, 3, 1,
       3, 7, 8, 6, 0, 7, 0, 1, 1, 9, 5, 1, 4, 3, 8, 9, 1, 6, 9, 7, 3, 1,
       6, 1, 7, 9], dtype=int64)

Esta es nuestra predicción:

In [14]:
# Out-of-fold predictions of an ensemble of 2 half-sample models.
hy = np.empty_like(y)

for fold_train, fold_val in kf.split(X):
    models = []
    for _ in range(2):
        # Keep a random half of the fold's training indices; drop the rest.
        half, _discarded = train_test_split(fold_train, test_size=0.5, shuffle=True)
        clf = LinearSVC()
        clf.fit(X[half], y[half])
        models.append(clf)

    # Average the decision scores over the models and pick the best class.
    scores = np.array([clf.decision_function(X[fold_val]) for clf in models])
    hy[fold_val] = scores.mean(axis=0).argmax(axis=1)

In [15]:
# Baseline score next to the macro F1 of the 2-model ensemble.
(lsvc_perf, f1_score(y_true=y, y_pred=hy, average="macro"))

(0.9549083785164385, 0.9413528000472015)

In [16]:
# Out-of-fold predictions of an ensemble of 4 half-sample models.
hy = np.empty_like(y)

for fold_train, fold_val in kf.split(X):
    models = []
    for _ in range(4):
        # Keep a random half of the fold's training indices; drop the rest.
        half, _discarded = train_test_split(fold_train, test_size=0.5, shuffle=True)
        clf = LinearSVC()
        clf.fit(X[half], y[half])
        models.append(clf)

    # Average the decision scores over the models and pick the best class.
    scores = np.array([clf.decision_function(X[fold_val]) for clf in models])
    hy[fold_val] = scores.mean(axis=0).argmax(axis=1)

In [17]:
# Baseline score next to the macro F1 of the 4-model ensemble.
(lsvc_perf, f1_score(y_true=y, y_pred=hy, average="macro"))

(0.9549083785164385, 0.9542966959420223)

In [18]:
# Out-of-fold predictions of an ensemble of 10 half-sample models.
hy = np.empty_like(y)

for fold_train, fold_val in kf.split(X):
    models = []
    for _ in range(10):
        # Keep a random half of the fold's training indices; drop the rest.
        half, _discarded = train_test_split(fold_train, test_size=0.5, shuffle=True)
        clf = LinearSVC()
        clf.fit(X[half], y[half])
        models.append(clf)

    # Average the decision scores over the models and pick the best class.
    scores = np.array([clf.decision_function(X[fold_val]) for clf in models])
    hy[fold_val] = scores.mean(axis=0).argmax(axis=1)

In [19]:
# Baseline score next to the macro F1 of the 10-model ensemble.
(lsvc_perf, f1_score(y_true=y, y_pred=hy, average="macro"))

(0.9549083785164385, 0.9516447268969651)

# Stacked Generalization

In [20]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

In [21]:
# Buffer for the out-of-fold predictions, same shape and dtype as y.
hy = np.empty_like(y)

# For every fold: fit Gaussian naive Bayes on the training indices and
# predict the held-out indices.
for train_idx, val_idx in kf.split(X):
    nb = GaussianNB()
    nb.fit(X[train_idx], y[train_idx])
    hy[val_idx] = nb.predict(X[val_idx])

In [22]:
# Macro-averaged F1 of the predictions currently stored in hy.
f1_score(y_true=y, y_pred=hy, average="macro")

0.8417341327289918

In [25]:
def train(base_cl, X, y, n_splits=10, random_state=1):
    """Build out-of-fold class-probability features for a stacked model.

    The rows of ``X`` are split with a shuffled K-fold. For every fold a
    fresh ``base_cl()`` is fitted on the training part and its
    ``predict_proba`` output on the held-out part is stored, so each row of
    the result comes from a model that was not fitted on that row.

    Parameters
    ----------
    base_cl : callable
        Zero-argument factory (e.g. an estimator class). The object it
        returns must offer ``fit(X, y)`` returning the fitted model, and
        ``predict_proba(X)``.
    X : ndarray
        Feature matrix; rows are selected with integer index arrays.
    y : ndarray
        Labels aligned with the rows of ``X``.
    n_splits : int, default 10
        Number of folds of the internal K-fold.
    random_state : int, default 1
        Seed of the K-fold shuffle.

    Returns
    -------
    ndarray of shape (X.shape[0], number of distinct values in y)
        Out-of-fold ``predict_proba`` outputs.

    Notes
    -----
    ``predict_proba`` is expected to return one column per distinct value
    of ``y`` on every fold so that it fits the pre-allocated array.
    """
    # One row per sample, one column per distinct label.
    df = np.empty((X.shape[0], np.unique(y).shape[0]))

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    for tr, vs in kf.split(X):
        # Fit on the training part of the fold ...
        m = base_cl().fit(X[tr], y[tr])
        # ... and store the probabilities of the held-out part.
        df[vs] = m.predict_proba(X[vs])

    return df

In [26]:
# Stacking with one base learner (Gaussian naive Bayes) and a random forest
# as meta-learner. Predictions go into the existing hy buffer.
for tr, vs in kf.split(X):
    # Base learner fitted on the whole training part of the fold.
    m1 = GaussianNB()
    m1.fit(X[tr], y[tr])

    # Meta-learner trained on out-of-fold probabilities of the training part.
    meta_features = train(GaussianNB, X[tr], y[tr])
    st = RandomForestClassifier()
    st.fit(meta_features, y[tr])

    # Held-out rows: base-learner probabilities fed to the meta-learner.
    val_features = m1.predict_proba(X[vs])
    hy[vs] = st.predict(val_features)

In [27]:
# Macro-averaged F1 of the predictions currently stored in hy.
f1_score(y_true=y, y_pred=hy, average="macro")

0.8765021968795246

In [31]:
def train(base_cl, X, y, n_splits=10, random_state=1):
    """Build out-of-fold score features for a stacked model.

    Redefinition of ``train`` that also accepts base learners without
    ``predict_proba``: for those, ``decision_function`` is used instead.

    The rows of ``X`` are split with a shuffled K-fold. For every fold a
    fresh ``base_cl()`` is fitted on the training part and its scores on the
    held-out part are stored, so each row of the result comes from a model
    that was not fitted on that row.

    Parameters
    ----------
    base_cl : callable
        Zero-argument factory (e.g. an estimator class). The object it
        returns must offer ``fit(X, y)`` returning the fitted model, and
        either ``predict_proba(X)`` or ``decision_function(X)``.
    X : ndarray
        Feature matrix; rows are selected with integer index arrays.
    y : ndarray
        Labels aligned with the rows of ``X``.
    n_splits : int, default 10
        Number of folds of the internal K-fold.
    random_state : int, default 1
        Seed of the K-fold shuffle.

    Returns
    -------
    ndarray of shape (X.shape[0], number of distinct values in y)
        Out-of-fold scores.

    Notes
    -----
    The scoring method is expected to return one column per distinct value
    of ``y`` on every fold so that it fits the pre-allocated array.
    """
    # One row per sample, one column per distinct label.
    df = np.empty((X.shape[0], np.unique(y).shape[0]))

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    for tr, vs in kf.split(X):
        m = base_cl().fit(X[tr], y[tr])

        # Choose the scoring method by capability rather than by catching
        # AttributeError: an AttributeError raised *inside* predict_proba is
        # a real failure and must not silently switch to decision_function.
        if hasattr(m, "predict_proba"):
            df[vs] = m.predict_proba(X[vs])
        else:
            df[vs] = m.decision_function(X[vs])

    return df

In [35]:
# Stacking with two base learners (Gaussian naive Bayes and a linear SVM)
# and a random forest as meta-learner. Predictions go into the existing hy
# buffer.
for tr, vs in kf.split(X):

    # Base learners fitted on the whole training part of the fold; they
    # produce the meta-features of the held-out rows.
    m1 = GaussianNB().fit(X[tr], y[tr])

    m2 = LinearSVC().fit(X[tr], y[tr])

    # Meta-learner training set: out-of-fold scores of both base learners,
    # placed side by side. A descriptive name is used instead of `_`, which
    # IPython reserves for the last displayed output.
    meta_train = np.concatenate((train(GaussianNB, X[tr], y[tr]), train(LinearSVC, X[tr], y[tr])), axis = 1)

    st = RandomForestClassifier().fit(meta_train, y[tr])

    # Same feature layout for the held-out rows.
    meta_val = np.concatenate((m1.predict_proba(X[vs]), m2.decision_function(X[vs])), axis = 1)

    hy[vs] = st.predict(meta_val)

In [36]:
# Macro-averaged F1 of the predictions currently stored in hy.
f1_score(y_true=y, y_pred=hy, average="macro")

0.9638211736052652

In [41]:
# Same stacking experiment, additionally recording the out-of-fold
# predictions of each base learner so that all three models are evaluated
# on the same folds.
hy_svc = np.empty_like(y)
hy_nb = np.empty_like(y)
for tr, vs in kf.split(X):

    m1 = GaussianNB().fit(X[tr], y[tr])

    hy_nb[vs] = m1.predict(X[vs])

    m2 = LinearSVC().fit(X[tr], y[tr])

    hy_svc[vs] = m2.predict(X[vs])

    # Meta-learner training set: out-of-fold scores of both base learners,
    # placed side by side. A descriptive name is used instead of `_`, which
    # IPython reserves for the last displayed output.
    meta_train = np.concatenate((train(GaussianNB, X[tr], y[tr]), train(LinearSVC, X[tr], y[tr])), axis = 1)

    st = RandomForestClassifier().fit(meta_train, y[tr])

    # Same feature layout for the held-out rows.
    meta_val = np.concatenate((m1.predict_proba(X[vs]), m2.decision_function(X[vs])), axis = 1)

    hy[vs] = st.predict(meta_val)

In [42]:
# Macro-averaged F1 of the linear SVM's out-of-fold predictions.
f1_score(y_true=y, y_pred=hy_svc, average="macro")

0.9418200013521867

In [43]:
# Macro-averaged F1 of the naive Bayes out-of-fold predictions.
f1_score(y_true=y, y_pred=hy_nb, average="macro")

0.8397375068311727