In [35]:
import os
import re
import random
import numbers
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from itertools import permutations

from keras import backend as K
from keras.utils import to_categorical
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Lambda, Activation, MaxPooling2D, \
                            GlobalAveragePooling2D, Conv2D, BatchNormalization, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator

from tqdm import tqdm

from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import fbeta_score


INPUT_DIR = '../input'
EMB_SIZE = 8
BATCH_SIZE = 1024
N_FOLDS = 2
N_ITER = 50
SEED = 32

In [2]:
""" --------------------------------- Triplet loss implementation ----------------------------------- """


def _all_diffs(a, b):
    """ Returns a tensor of all combinations of a - b.
    Args:
        a (2D tensor): A batch of vectors shaped (B1, F).
        b (2D tensor): A batch of vectors shaped (B2, F).
    Returns:
        The matrix of all pairwise differences between all vectors in `a` and in
        `b`, will be of shape (B1, B2).
    Note:
        For convenience, if either `a` or `b` is a `Distribution` object, its
        mean is used.
    """
    return tf.expand_dims(a, axis=1) - tf.expand_dims(b, axis=0)


def _cdist(a, b, metric='euclidean'):
    """Similar to scipy.spatial's _cdist, but symbolic.
    The currently supported metrics can be listed as `_cdist.supported_metrics` and are:
        - 'euclidean', although with a fudge-factor epsilon.
        - 'sqeuclidean', the squared euclidean.
        - 'cityblock', the manhattan or L1 distance.
    Args:
        a (2D tensor): The left-hand side, shaped (B1, F).
        b (2D tensor): The right-hand side, shaped (B2, F).
        metric (string): Which distance metric to use, see notes.
    Returns:
        The matrix of all pairwise distances between all vectors in `a` and in
        `b`, will be of shape (B1, B2).
    Note:
        When a square root is taken (such as in the Euclidean case), a small
        epsilon is added because the gradient of the square-root at zero is
        undefined. Thus, it will never return exact zero in these cases.
    """
    with tf.name_scope("_cdist"):
        diffs = _all_diffs(a, b)
        if metric == 'sqeuclidean':
            return tf.reduce_sum(tf.square(diffs), axis=-1)
        elif metric == 'euclidean':
            return tf.sqrt(tf.reduce_sum(tf.square(diffs), axis=-1) + 1e-12)
        elif metric == 'cityblock':
            return tf.reduce_sum(tf.abs(diffs), axis=-1)
        else:
            raise NotImplementedError(
                'The following metric is not implemented by `_cdist` yet: {}'.format(metric))


_cdist.supported_metrics = [
    'euclidean',
    'sqeuclidean',
    'cityblock',
]


def _get_at_indices(tensor, indices):
    """ Like `tensor[np.arange(len(tensor)), indices]` in numpy. """
    counter = tf.range(tf.shape(indices, out_type=indices.dtype)[0])
    return tf.gather_nd(tensor, tf.stack((counter, indices), -1))


def batch_hard_loss(features, pids, metric='euclidean', margin=0.1):
    """Computes the batch-hard loss from arxiv.org/abs/1703.07737.
    Args:
        dists (2D tensor): A square all-to-all distance matrix as given by _cdist.
        pids (1D tensor): The identities of the entries in `batch`, shape (B,).
            This can be of any type that can be compared, thus also a string.
        margin: The value of the margin if a number, alternatively the string
            'soft' for using the soft-margin formulation, or `None` for not
            using a margin at all.
    Returns:
        A 1D tensor of shape (B,) containing the loss value for each sample.
        :param margin:
        :param features:
        :param pids:
        :param metric:
    """
    with tf.name_scope("batch_hard_loss"):

        dists = _cdist(features, features, metric=metric)

        pids = tf.argmax(pids, axis=1)

        exp_dims0 = tf.expand_dims(pids, axis=0)
        exp_dims1 = tf.expand_dims(pids, axis=1)

        same_identity_mask = tf.equal(exp_dims1, exp_dims0)

        negative_mask = tf.logical_not(same_identity_mask)
        positive_mask = tf.logical_xor(same_identity_mask,
                                       tf.eye(tf.shape(pids)[0], dtype=tf.bool))

        furthest_positive = tf.reduce_max(dists*tf.cast(positive_mask, tf.float32), axis=1)
        # closest_negative = tf.map_fn(lambda x: tf.reduce_min(tf.boolean_mask(x[0], x[1])),
        #                              (dists, negative_mask), tf.float32)
        # Another way of achieving the same, though more hacky:
        closest_negative = tf.reduce_min(dists + 1e5*tf.cast(same_identity_mask, tf.float32), axis=1)

        diff = furthest_positive - closest_negative
        if isinstance(margin, numbers.Real):
            diff = tf.maximum(diff + margin, 0.0)
        elif margin == 'soft':
            diff = tf.nn.softplus(diff)
        elif margin is None:
            pass
        else:
            raise NotImplementedError('The margin {} is not implemented in batch_hard_loss'.format(margin))

    return diff


def triplet_loss(labels, features):
    # https://github.com/tensorflow/tensorflow/issues/20253
    # from tensorflow.contrib.losses import metric_learning
    # return metric_learning.triplet_semihard_loss(K.argmax(labels, axis=1), embeddings, margin=0.2)
    return tf.reduce_mean(batch_hard_loss(features, labels, margin=0.2))

In [3]:
def f1(y_true, y_pred):

    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    
    recall = true_positives / (possible_positives + K.epsilon())
    precision = true_positives / (predicted_positives + K.epsilon())
    
    return 2 * ((precision * recall)/(precision + recall + K.epsilon()))

In [4]:
""" ------------------------------------ Data loading -------------------------------------- """

# load dataframes
df_train = pd.read_csv(os.path.join(INPUT_DIR, 'train.csv'))
df_test = pd.read_csv(os.path.join(INPUT_DIR, 'test.csv'))

print(df_train.head())

   label  pixel0  pixel1    ...     pixel781  pixel782  pixel783
0      1       0       0    ...            0         0         0
1      0       0       0    ...            0         0         0
2      1       0       0    ...            0         0         0
3      4       0       0    ...            0         0         0
4      0       0       0    ...            0         0         0

[5 rows x 785 columns]


In [31]:
""" --------------------------------- Data preprocessing ----------------------------------- """

# load pixel values, reshape them to 28x28 pixels and rescale from [0, 255] to [0, 1]
x_train = df_train.iloc[:,1:].values.astype('float32') / 255.
x_test = df_test.values.astype('float32') / 255.

# make images 28x28x1
xc_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
xc_test = np.reshape(x_test, (len(x_test), 28, 28, 1))

# load labels
y_train = df_train.label.values
yc_train = to_categorical(y_train)

input_size = output_size = x_train.shape[1]
input_csize = output_csize = xc_train.shape[1]

print(xc_train[:5], xc_test[:5])
# print(yc_train)

[[[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.

In [32]:
""" -------------------------------------------- Models ------------------------------------------------ """

def base_network(model_type='triplet', input_shape=input_csize):
    """
    Base network to be shared.
    """
    if model_type == 'autoencoder':
        pass
    elif model_type == 'triplet':
        model = Sequential([
            Conv2D(filters=64, kernel_size=(3, 3), padding='same', input_shape=(input_csize, input_csize, 1,), activation='relu'),
            Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Dropout(0.25),
            
            Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu'),
            Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Dropout(0.25),
            
            Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu'),
            Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
            Dropout(0.25),
            
            Flatten(),
            Dense(512, activation='relu'),
            Dense(256, activation='relu'),
            BatchNormalization(),
            Dropout(0.25),
            Dense(10, name='embeddings', activation='softmax'),
#             Lambda(lambda z: K.l2_normalize(z, axis=1))
        ])
    return model

In [33]:
""" --------------------------------- Data generator ----------------------------------- """

datagen = ImageDataGenerator(
        featurewise_center=False, # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

datagen.fit(xc_train)

In [42]:
""" --------------------------------- Triplet model training ----------------------------------- """

yfull_test = []

skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)

print(len(xc_train), len(y_train))

for i, (train_index, val_index) in enumerate(skf.split(xc_train, y_train)):

    triplet_model = base_network()
    triplet_model.compile(optimizer=RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0), 
                          loss=['categorical_crossentropy'], metrics=[f1])
#     triplet_model.summary()
    
    weights_path = os.path.join('.', f'w{i}.h5')

    callbacks=[
    #         EarlyStopping(monitor='val_f1', min_delta=.0001),
        ReduceLROnPlateau(monitor='val_f1', patience=3, verbose=1, factor=0.5, min_lr=0.00001),
        ModelCheckpoint(weights_path, monitor='val_f1', mode='max', save_best_only=True, verbose=0)
    ]
    
    xb_train = xc_train[train_index]
    yb_train = yc_train[train_index]
    xb_val = xc_train[val_index]
    yb_val = yc_train[val_index]

    history = triplet_model.fit_generator(
        datagen.flow(xb_train, yb_train, batch_size=BATCH_SIZE),
        validation_data=datagen.flow(xb_val, yb_val, batch_size=BATCH_SIZE),
        validation_steps=xb_val.shape[0] // BATCH_SIZE,
        verbose=2,
        epochs=N_ITER,
        steps_per_epoch=xb_train.shape[0] // BATCH_SIZE,
        shuffle=True,
        callbacks=callbacks)
        
    if os.path.isfile(weights_path):
        triplet_model.load_weights(weights_path)

    print(fbeta_score(yb_val, np.array(triplet_model.predict(xb_val, batch_size=128, verbose=2)) > 0.2, beta=2, average='samples'))

    yfull_test.append(triplet_model.predict(xc_test, batch_size=128, verbose=2))


42000 42000
Epoch 1/50
 - 13s - loss: 0.8015 - f1: 0.7426 - val_loss: 0.9149 - val_f1: 0.7334
Epoch 2/50
 - 8s - loss: 0.1938 - f1: 0.9390 - val_loss: 0.2930 - val_f1: 0.9238
Epoch 3/50
 - 11s - loss: 0.1427 - f1: 0.9566 - val_loss: 0.1615 - val_f1: 0.9581
Epoch 4/50
 - 11s - loss: 0.1050 - f1: 0.9669 - val_loss: 0.1483 - val_f1: 0.9644

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/50
 - 11s - loss: 0.0598 - f1: 0.9816 - val_loss: 0.0701 - val_f1: 0.9783
Epoch 6/50
 - 11s - loss: 0.0496 - f1: 0.9844 - val_loss: 0.0745 - val_f1: 0.9776
Epoch 7/50
 - 11s - loss: 0.0452 - f1: 0.9866 - val_loss: 0.0616 - val_f1: 0.9818

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 8/50
 - 11s - loss: 0.0369 - f1: 0.9882 - val_loss: 0.0419 - val_f1: 0.9879
Epoch 9/50
 - 12s - loss: 0.0302 - f1: 0.9906 - val_loss: 0.0331 - val_f1: 0.9904
Epoch 10/50
 - 12s - loss: 0.0273 - f1: 0.9913 - val_loss: 0.0459 - val_f1: 0.9862

Epoch

Epoch 37/50
 - 11s - loss: 0.0144 - f1: 0.9957 - val_loss: 0.0213 - val_f1: 0.9939
Epoch 38/50
 - 11s - loss: 0.0160 - f1: 0.9952 - val_loss: 0.0216 - val_f1: 0.9931
Epoch 39/50
 - 12s - loss: 0.0168 - f1: 0.9954 - val_loss: 0.0223 - val_f1: 0.9933
Epoch 40/50
 - 13s - loss: 0.0157 - f1: 0.9957 - val_loss: 0.0224 - val_f1: 0.9934
Epoch 41/50
 - 12s - loss: 0.0141 - f1: 0.9956 - val_loss: 0.0203 - val_f1: 0.9938
Epoch 42/50
 - 11s - loss: 0.0158 - f1: 0.9951 - val_loss: 0.0204 - val_f1: 0.9932
Epoch 43/50
 - 11s - loss: 0.0147 - f1: 0.9955 - val_loss: 0.0215 - val_f1: 0.9938
Epoch 44/50
 - 11s - loss: 0.0137 - f1: 0.9958 - val_loss: 0.0226 - val_f1: 0.9932
Epoch 45/50
 - 11s - loss: 0.0153 - f1: 0.9947 - val_loss: 0.0215 - val_f1: 0.9941
Epoch 46/50
 - 12s - loss: 0.0160 - f1: 0.9950 - val_loss: 0.0198 - val_f1: 0.9935
Epoch 47/50
 - 11s - loss: 0.0148 - f1: 0.9957 - val_loss: 0.0236 - val_f1: 0.9929
Epoch 48/50
 - 11s - loss: 0.0151 - f1: 0.9954 - val_loss: 0.0206 - val_f1: 0.9936
Epoc

In [None]:
# """ --------------------------------- Triplet model training ----------------------------------- """

# yfull_test = []

# skf = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)

# print(len(xc_train), len(y_train))

# for i, (train_index, val_index) in enumerate(skf.split(xc_train, yc_train)):

#     triplet_model = base_network()
#     triplet_model.compile(optimizer=RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0), 
#                           loss=['categorical_crossentropy'], metrics=[f1])
# #     triplet_model.summary()
    
#     weights_path = os.path.join('.', f'w{i}.h5')

#     callbacks=[
# #         EarlyStopping(monitor='val_f1', min_delta=.0001),
#         ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001),
#         ModelCheckpoint(weights_path, monitor='val_f1', mode='max', save_best_only=True, verbose=0)
#     ]
    
#     xb_train = xc_train[train_index]
#     yb_train = yc_train[train_index]
#     xb_val = xc_train[val_index]
#     yb_val = yc_train[val_index]

#     triplet_model.fit(
# #         xb_train, 
# #         yb_train,
#         datagen.flow(xc_train, yc_train, batch_size=BATCH_SIZE),
#         validation_data=(xb_val, yb_val),
#         verbose=2,
#         epochs=25,
#         batch_size=128,
#         shuffle=True,
#         callbacks=callbacks)
        
#     if os.path.isfile(weights_path):
#         triplet_model.load_weights(weights_path)

#     y_pred = triplet_model.predict(xb_val, batch_size=128, verbose=2)
#     print(fbeta_score(yb_val, np.array(y_pred) > 0.2, beta=2, average='samples'))

#     yfull_test.append(triplet_model.predict(xc_test, batch_size=128, verbose=2))

In [43]:
pred = np.array(yfull_test)
pred = np.argmax(pred, axis=2)
values, counts = np.unique(pred, axis=0, return_counts=True)
pred = values[np.argmax(counts)]
print(pred.shape)

(28000,)


In [None]:
# """ ----------------------------- Grid params initialization ------------------------------ """

# MODELS = {
# #     'lr': {
# #         'model': LogisticRegression,
# #         'params': {
# #             'fit_intercept': [True, False],
# #             'multi_class': ['ovr'],
# #             'penalty': ['l2'],
# #             'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
# #             'tol': [0.01, 0.05, 0.1, 0.5, 1, 5],
# #             'random_state': [SEED],
# #         },
# #         'best_params': {'tol': 0.05, 'solver': 'newton-cg', 'random_state': 32, 'penalty': 'l2', 'multi_class': 'ovr', 'fit_intercept': True},
# #         'best score': 0.813692480359147,
# #     },
# #     'mlp': {
# #         'model': MLPClassifier,
# #         'params': {
# #             'activation' : ['identity', 'logistic', 'tanh', 'relu'],
# #             'solver' : ['lbfgs', 'adam'],
# #             'learning_rate' : ['constant', 'invscaling', 'adaptive'],
# #             'learning_rate_init': [.01, .05, .1, .2, .5, 1, 2],
# #             'random_state': [SEED],
# #         },
# #         'best_params': {'solver': 'lbfgs', 'random_state': 32, 'learning_rate_init': 2, 'learning_rate': 'adaptive', 'activation': 'identity'},
# #         'best_score': 0.8092031425364759,
# #     },
#     'knn': {
#         'model': KNeighborsClassifier,
#         'params': {
#             'n_neighbors' : range(1, 5),
#             'weights' : ['uniform', 'distance'],
#             'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],
#             'leaf_size' : range(10, 100, 10),
#         },
#         'best_params': {'weights': 'distance', 'n_neighbors': 2, 'leaf_size': 50, 'algorithm': 'auto'},
#         'best_score': 1.
#     },
# #     'lrcv': {
# #         'model': LogisticRegressionCV,
# #         'params': {
# #             'Cs': [1, 2, 4, 8, 16, 32],
# #             'fit_intercept': [True, False],
# #             'refit': [True, False],
# #             'multi_class': ['ovr'],
# #             'penalty': ['l2'],
# #             'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
# #             'tol': [0.01, 0.05, 0.1, 0.5, 1, 5],
# #             'cv': [cv]
# #         },
# #         'best_params': {'tol': 0.05, 'solver': 'newton-cg', 'refit': True, 'penalty': 'l2', 'multi_class': 'ovr', 'fit_intercept': False, 'cv': 4, 'Cs': 2},
# #         'best_score': 0.8428731762065096
# #     },
# #     'dt': {
# #         'model': DecisionTreeClassifier,
# #         'params': {
# #             'criterion': ['gini', 'entropy'],
# #             'max_depth': range(6, 10),
# #             'max_features': ['auto', 'sqrt', 'log2', None],
# #             'min_samples_split': [2, 5, 10], # Minimum number of samples required to split a node
# #             'min_samples_leaf': [1, 2, 4], # Minimum number of samples required at each leaf node
# #         },
# #         'best_params': {'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': None, 'max_depth': 6, 'criterion': 'gini'},
# #         'best_score': 0.8181818181818182,
# #     },
# #     'svc': {
# #         'model': SVC,
# #         'params': {
# #             'C': [0.1, 0.5, 1., 2., 4.],
# #             'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
# #             'gamma': ['auto', 'scale'],
# #             'degree': range(5),
# #             'tol': [0.1, 0.5, 1, 5],
# #         },
# #         'best_params': {'tol': 1, 'shrinking': False, 'probability': False, 'kernel': 'rbf', 'gamma': 'scale', 'degree': 4, 'C': 2.0},
# #         'best_score': 0.8428731762065096
# #     },
# #     'rf': {
# #         'model': RandomForestClassifier,
# #         'params': {
# #             'n_estimators': range(10, 251, 20),
# #             'max_features': ['auto', 'sqrt', 'log2', None],
# #             'max_depth': range(5, 20),
# #             'min_samples_split': range(2, 10), # Minimum number of samples required to split a node
# #             'min_samples_leaf': range(1, 10), # Minimum number of samples required at each leaf node
# #             'bootstrap': [True, False], # Method of selecting samples for training each tree,
# #             'random_state': [SEED],
# #         },
# #         'best_params': {'random_state': 32, 'n_jobs': -1, 'n_estimators': 70, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 17, 'bootstrap': True},
# #         'best_score': 0.8417508417508418
# #     },
# #     'ada': {
# #         'model': AdaBoostClassifier,
# #         'params': {
# #             'n_estimators': range(10, 251, 20),
# #             'learning_rate': [.01, .05, .1, .2, .5, 1, 2],
# #             'algorithm': ['SAMME', 'SAMME.R'],
# #             'random_state': [SEED],
# #         },
# #         'best_params': {'random_state': 32, 'n_estimators': 170, 'learning_rate': 1, 'algorithm': 'SAMME.R'},
# #         'best_score': 0.8237934904601572
# #     },
# #     'et': {
# #         'model': ExtraTreesClassifier,
# #         'params': {
# #             'n_estimators': range(10, 251, 20),
# #             'max_features': ['auto', 'sqrt', 'log2', None],
# #             'max_depth': range(5, 20),
# #             'min_samples_split': range(2, 10), # Minimum number of samples required to split a node
# #             'min_samples_leaf': range(1, 10), # Minimum number of samples required at each leaf node
# #             'bootstrap': [True, False], # Method of selecting samples for training each tree,
# #             'random_state': [SEED],
# #         },
# #         'best_params': {'random_state': 32, 'n_jobs': -1, 'n_estimators': 70, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 11, 'bootstrap': True},
# #         'best_score': 0.8294051627384961
# #     },
# #     'gb': {
# #         'model': GradientBoostingClassifier,
# #         'params': {
# #             'n_estimators': range(10, 251, 20),
# #             'max_depth': range(5, 20),
# #             'loss': ['deviance', 'exponential'],
# #             'learning_rate': [.01, .05, .1, .2, .5, 1, 2],                      
# #             'subsample': [.25, .5, .8, 1.],
# #             'min_samples_split': range(2, 10), # Minimum number of samples required to split a node
# #             'min_samples_leaf': range(1, 10), # Minimum number of samples required at each leaf node
# #             'random_state': [SEED],
# #         },
# #         'best_params': {'subsample': 0.5, 'random_state': 32, 'n_estimators': 150, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 13, 'loss': 'exponential', 'learning_rate': 1},
# #         'best_score': 0.8361391694725028
# #     }
# #     'xgb': {
# #         'model': XGBClassifier,
# #         'params': {
# #             'n_estimators': range(8, 20),
# #             'max_depth': range(5, 20),
# #             'learning_rate': [.01, .05, .1, .2, .5, 1, 2],
# #             'colsample_bytree': [.6, .7, .8, .9, 1]
# #         }
# #     }
# }

In [None]:
# """ ---------------------------- Best models configuration search --------------------------- """

# FIT_FROM_SCRATCH = False

# for name, model in MODELS.items():
    
#     if 'best_score' in model and not FIT_FROM_SCRATCH:
        
#         # Initialize with best parameters & fit to data
#         print(f'Fitting {name}...')
        
#         model['best_estimator'] = model['model'](**model['best_params']).fit(xf_train, y_train)
        
#         scores = cross_val_score(model['best_estimator'], xf_train, y_train, cv=N_FOLDS)
#         score = sum(scores) / len(scores)
#         diff = score - model['best_score']
        
#         if diff > 0:
#             print(f'Accuracy of model {name}: {score} (BIGGER for {diff})')
#         elif diff < 0:
#             print(f'Accuracy of model {name}: {score} (SMALLER for {-diff})')
#         else:
#             print(f'Accuracy of model {name}: {score} (SAME)')
#     else:
#         # Perform random search
#         searcher = RandomizedSearchCV(param_distributions=model['params'],
#                                       estimator=model['model'](), scoring="accuracy",
#                                       verbose=1, n_iter=N_ITER, cv=N_FOLDS)
#         # Fit to data
#         print(f'Fitting {name}...')
        
#         searcher.fit(xf_train, y_train)

#         # Print the best parameters and best accuracy
#         print(f'Best parameters found for {name}: {searcher.best_params_}')
#         print(f'Best accuracy found {name}: {searcher.best_score_}')

#         model['best_estimator'] = searcher.best_estimator_
#         model['best_params'] = searcher.best_params_
#         model['best_score'] = searcher.best_score_

In [None]:
# pred = MODELS['knn']['best_estimator'].predict(xf_test)
# pred = np.argmax(pred, axis=1)
# print(len(pred))

In [None]:
# pred = triplet_model.predict(xc_test)
# pred = np.argmax(pred, axis=1)

In [None]:
submission = pd.DataFrame({'ImageId': range(1, pred.shape[0]+1), 'Label': pred})
submission.to_csv('submission.csv', index=False)