# ResNeXt-29

## Get and unzip dataset

In [1]:
import numpy as np
import pandas as pd
import os

!pip install pyunpack
!pip install patool
os.system('apt-get install p7zip')
from pyunpack import Archive
import shutil

# Make sure the extraction target exists, then unpack both competition
# archives (test first, then train) into it.
os.makedirs('/kaggle/working/data', exist_ok=True)
for archive_path in ('/kaggle/input/statoil-iceberg-classifier-challenge/test.json.7z',
                     '/kaggle/input/statoil-iceberg-classifier-challenge/train.json.7z'):
    Archive(archive_path).extractall('/kaggle/working/data/')

Collecting pyunpack
  Downloading pyunpack-0.2.2-py2.py3-none-any.whl (3.8 kB)
Collecting entrypoint2
  Downloading entrypoint2-0.2.3-py2.py3-none-any.whl (8.7 kB)
Collecting easyprocess
  Downloading EasyProcess-0.3-py2.py3-none-any.whl (7.9 kB)
Collecting argparse
  Downloading argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse, entrypoint2, easyprocess, pyunpack
Successfully installed argparse-1.4.0 easyprocess-0.3 entrypoint2-0.2.3 pyunpack-0.2.2
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m
Collecting patool
  Downloading patool-1.12-py2.py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 357 kB/s 
[?25hInstalling collected packages: patool
Successfully installed patool-1.12
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


## ResNeXt-29 model configuration

In [2]:
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
!pip install keras_applications

# from keras.models import Model
from tensorflow.python.keras.models import Model
from keras.layers.core import Dense, Lambda, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.layers import Input, GaussianNoise
from keras.layers.merge import concatenate, add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras.engine.topology import get_source_inputs
from keras_applications.imagenet_utils import _obtain_input_shape
import keras.backend as K


def ResNext(input_shape=None, depth=29, cardinality=8, width=64, weight_decay=5e-4,
            include_top=True, weights=None, input_tensor=None,
            pooling=None, classes=10):
    """Build the ResNeXt model used in this notebook.

    Args:
        input_shape: Optional input shape; validated by `_obtain_input_shape`
            (default size 32, minimum size 8).
        depth: Either an int such that `(depth - 2)` is divisible by 9, or a
            list/tuple giving the number of bottleneck blocks per stage.
        cardinality: Number of groups in each grouped convolution.
        width: Channels per group; the first stage uses `cardinality * width`
            filters.
        weight_decay: L2 regularisation factor applied to the layers.
        include_top: Whether to append global average pooling and the softmax
            classification layer.
        weights: Optional path passed to `model.load_weights`.
        input_tensor: Optional tensor to use as the model input.
        pooling: When `include_top` is False, `'avg'` or `'max'` selects a
            global pooling layer for the output.
        classes: Number of output classes of the softmax layer.

    Returns:
        The constructed model, named `'resnext'`.

    Raises:
        ValueError: If `depth` is an int and `(depth - 2)` is not divisible by 9.
    """
    if isinstance(depth, int) and (depth - 2) % 9 != 0:
        raise ValueError('Depth of the network must be such that (depth - 2) '
                         'is divisible by 9.')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        # Wrap a raw backend tensor so it can be used as a model input.
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    x = __create_res_next(classes, img_input, include_top, depth, cardinality, width,
                          weight_decay, pooling)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='resnext')

    if weights:
        model.load_weights(weights)

    return model


def __initial_conv_block(input, weight_decay=5e-4):
    """Network stem: 3x3 convolution with 32 filters, batch norm, then ELU.

    Args:
        input: Tensor fed to the convolution.
        weight_decay: L2 regularisation factor for the convolution kernel.

    Returns:
        The activated output tensor.
    """
    bn_axis = -1
    if K.image_data_format() == 'channels_first':
        bn_axis = 1

    stem_conv = Conv2D(32, (3, 3), padding='same', use_bias=True,
                       kernel_initializer='he_normal',
                       kernel_regularizer=l2(weight_decay))
    stem_norm = BatchNormalization(axis=bn_axis)
    stem_act = Activation('elu')

    return stem_act(stem_norm(stem_conv(input)))


def __grouped_convolution_block(input, grouped_channels, cardinality, strides, weight_decay=5e-4):
    """Grouped 3x3 convolution followed by batch norm and ELU.

    The input channels are split into `cardinality` consecutive groups of
    `grouped_channels` channels; each group gets its own 3x3 convolution and
    the results are concatenated along the channel axis. With
    `cardinality == 1` a single standard convolution is applied instead.

    Args:
        input: Input tensor.
        grouped_channels: Number of channels per group (also the number of
            filters of each group convolution).
        cardinality: Number of groups.
        strides: Stride used in both spatial dimensions.
        weight_decay: L2 regularisation factor for the convolution kernels.

    Returns:
        The activated output tensor.
    """
    channels_last = K.image_data_format() == 'channels_last'
    channel_axis = -1 if channels_last else 1

    def _conv3x3(tensor):
        return Conv2D(grouped_channels, (3, 3), padding='same', use_bias=True,
                      strides=(strides, strides), kernel_initializer='he_normal',
                      kernel_regularizer=l2(weight_decay))(tensor)

    def _slice_channels(z, start, stop):
        # Select channels [start, stop) along the channel axis of a 4D tensor.
        if channels_last:
            return z[:, :, :, start:stop]
        return z[:, start:stop, :, :]

    if cardinality == 1:
        # with cardinality 1, it is a standard convolution
        merged = _conv3x3(input)
    else:
        group_list = []
        for c in range(cardinality):
            start = c * grouped_channels
            # The slice bounds are passed through `arguments` so each Lambda
            # owns its own values. A closure over the loop variable would see
            # the last group's index if the layer function is invoked again
            # after the loop has finished.
            group = Lambda(_slice_channels,
                           arguments={'start': start,
                                      'stop': start + grouped_channels})(input)
            group_list.append(_conv3x3(group))
        merged = concatenate(group_list, axis=channel_axis)

    x = BatchNormalization(axis=channel_axis)(merged)
    x = Activation('elu')(x)

    return x


def __bottleneck_block(input, filters=64, cardinality=8, strides=1, weight_decay=5e-4):
    """ResNeXt bottleneck: 1x1 conv, grouped 3x3 conv, 1x1 conv, plus shortcut.

    The main path outputs `2 * filters` channels. The shortcut is the input
    itself, or a strided 1x1 convolution + batch norm of it whenever the
    input cannot be added to the main path as is.

    Args:
        input: Input tensor.
        filters: Width of the bottleneck; the block outputs `2 * filters`
            channels.
        cardinality: Number of groups of the grouped convolution.
        strides: Stride of the grouped convolution (and of the projection
            shortcut).
        weight_decay: L2 regularisation factor for the convolution kernels.

    Returns:
        The activated output tensor.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    grouped_channels = filters // cardinality

    # Project the shortcut when its channel count differs from the block's
    # output, or when the main path is spatially down-sampled (strides != 1);
    # otherwise the two branches could not be added.
    shortcut = input
    if shortcut.shape[channel_axis] != 2 * filters or strides != 1:
        shortcut = Conv2D(filters * 2, (1, 1), padding='same', strides=(strides, strides),
                          use_bias=True, kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(shortcut)
        shortcut = BatchNormalization(axis=channel_axis)(shortcut)

    # Main path: reduce -> grouped 3x3 -> expand.
    x = Conv2D(filters, (1, 1), padding='same', use_bias=False,
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('elu')(x)

    x = __grouped_convolution_block(x, grouped_channels, cardinality, strides, weight_decay)

    x = Conv2D(filters * 2, (1, 1), padding='same', use_bias=True, kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(axis=channel_axis)(x)

    x = add([shortcut, x])
    x = Activation('elu')(x)

    return x


def __create_res_next(nb_classes, img_input, include_top, depth=29, cardinality=8, width=4,
                      weight_decay=5e-4, pooling=None):
    """Assemble the ResNeXt graph on top of `img_input` and return its output.

    The input is split into its first two channels and the remaining
    channel(s); Gaussian noise (stddev 5e-2 and 5e-3 respectively) is applied
    to each part before they are concatenated again and fed to the network.

    Args:
        nb_classes: Number of units of the final softmax layer.
        img_input: Input tensor of the model.
        include_top: Whether to append global average pooling and the softmax
            classifier.
        depth: Int depth (three stages of `(depth - 2) // 9` blocks each) or a
            list/tuple with the number of blocks per stage.
        cardinality: Number of groups per grouped convolution.
        width: Channels per group in the first stage; doubled at every stage.
        weight_decay: L2 regularisation factor.
        pooling: `'avg'` or `'max'` global pooling when `include_top` is False.

    Returns:
        The output tensor of the network.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if isinstance(depth, (list, tuple)):
        # If a list is provided, defer to user how many blocks are present
        N = list(depth)
    else:
        # Otherwise, default to 3 blocks each of default number of group convolution blocks
        N = [(depth - 2) // 9 for _ in range(3)]

    # One filter count per stage, doubling from stage to stage.
    filters_list = [cardinality * width * (2 ** stage) for stage in range(len(N))]

    bands = Lambda(lambda t: t[:, :, :, 0:2]
                   if K.image_data_format() == 'channels_last'
                   else t[:, 0:2, :, :])(img_input)

    angle = Lambda(lambda t: t[:, :, :, 2:]
                   if K.image_data_format() == 'channels_last'
                   else t[:, 2:, :, :])(img_input)

    bands_noise = GaussianNoise(5e-2)(bands)
    angle_noise = GaussianNoise(5e-3)(angle)

    # Re-join along the channel axis (not a fixed -1) so that the split above
    # is undone correctly for channels_first data as well.
    noise_input = concatenate([bands_noise, angle_noise], axis=channel_axis)

    x = __initial_conv_block(noise_input, weight_decay)

    # block 1 (no pooling)
    for _ in range(N[0]):
        x = __bottleneck_block(x, filters_list[0], cardinality, strides=1, weight_decay=weight_decay)

    # block 2 to N: the first bottleneck of each stage down-samples with stride 2
    for stage_filters, n_blocks in zip(filters_list[1:], N[1:]):
        for i in range(n_blocks):
            x = __bottleneck_block(x, stage_filters, cardinality,
                                   strides=2 if i == 0 else 1,
                                   weight_decay=weight_decay)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(nb_classes, use_bias=True, kernel_regularizer=l2(weight_decay),
                  kernel_initializer='he_normal', activation='softmax')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    return x

Collecting keras_applications
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 269 kB/s 
Installing collected packages: keras-applications
Successfully installed keras-applications-1.0.8
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


## Utils
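This section defines the helper functions used for training:
1. load_data - Read the train and test JSON files into pandas DataFrames.
2. preprocess - Convert band_1, band_2 and inc_angle into a normalized 75x75x3 image array.
3. prepare_data_cv - Preprocess the data and split the training set into 5 cross-validation folds.
4. prepare_data_full - Preprocess the full training set and one-hot encode its labels.
5. logloss_softmax - Compute the log loss from softmax probabilities and one-hot labels.
6. get_model_callbacks - Set up early stopping, TensorBoard logging, learning-rate reduction when the validation loss stops improving, and model checkpointing.
7. load_model - Build the model, optionally load weights, and compile it.
8. get_resnext - Configure the ResNeXt model.
9. prepare_submission - Combine the model predictions and write the CSV submission file.
10. get_data_generator - Create the augmented training data generator.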

In [3]:
from keras.utils import to_categorical
from sklearn.model_selection import KFold, train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

def load_data(path):
    """Read `train.json` and `test.json` from `path`.

    Returns:
        A `(train, test)` tuple of pandas DataFrames.
    """
    train_frame, test_frame = (pd.read_json(os.path.join(path, file_name))
                               for file_name in ("train.json", "test.json"))
    return (train_frame, test_frame)


def preprocess(df, 
               means=(-22.159262, -24.953745, 40.021883465782651),
               stds=(5.33146, 4.5463958, 4.0815391476694414)):
    """Turn a competition DataFrame into a float32 image array.

    Each row yields a 75x75 image with three channels: standardised `band_1`,
    standardised `band_2`, and a constant plane derived from `inc_angle`.

    Args:
        df: DataFrame with `band_1`, `band_2` (flat sequences of 75*75 values)
            and `inc_angle` columns.
        means: Per-channel offsets subtracted from (band_1, band_2, angle).
        stds: Per-channel divisors for (band_1, band_2, angle).

    Returns:
        Array of shape `(len(df), 75, 75, 3)`.
    """
    def _as_images(column):
        # One (75, 75) float32 image per row.
        return np.array([np.array(flat).astype(np.float32).reshape(75, 75)
                         for flat in column])

    def _angle_value(raw):
        # Missing angles are marked with the string 'na' and replaced by means[2].
        # NOTE(review): known angles are mapped to their cosine but then
        # normalised with means[2]/stds[2], whose defaults look like
        # degree-scale statistics - confirm this is intended.
        if raw != 'na':
            return np.cos(raw * np.pi / 180)
        return means[2]

    band_1_images = _as_images(df["band_1"])
    band_2_images = _as_images(df["band_2"])

    angle_planes = np.array([np.full(shape=(75, 75), fill_value=value).astype(np.float32)
                             for value in df['inc_angle'].map(_angle_value)])

    channels = [
        (band_1_images - means[0]) / stds[0],
        (band_2_images - means[1]) / stds[1],
        (angle_planes - means[2]) / stds[2],
    ]

    # Stack the three planes as the last (channel) axis.
    return np.stack(channels, axis=-1)

def prepare_data_cv(path):
    """Load the data under `path` and build 5 shuffled cross-validation folds.

    Returns:
        `(kfold_data, X_test)` where `kfold_data` is a list of
        `(X_train, y_train, X_val, y_val)` tuples (labels one-hot encoded) and
        `X_test` is the preprocessed test set.
    """
    train, test = load_data(path)
    features = preprocess(train)
    labels = to_categorical(train['is_iceberg'].to_numpy().reshape(-1, 1))

    # Fixed seed so the folds are the same on every run.
    splitter = KFold(n_splits=5, shuffle=True, random_state=0xCAFFE)

    kfold_data = []
    for fit_idx, holdout_idx in splitter.split(labels):
        print(fit_idx)
        fold = (features[fit_idx], labels[fit_idx],
                features[holdout_idx], labels[holdout_idx])
        kfold_data.append(fold)

    return (kfold_data, preprocess(test))

def prepare_data_full(path):
    """Return the whole training set as `(images, one_hot_labels)`."""
    train_frame = load_data(path)[0]
    images = preprocess(train_frame)
    one_hot_labels = to_categorical(train_frame['is_iceberg'].to_numpy().reshape(-1, 1))
    return (images, one_hot_labels)

def logloss_softmax(y_true, y_pred, eps=1e-15):
    """Mean negative log-probability assigned to the true class.

    Args:
        y_true: One-hot labels, shape `(n_samples, n_classes)`.
        y_pred: Predicted class probabilities, same shape as `y_true`.
        eps: Probabilities are clipped to `[eps, 1 - eps]` before the log.

    Returns:
        The log loss as a scalar.
    """
    true_class = np.argmax(y_true, axis=1)
    row_index = np.arange(len(y_pred))
    # Probability each sample assigns to its true class, kept away from 0 and 1.
    clipped = np.clip(y_pred[row_index, true_class], eps, 1 - eps)
    return -np.mean(np.log(clipped))


def get_model_callbacks(save_dir):
    """Create the training callbacks, writing their artefacts under `save_dir`.

    Creates `<save_dir>/board` (TensorBoard logs) and `<save_dir>/model`
    (checkpoint directory) if they do not exist.

    Args:
        save_dir: Directory that receives the logs and the checkpoint.

    Returns:
        `[early_stopping, tensorboard, lr_scheduler, model_checkpoint]`; all
        metric-driven callbacks monitor `val_loss`.
    """
    stopping = EarlyStopping(monitor='val_loss',
                             min_delta=1e-3,
                             patience=45,
                             verbose=False,
                             mode='min')

    board_path = os.path.join(save_dir, 'board')
    os.makedirs(board_path, exist_ok=True)

    board = TensorBoard(log_dir=board_path)

    # `min_delta` is the current name of the deprecated `epsilon` argument.
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss',
                                     factor=0.3,
                                     patience=15,
                                     verbose=True,
                                     mode='min',
                                     min_delta=5e-3,
                                     min_lr=1e-5)

    model_path = os.path.join(save_dir, 'model/model_weights.hdf5')
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    # The deprecated `period=1` is dropped: checking after every epoch is the
    # callback's default. Only the best model (lowest val_loss) is kept.
    model_checkpoint = ModelCheckpoint(model_path,
                                       monitor='val_loss',
                                       verbose=False,
                                       save_best_only=True,
                                       save_weights_only=False,
                                       mode='min')

    return [stopping, board, lr_scheduler, model_checkpoint]


def load_model(model_loader_fn, weights=None):
    """Build, optionally restore, and compile a model.

    Args:
        model_loader_fn: Zero-argument callable returning an uncompiled model.
        weights: Optional path passed to `model.load_weights`.

    Returns:
        The model compiled with RMSprop (learning rate 1e-3),
        `binary_crossentropy` loss and the `accuracy` metric. The model
        summary is printed as a side effect.
    """
    from keras.optimizers import RMSprop

    ## load model using function name
    model = model_loader_fn()

    if weights:
        model.load_weights(weights)

    # `learning_rate` is the current name of the deprecated `lr` argument.
    opt = RMSprop(learning_rate=1e-3)
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    model.summary()
    return model


def get_resnext():
    """Return the ResNeXt-29 configuration used for this competition.

    75x75 inputs with 3 channels, cardinality 4, width 8, no weight decay and
    a 2-class softmax head.
    """
    config = dict(
        input_shape=(75, 75, 3),
        depth=29,
        cardinality=4,
        width=8,
        weight_decay=0.,
        include_top=True,
        weights=None,
        classes=2,
    )
    return ResNext(**config)

def prepare_submission(models_proba, path, high_thr=0.9, low_thr=0.1,
                       data_path='./data/data/processed'):
    """Blend per-model probabilities and write a submission CSV.

    For every sample the blended probability is:
      * the maximum over models if all models are above `high_thr`,
      * the minimum over models if all models are below `low_thr`,
      * the median over models otherwise.

    Args:
        models_proba: Sequence of per-model probability arrays, one entry per
            model, each with one value per test sample.
        path: Destination CSV file.
        high_thr: Threshold above which unanimous models are trusted (max).
        low_thr: Threshold below which unanimous models are trusted (min).
        data_path: Directory holding the JSON data; the test set is read from
            it to obtain the `id` column. Defaults to the location used by
            this notebook.
    """
    _, test = load_data(data_path)
    models_proba = np.array(models_proba)

    all_high = np.all(models_proba > high_thr, axis=0)
    all_low = np.all(models_proba < low_thr, axis=0)
    proba = np.where(all_high,
                     np.max(models_proba, axis=0),
                     np.where(all_low,
                              np.min(models_proba, axis=0),
                              np.median(models_proba, axis=0)))

    submission = pd.DataFrame()
    submission['id'] = test['id']
    submission['is_iceberg'] = proba.reshape(proba.shape[0])
    submission.to_csv(path, index=False)
    
## data augmentation
def get_data_generator(X, y, batch_size=32):
    """Return an augmenting batch iterator over `(X, y)`.

    Augmentation consists of width/height shifts of up to half the image
    (wrapping around at the borders) and random vertical flips.
    """
    augmentation = dict(
        rotation_range=0.,
        width_shift_range=0.5,
        height_shift_range=0.5,
        shear_range=0.,
        zoom_range=0.,
        fill_mode='wrap',
        horizontal_flip=False,
        vertical_flip=True,
        data_format='channels_last',
    )
    img_gen = ImageDataGenerator(**augmentation)
    img_gen.fit(X)

    batches = img_gen.flow(X, y, batch_size=batch_size)
    return batches

## Training

In [4]:
from sklearn.metrics import roc_auc_score, average_precision_score
def _print_metric_summary(title, values):
    """Print mean/std/min/max of the per-fold values collected so far."""
    print('%s:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' % (title,
                                                            np.mean(values),
                                                            np.std(values),
                                                            np.min(values),
                                                            np.max(values)))


def train(experiment_path, plot_results=False, X_full=None):
    """Train one ResNeXt per cross-validation fold and write submissions.

    For every fold a fresh model is trained on augmented data, the best
    checkpoint is restored, validation metrics are collected and a per-fold
    `prediction.csv` is written under `<experiment_path>/fold_XX`. Finally the
    fold predictions are blended into `<experiment_path>/resnext.csv`.

    Args:
        experiment_path: Directory receiving checkpoints, logs and CSV files.
        plot_results: Whether to save diagnostic plots per fold.
            NOTE(review): the `plot_*` helpers are not defined in the visible
            part of this notebook; they must exist before enabling this.
        X_full: Preprocessed full training set used for the train-set
            predictions. Loaded with `prepare_data_full` when omitted, so the
            function no longer depends on a module-level `xtrain`.

    Returns:
        Dict with the per-fold lists `test_proba`, `train_proba`, `acc`,
        `roc_auc`, `logloss` and `map`.
    """
    (kfold_data, X_test) = prepare_data_cv('./data/data/processed')
    if X_full is None:
        X_full, _ = prepare_data_full('./data/data/processed')

    models_proba = []
    models_proba_train = []
    models_acc = []
    models_roc = []
    models_logloss = []
    models_map = []

    for idx, data in enumerate(kfold_data):
        X_train, y_train, X_valid, y_valid = data
        fold_dir = os.path.join(experiment_path, 'fold_%02d' % idx)

        model = load_model(get_resnext, weights=None)
        callbacks = get_model_callbacks(save_dir=fold_dir)
        data_generator = get_data_generator(X_train, y_train, batch_size=128)

        model.fit_generator(
            data_generator,
            steps_per_epoch=10,
            epochs=1000,
            verbose=True,
            validation_data=(X_valid, y_valid),
            callbacks=callbacks,
            shuffle=True)

        # Restore the best checkpoint written by the ModelCheckpoint callback.
        model.load_weights(filepath=os.path.join(fold_dir, 'model/model_weights.hdf5'))

        _, acc_val = model.evaluate(X_valid, y_valid, verbose=False)
        proba = model.predict(X_valid)
        proba_test = model.predict(X_test)[:, 1]

        ## include the prediction for training
        proba_train = model.predict(X_full)[:, 1]
        models_proba_train.append(proba_train)

        # Column 1 of the softmax output is the "iceberg" probability.
        valid_labels = y_valid.argmax(axis=1)
        models_proba.append(proba_test)
        models_acc.append(acc_val)
        models_roc.append(roc_auc_score(valid_labels, proba[:, 1]))
        models_map.append(average_precision_score(valid_labels, proba[:, 1]))
        models_logloss.append(logloss_softmax(y_valid, proba))

        prepare_submission([proba_test], os.path.join(fold_dir, 'prediction.csv'))

        if plot_results:
            plots_path = os.path.join(fold_dir, 'plots')
            os.makedirs(plots_path, exist_ok=True)

            plot_precision_recall(proba[:, 1], valid_labels,
                                  path=os.path.join(plots_path, 'recall_precision.jpg'))

            plot_roc(proba[:, 1], valid_labels,
                     path=os.path.join(plots_path, 'roc.jpg'))

            plot_confusion_matrix(proba[:, 1], valid_labels,
                                  path=os.path.join(plots_path, 'conf.jpg'))

        # Running statistics over the folds finished so far.
        _print_metric_summary('Loss', models_logloss)
        _print_metric_summary('Acc', models_acc)
        _print_metric_summary('ROC AUC', models_roc)
        _print_metric_summary('mAP', models_map)

    prepare_submission(models_proba, os.path.join(experiment_path, 'resnext.csv'))

    return {
        'test_proba': models_proba,
        'train_proba': models_proba_train,
        'acc': models_acc,
        'roc_auc': models_roc,
        'logloss': models_logloss,
        'map': models_map,
    }
    
    
# Entry point of the notebook: load the full training set into the module-level
# names `xtrain` / `ytrain`, then run the cross-validated training with all
# outputs written under the current directory and plotting disabled.
if __name__ == '__main__':
    xtrain, ytrain = prepare_data_full('./data/data/processed')
    train(experiment_path='./', plot_results=False)

[   0    1    3 ... 1600 1601 1602]
[   0    1    2 ... 1601 1602 1603]
[   1    2    3 ... 1601 1602 1603]
[   0    1    2 ... 1597 1600 1603]
[   0    2    3 ... 1601 1602 1603]
Model: "resnext"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 75, 75, 3)]  0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 75, 75, 2)    0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 75, 75, 1)    0           input_1[0][0]                    
__________________________________________________________________________________________________
gaussian_no