# ResNet

![](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Piramidal_cell.svg/440px-Piramidal_cell.svg.png)

>ResNet consists of residual blocks that transfer the knowledge from one layer to  further layers by skipping some layers in between. These kinds of connections of  layers are known as skip-connections since we are skipping one or more layers.  Skip-connections help with the vanishing gradient issue by propagating the  gradients to further layers. This allows us to train very large convolutional neural  networks without loss of performance.
>Thakur, Abhishek. Approaching (Almost) Any Machine Learning Problem (p.205). Abhishek Thakur. 

The model pipeline is derived from this amazing notebook, so please upvote it too!

[OWN Jane Street with Keras NN](https://www.kaggle.com/tarlannazarov/own-jane-street-with-keras-nn)

## Description
The base notebook is [[janestreet] ResNet starter](https://www.kaggle.com/code1110/janestreet-resnet-starter).   
The architecture follows [Pytorch Resnet Starter[Training]](https://www.kaggle.com/a763337092/pytorch-resnet-starter-training).

In [None]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda, GaussianNoise, Activation
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import pandas as pd
from tqdm import tqdm
from random import choices

SEED = 1111

# Seed the TensorFlow and NumPy random number generators.
tf.random.set_seed(SEED)
np.random.seed(SEED)

# The data is read from a feather copy; the original CSV lives at
# '../input/jane-street-market-prediction/train.csv'.
train = pd.read_feather('../input/janestreet-save-as-feather/train.feather')
# Keep only rows with date > 85 and a non-zero weight.
train = train.query('date > 85').reset_index(drop=True)
train = train[train['weight'] != 0]

VALID = False
if VALID:
    # Hold out the rows with date > 300. This query used to read
    # 'data > 300'; every other filter in this script uses the 'date'
    # column, so the misspelling broke the VALID=True path.
    train_va = train.query('date > 300')
    train = train.query('date <= 300').reset_index(drop=True)

# Impute missing values with the per-column mean of the remaining rows.
# This happens BEFORE the cross features are built, so the crosses below are
# computed from imputed inputs.
train.fillna(train.mean(), inplace=True)

# Hand-made interaction features.
train['cross_41_42_43'] = train['feature_41'] + train['feature_42'] + train['feature_43']
# The 1e-5 offset keeps the division away from an exact zero denominator.
train['cross_1_2'] = train['feature_1'] / (train['feature_2'] + 1e-5)

# Every column whose name contains "feature", followed by the two crosses.
features = [c for c in train.columns if "feature" in c]
features.extend(['cross_41_42_43', 'cross_1_2'])

# Column means for every feature except the first one; the inference loop
# uses them to fill NaNs in columns 1: of each test row.
f_mean = np.mean(train[features[1:]].values, axis=0)

resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

# One binary target per response column (1 where the response is positive),
# stacked into an array of shape (n_rows, len(resp_cols)).
y_train = np.stack([(train[c] > 0).astype('int') for c in resp_cols]).T

In [None]:
# Feature matrix for training: the selected feature columns as a NumPy array.
X_train = train[features].to_numpy()

In [None]:
# Number of feature columns, i.e. the width of X_train
# (shown as the cell's output when run in a notebook).
len(features)

# ResNet
The architecture below is derived from [[janestreet] ResNet with AutoEncoder (infer)](https://www.kaggle.com/code1110/janestreet-resnet-with-autoencoder-infer).

In [None]:
def _hidden_block(hidden_size, dropout_rate, name):
    """Build one Dense -> BatchNorm -> swish -> Dropout stack as a named Sequential."""
    return tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_size),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation(tf.keras.activations.swish),
        tf.keras.layers.Dropout(dropout_rate),
    ], name=name)


def create_resnet(n_features, n_labels, learning_rate=1e-03, label_smoothing=1e-02, dropout_rate=0.2, hidden_size=160):
    """Build and compile the skip-connected MLP used as the classifier.

    The input goes through BatchNorm + Dropout, then three hidden blocks
    (Dense -> BatchNorm -> swish -> Dropout). Each block after the first
    receives the concatenation of the two preceding activations, and the
    sigmoid output head receives the concatenation of the last two blocks.

    Args:
        n_features: Number of input columns (an int), or an already-built
            shape tuple.
        n_labels: Number of sigmoid outputs.
        learning_rate: Learning rate passed to RectifiedAdam.
        label_smoothing: Label smoothing for the binary cross-entropy loss.
        dropout_rate: Dropout rate used after the input BatchNorm and in
            every hidden block.
        hidden_size: Width of each hidden Dense layer.

    Returns:
        A compiled `tf.keras.Model` with AUC as its metric.
    """
    # Keras documents `shape` as a tuple; a bare int is not accepted by every
    # Keras version, so normalise it here while still accepting tuples.
    if isinstance(n_features, (int, np.integer)):
        input_shape = (int(n_features),)
    else:
        input_shape = tuple(n_features)

    input_0 = tf.keras.layers.Input(shape=input_shape, name='Input0')

    # Layers are created in the same order as before so that auto-generated
    # layer names (and therefore saved-weight layouts) stay the same.
    head_0 = tf.keras.Sequential([
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate)
    ])
    input_1 = head_0(input_0)

    head_1 = _hidden_block(hidden_size, dropout_rate, 'Head1')
    input_2 = head_1(input_1)
    input_2_concat = tf.keras.layers.Concatenate()([input_1, input_2])

    head_2 = _hidden_block(hidden_size, dropout_rate, 'Head2')
    input_3 = head_2(input_2_concat)
    input_3_concat = tf.keras.layers.Concatenate()([input_2, input_3])

    head_3 = _hidden_block(hidden_size, dropout_rate, 'Head3')
    input_4 = head_3(input_3_concat)
    input_4_concat = tf.keras.layers.Concatenate()([input_3, input_4])

    head_4 = tf.keras.Sequential([
        tf.keras.layers.Dense(n_labels),
        tf.keras.layers.Activation("sigmoid")
    ], name='Head4')
    output = head_4(input_4_concat)

    model = tf.keras.models.Model(inputs=input_0, outputs=output)

    # RectifiedAdam wrapped in stochastic weight averaging.
    # NOTE(review): the SWA wrapper appears to keep its averaged weights
    # separately from the model variables; presumably
    # `opt.assign_average_vars(model.variables)` has to be called after
    # training for the averages to take effect -- confirm against the
    # tensorflow_addons docs.
    opt = tfa.optimizers.RectifiedAdam(learning_rate=learning_rate)
    opt = tfa.optimizers.SWA(opt)
    model.compile(optimizer=opt,
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=['AUC'])

    return model

# Reset Keras' global state, then build the classifier and print its layers.
tf.keras.backend.clear_session()
clf = create_resnet(
    n_features=len(features),
    n_labels=len(resp_cols),
    learning_rate=1e-03,
    label_smoothing=1e-02,
)

clf.summary()

In [None]:
# Render a diagram of the model's layer graph.
tf.keras.utils.plot_model(clf)

# Fit & Predict

In [None]:
# TRAIN=True fits the model on the full training matrix and saves it;
# TRAIN=False loads previously trained weights instead.
TRAIN = True
if not TRAIN:
    clf.load_weights('../input/resnet-train-haya/resnet.h5')
else:
    clf.fit(X_train, y_train, epochs=200, batch_size=16 * 4096)
    # save model
    clf.save('resnet_cross_feature.h5')

# Settings read by the inference loop:
#   f      - collapses the averaged per-label predictions into one score
#   th     - score at or above which the action is 1
#   models - the ensemble (at most the last three models)
f = np.median
th = 0.501

ensemble = [clf]
models = ensemble[-3:]

In [None]:
import janestreet

# Inference: for each test row, impute missing features, build the cross
# features, average the ensemble's predictions and submit a 0/1 action.
#
# Training imputes NaNs first and then derives the cross features from the
# imputed columns. The loop below follows the same order; it previously built
# the crosses from raw NaN inputs and then replaced the NaN cross with the
# mean cross value, which is not what the model saw during training.

# Lookups built once, outside the per-row loop.
raw_features = [c for c in features if "feature" in c]
cross_features = ['cross_41_42_43', 'cross_1_2']
if list(features) != raw_features + cross_features:
    raise ValueError(
        "inference expects `features` to be the raw feature columns "
        "followed by " + repr(cross_features)
    )
col_pos = {c: i for i, c in enumerate(raw_features)}
# f_mean is aligned with features[1:]; the first feature has no stored mean,
# so its fill value is NaN and a NaN there is left as-is (as before).
train_mean = dict(zip(features[1:], f_mean))
raw_fill = np.array([train_mean.get(c, np.nan) for c in raw_features])

env = janestreet.make_env()
for (test_df, pred_df) in tqdm(env.iter_test()):
    if test_df['weight'].item() > 0:
        # 1) Raw features, with NaNs replaced by the training means.
        x_raw = test_df.loc[:, raw_features].values
        x_raw = np.where(np.isnan(x_raw), raw_fill, x_raw)

        # 2) Cross features derived from the imputed values, with the same
        #    formulas as in training.
        cross_41_42_43 = (x_raw[:, col_pos['feature_41']]
                          + x_raw[:, col_pos['feature_42']]
                          + x_raw[:, col_pos['feature_43']])
        cross_1_2 = x_raw[:, col_pos['feature_1']] / (x_raw[:, col_pos['feature_2']] + 1e-5)

        # 3) Assemble the row in the column order of `features`.
        x_tt = np.column_stack([x_raw, cross_41_42_43, cross_1_2])

        # Average over the ensemble, then collapse the per-label outputs
        # into one score with `f` and threshold it.
        pred = np.mean([model([x_tt], training=False).numpy() for model in models], axis=0)
        pred = f(pred)
        pred_df.action = np.where(pred >= th, 1, 0).astype(int)
    else:
        # Rows with zero (or negative) weight always get action 0.
        pred_df.action = 0
    env.predict(pred_df)

That's it!