In [None]:
# From https://www.kaggle.com/bhavikjain/tps-june-21-eda-models

In [1]:
!pip install tensorflow



In [2]:
!pip install tensorflow-addons



In [47]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss

import gc
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers

from keras.models import Model

In [48]:
# Keep the raw frame (with ids) around: the ids are re-attached when the
# train-set predictions are exported later in the notebook.
train_with_ids = pd.read_csv('train.csv')
# `columns=` replaces the positional axis argument (`drop('id', 1)`), which
# newer pandas releases no longer accept.
train = train_with_ids.drop(columns='id')

test = pd.read_csv('test.csv').drop(columns='id')

# Width of the model input. `train` has already lost `id` here, so the "- 2"
# removes `target` plus one more column; this matches the `iloc[:, 1:-1]`
# feature slice used in do_run(), which skips the first remaining column.
# NOTE(review): that looks like the first feature is dropped by accident
# (the slice was probably written when `id` was still present) - confirm
# before changing, and update do_run() together with this value.
X_dim = np.shape(train)[1] - 2
print(X_dim)

74


In [49]:
# One-hot encode the labels: one indicator column per distinct `target` value.
targets = pd.get_dummies(train.loc[:, 'target'])

In [50]:
def custom_metric(y_true, y_pred):
    """Categorical cross-entropy computed on clipped predictions.

    Predictions are clipped to [1e-15, 1 - 1e-15] before being passed to the
    module-level `cce` loss object, and the mean of that result is returned.
    Keras reports this metric under the function's name, which is what the
    'val_custom_metric' monitors of the callbacks refer to.
    """
    clipped = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped))

# Loss object that custom_metric() calls on the clipped predictions.
cce = keras.losses.CategoricalCrossentropy()

# Stop training once the validation metric has not improved by at least 1e-5
# for 5 epochs, and roll the model back to its best weights.
es = callbacks.EarlyStopping(
    monitor='val_custom_metric',
    mode='min',
    min_delta=1e-05,
    patience=5,
    restore_best_weights=True,
    baseline=None,
    verbose=0,
)

# Multiply the learning rate by 0.7 after 2 epochs without improvement.
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric',
    mode='min',
    factor=0.7,
    patience=2,
    verbose=0,
)

In [51]:
def conv_model():
    """Build the (uncompiled) embedding + 1x1-convolution classifier.

    The network takes `X_dim` integer-coded features (module-level global),
    embeds each one, applies a kernel-size-1 convolution, and feeds the
    flattened result through three weight-normalised dense layers with
    skip connections (each later layer also sees the earlier activations).

    Returns:
        A Keras model mapping an input of shape (batch, X_dim) to a 9-way
        softmax. The caller is responsible for compiling it.
    """
    # `shape` must be a tuple: `(X_dim)` is just the int X_dim, which only
    # some Keras versions tolerate.
    conv_inputs = layers.Input(shape=(X_dim,))

    # Each integer feature value is looked up in a table of 354 rows x 7 dims.
    # NOTE(review): assumes every feature value is an integer in [0, 353] -
    # confirm against the data.
    embed = layers.Embedding(input_dim=354,
                             output_dim=7,
                             embeddings_regularizer='l2')(conv_inputs)
    # Kernel size 1: mixes the 7 embedding channels into 12, per feature.
    embed = layers.Conv1D(12, 1, activation='relu')(embed)
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(0.3)(embed)

    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='selu',
            kernel_initializer="lecun_normal"))(hidden)

    # Second dense block sees the flattened conv features and the first block.
    output = layers.Dropout(0.3)(layers.Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)

    # Third dense block sees everything computed so far.
    output = layers.Dropout(0.4)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)

    conv_outputs = layers.Dense(
        units=9,
        activation='softmax',
        kernel_initializer="lecun_normal")(output)

    # Use the tf.keras Model (via `from tensorflow import keras`) so the model
    # class comes from the same package as the layers above.
    model = keras.Model(conv_inputs, conv_outputs)

    return model

In [52]:
import datetime;
import time;

def do_run(run_number, n_folds=40, epochs=40, seed=None):
    """Train one stratified K-fold round of the convolution model.

    For every fold a fresh `conv_model()` is compiled and fitted on the
    training part, with the held-out part used as validation data for the
    early-stopping / learning-rate callbacks. Each fold model then predicts
    the whole test set and the whole training set; those predictions are
    averaged over the folds.

    Relies on notebook globals: `train`, `test`, `targets`, `conv_model`,
    `custom_metric`, `es` and `plateau`.

    Args:
        run_number: Label used only in the progress messages.
        n_folds: Number of stratified folds (default 40, the original value).
        epochs: Maximum epochs per fold (default 40, the original value).
        seed: Optional `random_state` for the fold shuffling. The default
            `None` keeps the original unseeded behaviour.

    Returns:
        Tuple `(pred_test, pred_train)` of arrays with 9 columns holding the
        fold-averaged class probabilities for `test` and `train`.
    """
    print(datetime.datetime.now())
    start = time.time()

    # The feature slices do not change between folds, so build them once.
    # Column 0 is skipped and the last column of `train` (the target) is cut
    # off; this yields the X_dim columns that conv_model() expects.
    train_features = train.iloc[:, 1:-1]
    test_features = test.iloc[:, 1:]

    # 9 matches the width of the softmax layer in conv_model().
    oof_NN_a = np.zeros((train.shape[0], 9))
    pred_NN_a_train = np.zeros((train.shape[0], 9))
    pred_NN_a_test = np.zeros((test.shape[0], 9))

    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

    for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, train.iloc[:, -1])):
        X_train = train_features.iloc[tr_idx]
        y_train = targets.iloc[tr_idx]
        X_valid = train_features.iloc[ts_idx]
        y_valid = targets.iloc[ts_idx]

        # Drop the previous fold's graph/state before building a new model.
        K.clear_session()

        model_conv = conv_model()
        model_conv.compile(loss='categorical_crossentropy',
                           optimizer='adam',
                           metrics=[custom_metric])
        model_conv.fit(X_train, y_train,
                       batch_size=256, epochs=epochs,
                       validation_data=(X_valid, y_valid),
                       callbacks=[es, plateau],
                       verbose=0)

        # Out-of-fold predictions. The fold score is measured on the same rows
        # that drove early stopping, so it is slightly optimistic.
        pred_a = model_conv.predict(X_valid, verbose=0)
        oof_NN_a[ts_idx] += pred_a
        score_NN_a = log_loss(y_valid, pred_a)
        print(f"RUN {run_number} - FOLD {fold} - Score: {score_NN_a}")

        pred_NN_a_test += model_conv.predict(test_features, verbose=0) / n_folds

        # NOTE(review): every fold model was fitted on all rows except its own
        # held-out fold, so this average is mostly in-sample. `oof_NN_a` holds
        # the leakage-free predictions but is only used for the score below.
        pred_NN_a_train += model_conv.predict(train_features, verbose=0) / n_folds

    score_a = log_loss(targets, oof_NN_a)
    print(f"=== FINAL SCORE CONVOLUTION MODEL : {score_a} ===")
    print(datetime.datetime.now())
    minutes = (time.time() - start) / 60
    print('minutes', minutes, '\n')

    return pred_NN_a_test, pred_NN_a_train
    

In [54]:
# Repeat the full K-fold procedure n_runs times and average the predictions.
n_runs = 11

# Run 1 initialises the running sums; runs 2..n_runs are added on top.
test_result, train_result = do_run(1)
for run_id in range(2, n_runs + 1):
    run_test, run_train = do_run(run_id)
    test_result = test_result + run_test
    train_result = train_result + run_train

# Turn the sums into per-run means.
test_result, train_result = test_result / n_runs, train_result / n_runs

2021-06-27 05:03:48.055136
RUN 1 - FOLD 0 - Score: 1.7425387422591447
RUN 1 - FOLD 1 - Score: 1.7489116918619723
RUN 1 - FOLD 2 - Score: 1.7420245331533253
RUN 1 - FOLD 3 - Score: 1.7516593484152108
RUN 1 - FOLD 4 - Score: 1.7374436429884286
RUN 1 - FOLD 5 - Score: 1.7659408931106328
RUN 1 - FOLD 6 - Score: 1.751011481987685
RUN 1 - FOLD 7 - Score: 1.7456238757837563
RUN 1 - FOLD 8 - Score: 1.7516188460946083
RUN 1 - FOLD 9 - Score: 1.743616286149621
RUN 1 - FOLD 10 - Score: 1.7346673598546534
RUN 1 - FOLD 11 - Score: 1.7403371008319781
RUN 1 - FOLD 12 - Score: 1.7640618617653847
RUN 1 - FOLD 13 - Score: 1.7445247715741397
RUN 1 - FOLD 14 - Score: 1.7559754335798323
RUN 1 - FOLD 15 - Score: 1.7531189041450619
RUN 1 - FOLD 16 - Score: 1.7597055808991193
RUN 1 - FOLD 17 - Score: 1.734130567053333
RUN 1 - FOLD 18 - Score: 1.7399619580443948
RUN 1 - FOLD 19 - Score: 1.7518046247713268
RUN 1 - FOLD 20 - Score: 1.7466915615588428
RUN 1 - FOLD 21 - Score: 1.755674984737858
RUN 1 - FOLD 22 - S

RUN 5 - FOLD 15 - Score: 1.7437316907977685
RUN 5 - FOLD 16 - Score: 1.7349573310643434
RUN 5 - FOLD 17 - Score: 1.756947725495696
RUN 5 - FOLD 18 - Score: 1.7475441708290949
RUN 5 - FOLD 19 - Score: 1.7363472023658455
RUN 5 - FOLD 20 - Score: 1.7703909792598338
RUN 5 - FOLD 21 - Score: 1.7536700777061283
RUN 5 - FOLD 22 - Score: 1.7493681210190057
RUN 5 - FOLD 23 - Score: 1.737406963852793
RUN 5 - FOLD 24 - Score: 1.7337396510101855
RUN 5 - FOLD 25 - Score: 1.7564158952571451
RUN 5 - FOLD 26 - Score: 1.7385485713906585
RUN 5 - FOLD 27 - Score: 1.7475286889135837
RUN 5 - FOLD 28 - Score: 1.746258626424242
RUN 5 - FOLD 29 - Score: 1.7548437268838286
RUN 5 - FOLD 30 - Score: 1.7422427660748363
RUN 5 - FOLD 31 - Score: 1.7430968268044293
RUN 5 - FOLD 32 - Score: 1.7365443023733795
RUN 5 - FOLD 33 - Score: 1.7442073958676307
RUN 5 - FOLD 34 - Score: 1.7591212812777608
RUN 5 - FOLD 35 - Score: 1.7600270016409456
RUN 5 - FOLD 36 - Score: 1.7575048909269273
RUN 5 - FOLD 37 - Score: 1.74262986

RUN 9 - FOLD 31 - Score: 1.7418426658239217
RUN 9 - FOLD 32 - Score: 1.7530800463031977
RUN 9 - FOLD 33 - Score: 1.7428721500791609
RUN 9 - FOLD 34 - Score: 1.757747287577763
RUN 9 - FOLD 35 - Score: 1.7436723824113607
RUN 9 - FOLD 36 - Score: 1.7385353985328227
RUN 9 - FOLD 37 - Score: 1.7386998031821101
RUN 9 - FOLD 38 - Score: 1.758275546335429
RUN 9 - FOLD 39 - Score: 1.7374186799583957
=== FINAL SCORE CONVOLUTION MODEL : 1.7472370730330387 ===
2021-06-27 10:48:24.016931
minutes 37.54686415195465 

2021-06-27 10:48:24.034318
RUN 10 - FOLD 0 - Score: 1.7478530859928578
RUN 10 - FOLD 1 - Score: 1.739514238377288
RUN 10 - FOLD 2 - Score: 1.7566556204266845
RUN 10 - FOLD 3 - Score: 1.7454923409432173
RUN 10 - FOLD 4 - Score: 1.738334350701049
RUN 10 - FOLD 5 - Score: 1.7459007139038294
RUN 10 - FOLD 6 - Score: 1.7473438061006368
RUN 10 - FOLD 7 - Score: 1.7439466022823007
RUN 10 - FOLD 8 - Score: 1.7457756156906485
RUN 10 - FOLD 9 - Score: 1.7517536015711725
RUN 10 - FOLD 10 - Score: 1

In [55]:
# Log loss of the averaged train-set predictions (non-negative, so no abs()).
# Every fold model in do_run() predicts all training rows, most of which it
# was fitted on, so this number is largely in-sample.
log_loss(train['target'], train_result)

1.7289439103236066

In [56]:
# Save the averaged train-set probabilities, one column per class, with the
# original row ids appended as the last column.
submission = pd.DataFrame(
    train_result,
    columns=[f'Class_{k}' for k in range(1, 10)],
).assign(id=train_with_ids['id'])

submission.to_csv("cnn_ensemble_train.csv", index=False)

In [57]:
# Fill the sample submission with the averaged test-set probabilities:
# column k of test_result goes into 'Class_<k+1>'.
submission = pd.read_csv("./sample_submission.csv")
for col_idx in range(9):
    submission[f'Class_{col_idx + 1}'] = test_result[:, col_idx]

submission.to_csv("submission_cnn_ensemble_test.csv", index=False)