In [None]:
# From https://www.kaggle.com/bhavikjain/tps-june-21-eda-models

In [1]:
!pip install tensorflow



In [2]:
!pip install tensorflow-addons



In [87]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss

import gc
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers
from sklearn.preprocessing import OrdinalEncoder

from keras.models import Model

In [88]:
# Raw competition tables, read from the notebook's working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Feature frames: remove the row identifier and, for the training table, the label column.
# `columns=` is spelled out because pandas 2.x no longer accepts the axis as a
# positional argument to DataFrame.drop.
train_features = train_df.drop(columns=['target', 'id'])
test_features = test_df.drop(columns='id')

In [89]:
def do_transform(the_train, the_test):
    """Return copies of both frames with an ordinal-encoded twin of every column.

    An ``OrdinalEncoder`` is fitted on the row-wise concatenation of
    ``the_train`` and ``the_test``, so a given value receives the same code in
    both frames. For every column ``c`` the result gains a ``c_oe`` column; the
    original columns come first, followed by the ``_oe`` columns in the same
    order.

    Parameters
    ----------
    the_train, the_test : pandas.DataFrame
        Frames that share the same columns. They are not modified; earlier
        revisions added the ``_oe`` columns in place, which made re-running the
        cell stack ``_oe_oe`` columns on top.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_with_encodings, test_with_encodings)``.
    """
    full_df = pd.concat([the_train, the_test])
    feature_columns = list(full_df.columns)

    # OrdinalEncoder derives each column's categories from its sorted unique
    # values, so the frame does not need to be sorted first, and a single fit
    # handles every column independently.
    encoder = OrdinalEncoder()
    encoder.fit(full_df[feature_columns])

    encoded_names = [f"{column}_oe" for column in feature_columns]

    def _with_encoded(frame):
        # Build all encoded columns at once and attach them with one concat
        # instead of inserting them one by one.
        encoded = pd.DataFrame(
            encoder.transform(frame[feature_columns]),
            index=frame.index,
            columns=encoded_names,
        )
        return pd.concat([frame, encoded], axis=1)

    return _with_encoded(the_train), _with_encoded(the_test)


# `train` / `test` are the frames used from here on: the feature columns plus their `_oe` encodings.
train, test = do_transform(train_features, test_features)

In [90]:
# Sanity check: both frames should report the same number of columns.
print(train.shape[1], test.shape[1], sep='\n')

150
150


In [91]:
# One indicator column per distinct value of the label column.
targets = pd.get_dummies(train_df['target'])

# Number of input columns seen by the network.
X_dim = train.shape[1]
print(X_dim)

150


In [92]:
def custom_metric(y_true, y_pred):
    """Categorical cross-entropy computed on clipped predictions.

    Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before being passed to
    the module-level ``cce`` loss object, and the mean of that value is
    returned.

    The callbacks in this notebook monitor ``'val_custom_metric'``, so this
    function's name must not change without updating them.
    """
    clipped = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped))

# Loss object used inside custom_metric.
cce = tf.keras.losses.CategoricalCrossentropy()

# Stop once the validation metric has not improved by at least 1e-05 for
# 5 epochs, and roll the model back to its best weights.
es = callbacks.EarlyStopping(
    monitor='val_custom_metric',
    mode='min',
    min_delta=1e-05,
    patience=5,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Multiply the learning rate by 0.7 after 2 epochs without improvement.
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric',
    mode='min',
    factor=0.7,
    patience=2,
    verbose=0,
)

In [93]:
def conv_model(n_features=None, vocab_size=354, n_classes=9):
    """Build the embedding + 1x1 convolution classifier (uncompiled).

    Each integer input is embedded into 7 dimensions, passed through a
    ``Conv1D`` with 12 filters and kernel size 1, and flattened. Three
    weight-normalised dense layers of 32 units follow; the second and third
    receive the flattened embedding concatenated with the earlier dense
    outputs. The final layer is a softmax over ``n_classes``.

    Parameters
    ----------
    n_features : int, optional
        Number of input columns. Defaults to the notebook-level ``X_dim``.
    vocab_size : int, default 354
        ``input_dim`` of the embedding. Keras requires it to be larger than the
        largest integer fed to the model.
        NOTE(review): 354 was presumably derived from the data's maximum
        value - confirm before reusing on other data.
    n_classes : int, default 9
        Number of softmax outputs.

    Returns
    -------
    tf.keras.Model
    """
    if n_features is None:
        n_features = X_dim

    # `shape` must be a tuple; `(X_dim)` without the trailing comma is a bare int.
    conv_inputs = layers.Input(shape=(n_features,))
    embed = layers.Embedding(input_dim=vocab_size,
                             output_dim=7,
                             embeddings_regularizer='l2')(conv_inputs)
    embed = layers.Conv1D(12, 1, activation='relu')(embed)
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(0.3)(embed)

    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='selu',
            kernel_initializer="lecun_normal"))(hidden)

    # Skip connections: later layers also see the flattened embedding and
    # the outputs of the earlier dense layers.
    output = layers.Dropout(0.3)(layers.Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)
    output = layers.Dropout(0.4)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)

    conv_outputs = layers.Dense(
        units=n_classes,
        activation='softmax',
        kernel_initializer="lecun_normal")(output)

    # Use the tf.keras Model so it comes from the same Keras implementation
    # as the layers above.
    return tf.keras.Model(conv_inputs, conv_outputs)

In [100]:
import datetime;
import time;

def do_run(run_number, n_folds=50, epochs=60):
    """Train one cross-validated ensemble and return its averaged predictions.

    The notebook-level ``train`` frame is split into ``n_folds`` stratified
    folds. For each fold a fresh ``conv_model()`` is trained on the remaining
    rows, validated on the held-out rows, and then used to predict the full
    ``test`` and ``train`` frames. Those predictions are averaged over the
    folds. Per-fold and overall out-of-fold log loss, timestamps and the
    elapsed minutes are printed.

    The split is shuffled without a fixed ``random_state``, so every call
    produces a different partition.

    Parameters
    ----------
    run_number : int
        Label used only in the progress messages.
    n_folds : int, default 50
        Number of stratified folds (and therefore models) per run.
    epochs : int, default 60
        Maximum epochs per model; early stopping may end training sooner.

    Returns
    -------
    tuple of numpy.ndarray
        ``(test_predictions, train_predictions)``, each with one column per
        column of ``targets``.

    NOTE(review): the returned train predictions average models that were each
    fitted on most of the training rows, so they are largely in-sample. The
    out-of-fold array computed here is only used for the printed score -
    confirm which one downstream stacking should consume.
    """
    print(datetime.datetime.now())
    start = time.time()

    n_classes = targets.shape[1]
    oof_NN_a = np.zeros((train.shape[0], n_classes))
    pred_NN_a_train = np.zeros((train.shape[0], n_classes))
    pred_NN_a_test = np.zeros((test.shape[0], n_classes))

    # Stratify on the class label. The previous code passed the last *feature*
    # column as `y`, so folds were balanced on that feature instead of the target.
    class_labels = targets.to_numpy().argmax(axis=1)
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True)

    for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, class_labels)):
        X_train, y_train = train.iloc[tr_idx], targets.iloc[tr_idx]
        X_valid, y_valid = train.iloc[ts_idx], targets.iloc[ts_idx]

        # Drop the previous fold's graph/state before building a new model.
        K.clear_session()

        model_conv = conv_model()
        model_conv.compile(loss='categorical_crossentropy',
                           optimizer=Adam(learning_rate=2e-4),
                           metrics=[custom_metric])

        model_conv.fit(X_train, y_train,
                       batch_size=256, epochs=epochs,
                       validation_data=(X_valid, y_valid),
                       callbacks=[es, plateau],
                       verbose=0)

        pred_a = model_conv.predict(X_valid)
        # Each row belongs to exactly one validation fold.
        oof_NN_a[ts_idx] = pred_a
        score_NN_a = log_loss(y_valid, pred_a)
        print(f"RUN {run_number} - FOLD {fold} - Score: {score_NN_a}")

        pred_NN_a_test += model_conv.predict(test) / n_folds
        pred_NN_a_train += model_conv.predict(train) / n_folds

    score_a = log_loss(targets, oof_NN_a)
    print(f"=== FINAL SCORE CONVOLUTION MODEL : {score_a} ===")
    print(datetime.datetime.now())
    minutes = (time.time() - start) / 60
    print('minutes', minutes, '\n')

    return pred_NN_a_test, pred_NN_a_train
    

In [101]:
n_runs = 11

# Sum the predictions of every run, then divide once to get the mean over runs.
test_result = train_result = None
for run_number in range(1, n_runs + 1):
    run_test, run_train = do_run(run_number)
    if test_result is None:
        test_result, train_result = run_test, run_train
    else:
        test_result = test_result + run_test
        train_result = train_result + run_train

test_result = test_result / n_runs
train_result = train_result / n_runs

2021-06-28 02:19:15.226071




RUN 1 - FOLD 0 - Score: 1.7500080902157351
RUN 1 - FOLD 1 - Score: 1.7384451197898014
RUN 1 - FOLD 2 - Score: 1.7456390560418367
RUN 1 - FOLD 3 - Score: 1.7629970220215618
RUN 1 - FOLD 4 - Score: 1.7092326966375113
RUN 1 - FOLD 5 - Score: 1.739131463920232
RUN 1 - FOLD 6 - Score: 1.7623983172276056
RUN 1 - FOLD 7 - Score: 1.7296956301946194
RUN 1 - FOLD 8 - Score: 1.7455806619315408
RUN 1 - FOLD 9 - Score: 1.7602252573771402
RUN 1 - FOLD 10 - Score: 1.7686445319862105
RUN 1 - FOLD 11 - Score: 1.7310334147484974
RUN 1 - FOLD 12 - Score: 1.7327031864887104
RUN 1 - FOLD 13 - Score: 1.7639899813756346
RUN 1 - FOLD 14 - Score: 1.7377340403641575
RUN 1 - FOLD 15 - Score: 1.737863386251498
RUN 1 - FOLD 16 - Score: 1.7813178463252262
RUN 1 - FOLD 17 - Score: 1.7314485691105947
RUN 1 - FOLD 18 - Score: 1.7369493845056276
RUN 1 - FOLD 19 - Score: 1.7351157227782532
RUN 1 - FOLD 20 - Score: 1.7425142383705825
RUN 1 - FOLD 21 - Score: 1.75106002821913
RUN 1 - FOLD 22 - Score: 1.7709336436185985
RU



RUN 2 - FOLD 0 - Score: 1.7390485026296227
RUN 2 - FOLD 1 - Score: 1.7427835170594044
RUN 2 - FOLD 2 - Score: 1.7621150085409172
RUN 2 - FOLD 3 - Score: 1.7468529712921008
RUN 2 - FOLD 4 - Score: 1.7550102663999423
RUN 2 - FOLD 5 - Score: 1.763511789832264
RUN 2 - FOLD 6 - Score: 1.727429956048727
RUN 2 - FOLD 7 - Score: 1.7397079683523624
RUN 2 - FOLD 8 - Score: 1.7582825923627243
RUN 2 - FOLD 9 - Score: 1.7296934760119766
RUN 2 - FOLD 10 - Score: 1.7285958187915385
RUN 2 - FOLD 11 - Score: 1.7844862252594904
RUN 2 - FOLD 12 - Score: 1.7501694556968286
RUN 2 - FOLD 13 - Score: 1.7536769050760195
RUN 2 - FOLD 14 - Score: 1.7644232330317609
RUN 2 - FOLD 15 - Score: 1.7351215240219608
RUN 2 - FOLD 16 - Score: 1.7334722299799323
RUN 2 - FOLD 17 - Score: 1.7597656541913749
RUN 2 - FOLD 18 - Score: 1.7688293039649725
RUN 2 - FOLD 19 - Score: 1.7387778335334734
RUN 2 - FOLD 20 - Score: 1.7396433761278167
RUN 2 - FOLD 21 - Score: 1.735690149722621
RUN 2 - FOLD 22 - Score: 1.7591055994639173
R



RUN 3 - FOLD 0 - Score: 1.7465070047085174
RUN 3 - FOLD 1 - Score: 1.7444278765080963
RUN 3 - FOLD 2 - Score: 1.7489312380040065
RUN 3 - FOLD 3 - Score: 1.7727419250868262
RUN 3 - FOLD 4 - Score: 1.745968037666753
RUN 3 - FOLD 5 - Score: 1.7596466587278992
RUN 3 - FOLD 6 - Score: 1.7410853618402033
RUN 3 - FOLD 7 - Score: 1.7387251353329047
RUN 3 - FOLD 8 - Score: 1.7590519602186978
RUN 3 - FOLD 9 - Score: 1.7531201275275088
RUN 3 - FOLD 10 - Score: 1.7564077748642304
RUN 3 - FOLD 11 - Score: 1.7339088375000282
RUN 3 - FOLD 12 - Score: 1.7554077565250918
RUN 3 - FOLD 13 - Score: 1.730172403323464
RUN 3 - FOLD 14 - Score: 1.7511442809719593
RUN 3 - FOLD 15 - Score: 1.7454854355631395
RUN 3 - FOLD 16 - Score: 1.7615362770659848
RUN 3 - FOLD 17 - Score: 1.7327404250064864
RUN 3 - FOLD 18 - Score: 1.7789274292588233
RUN 3 - FOLD 19 - Score: 1.752933837994933
RUN 3 - FOLD 20 - Score: 1.7323231181111187
RUN 3 - FOLD 21 - Score: 1.732413108916022
RUN 3 - FOLD 22 - Score: 1.7379386817477644
RU



RUN 4 - FOLD 0 - Score: 1.7495181746855377
RUN 4 - FOLD 1 - Score: 1.7435554268416018
RUN 4 - FOLD 2 - Score: 1.761466009209398
RUN 4 - FOLD 3 - Score: 1.7540976015159395
RUN 4 - FOLD 4 - Score: 1.7609539800994098
RUN 4 - FOLD 5 - Score: 1.749564764956478
RUN 4 - FOLD 6 - Score: 1.7466740840477868
RUN 4 - FOLD 7 - Score: 1.7701790187102742
RUN 4 - FOLD 8 - Score: 1.7451068313848228
RUN 4 - FOLD 9 - Score: 1.7575923100970685
RUN 4 - FOLD 10 - Score: 1.7541136665660888
RUN 4 - FOLD 11 - Score: 1.7326338529498317
RUN 4 - FOLD 12 - Score: 1.7540324728921988
RUN 4 - FOLD 13 - Score: 1.7179465674385428
RUN 4 - FOLD 14 - Score: 1.718480181518942
RUN 4 - FOLD 15 - Score: 1.7576137331563513
RUN 4 - FOLD 16 - Score: 1.7496044183764607
RUN 4 - FOLD 17 - Score: 1.7311342220362276
RUN 4 - FOLD 18 - Score: 1.7518638399639166
RUN 4 - FOLD 19 - Score: 1.7283367442358286
RUN 4 - FOLD 20 - Score: 1.741078114088159
RUN 4 - FOLD 21 - Score: 1.71592515832372
RUN 4 - FOLD 22 - Score: 1.753059500339441
RUN 4



RUN 5 - FOLD 0 - Score: 1.741217251711525
RUN 5 - FOLD 1 - Score: 1.726679528694134
RUN 5 - FOLD 2 - Score: 1.741251354839187
RUN 5 - FOLD 3 - Score: 1.7515247503472493
RUN 5 - FOLD 4 - Score: 1.7311779053043574
RUN 5 - FOLD 5 - Score: 1.7671172811798752
RUN 5 - FOLD 6 - Score: 1.7333916687737219
RUN 5 - FOLD 7 - Score: 1.7392429435579106
RUN 5 - FOLD 8 - Score: 1.7419450608594342
RUN 5 - FOLD 9 - Score: 1.7280439254445956
RUN 5 - FOLD 10 - Score: 1.7516663096249103
RUN 5 - FOLD 11 - Score: 1.7484276562496088
RUN 5 - FOLD 12 - Score: 1.7542855432454962
RUN 5 - FOLD 13 - Score: 1.75362581078941
RUN 5 - FOLD 14 - Score: 1.75868291479256
RUN 5 - FOLD 15 - Score: 1.7385233426103368
RUN 5 - FOLD 16 - Score: 1.7492531992313451
RUN 5 - FOLD 17 - Score: 1.7686805138224735
RUN 5 - FOLD 18 - Score: 1.7440042783669196
RUN 5 - FOLD 19 - Score: 1.7709701288477517
RUN 5 - FOLD 20 - Score: 1.7557297096140683
RUN 5 - FOLD 21 - Score: 1.7795236673657782
RUN 5 - FOLD 22 - Score: 1.7385491836220026
RUN 5



RUN 6 - FOLD 0 - Score: 1.7307475356417708
RUN 6 - FOLD 1 - Score: 1.7366444646297023
RUN 6 - FOLD 2 - Score: 1.760943123396486
RUN 6 - FOLD 3 - Score: 1.7502468388937413
RUN 6 - FOLD 4 - Score: 1.7625299808690325
RUN 6 - FOLD 5 - Score: 1.732933462617919
RUN 6 - FOLD 6 - Score: 1.7590449642166495
RUN 6 - FOLD 7 - Score: 1.7383241172325798
RUN 6 - FOLD 8 - Score: 1.77035482443613
RUN 6 - FOLD 9 - Score: 1.7589499453473836
RUN 6 - FOLD 10 - Score: 1.7653136537056415
RUN 6 - FOLD 11 - Score: 1.7310805438584649
RUN 6 - FOLD 12 - Score: 1.7701871397215874
RUN 6 - FOLD 13 - Score: 1.7479083925727754
RUN 6 - FOLD 14 - Score: 1.737034311768599
RUN 6 - FOLD 15 - Score: 1.744815677952487
RUN 6 - FOLD 16 - Score: 1.760995115374215
RUN 6 - FOLD 17 - Score: 1.7373707353798673
RUN 6 - FOLD 18 - Score: 1.7417296135080977
RUN 6 - FOLD 19 - Score: 1.7385783950369804
RUN 6 - FOLD 20 - Score: 1.7646814562985673
RUN 6 - FOLD 21 - Score: 1.7329325333382004
RUN 6 - FOLD 22 - Score: 1.7582536432715132
RUN 6



RUN 7 - FOLD 0 - Score: 1.7544787573795766
RUN 7 - FOLD 1 - Score: 1.762254940056242
RUN 7 - FOLD 2 - Score: 1.7592811382017097
RUN 7 - FOLD 3 - Score: 1.745668848152738
RUN 7 - FOLD 4 - Score: 1.7230476586460135
RUN 7 - FOLD 5 - Score: 1.7473594682300464
RUN 7 - FOLD 6 - Score: 1.7477026201272383
RUN 7 - FOLD 7 - Score: 1.746337569235824
RUN 7 - FOLD 8 - Score: 1.7568291659625248
RUN 7 - FOLD 9 - Score: 1.7312693859264254
RUN 7 - FOLD 10 - Score: 1.7447515771975741
RUN 7 - FOLD 11 - Score: 1.7568846367904916
RUN 7 - FOLD 12 - Score: 1.7232134665194898
RUN 7 - FOLD 13 - Score: 1.7275441290149465
RUN 7 - FOLD 14 - Score: 1.752001261764206
RUN 7 - FOLD 15 - Score: 1.7523034360473975
RUN 7 - FOLD 16 - Score: 1.7523761140378191
RUN 7 - FOLD 17 - Score: 1.7676280545345509
RUN 7 - FOLD 18 - Score: 1.720690797900781
RUN 7 - FOLD 19 - Score: 1.742922373504378
RUN 7 - FOLD 20 - Score: 1.728822519302368
RUN 7 - FOLD 21 - Score: 1.7475593825075775
RUN 7 - FOLD 22 - Score: 1.7483404518025927
RUN 7



RUN 8 - FOLD 0 - Score: 1.7411072360822
RUN 8 - FOLD 1 - Score: 1.7455569533593953
RUN 8 - FOLD 2 - Score: 1.7481616000272333
RUN 8 - FOLD 3 - Score: 1.748630465080496
RUN 8 - FOLD 4 - Score: 1.7623172811968253
RUN 8 - FOLD 5 - Score: 1.7171405802685766
RUN 8 - FOLD 6 - Score: 1.753091381133534
RUN 8 - FOLD 7 - Score: 1.722942130350042
RUN 8 - FOLD 8 - Score: 1.747429338132497
RUN 8 - FOLD 9 - Score: 1.7348067278992385
RUN 8 - FOLD 10 - Score: 1.7204942015120759
RUN 8 - FOLD 11 - Score: 1.7307275280305185
RUN 8 - FOLD 12 - Score: 1.7555018525933848
RUN 8 - FOLD 13 - Score: 1.7599348374772816
RUN 8 - FOLD 14 - Score: 1.7291036216570064
RUN 8 - FOLD 15 - Score: 1.7267119280202314
RUN 8 - FOLD 16 - Score: 1.7448509734226392
RUN 8 - FOLD 17 - Score: 1.769134441602975
RUN 8 - FOLD 18 - Score: 1.7006686967303977
RUN 8 - FOLD 19 - Score: 1.7589834731090814
RUN 8 - FOLD 20 - Score: 1.754305946911685
RUN 8 - FOLD 21 - Score: 1.7460897163012996
RUN 8 - FOLD 22 - Score: 1.7539587399959564
RUN 8 -



RUN 9 - FOLD 0 - Score: 1.726385811352171
RUN 9 - FOLD 1 - Score: 1.7336500754645094
RUN 9 - FOLD 2 - Score: 1.7360689300866798
RUN 9 - FOLD 3 - Score: 1.7538550220569595
RUN 9 - FOLD 4 - Score: 1.741073085008189
RUN 9 - FOLD 5 - Score: 1.7374278529230505
RUN 9 - FOLD 6 - Score: 1.7459243326876313
RUN 9 - FOLD 7 - Score: 1.7559070586538874
RUN 9 - FOLD 8 - Score: 1.7426679900875315
RUN 9 - FOLD 9 - Score: 1.718645129289478
RUN 9 - FOLD 10 - Score: 1.7568893102877774
RUN 9 - FOLD 11 - Score: 1.7395799117479473
RUN 9 - FOLD 12 - Score: 1.7554993208004162
RUN 9 - FOLD 13 - Score: 1.747813497968018
RUN 9 - FOLD 14 - Score: 1.7398739017911722
RUN 9 - FOLD 15 - Score: 1.7439345654584468
RUN 9 - FOLD 16 - Score: 1.7288912886213512
RUN 9 - FOLD 17 - Score: 1.741748846024275
RUN 9 - FOLD 18 - Score: 1.7619784358618782
RUN 9 - FOLD 19 - Score: 1.7393132331417873
RUN 9 - FOLD 20 - Score: 1.7333183532906695
RUN 9 - FOLD 21 - Score: 1.7437211430165918
RUN 9 - FOLD 22 - Score: 1.7445159406461752
RUN



RUN 10 - FOLD 0 - Score: 1.7534749600719661
RUN 10 - FOLD 1 - Score: 1.760622378748376
RUN 10 - FOLD 2 - Score: 1.7378667505159975
RUN 10 - FOLD 3 - Score: 1.749911285478156
RUN 10 - FOLD 4 - Score: 1.75036940949969
RUN 10 - FOLD 5 - Score: 1.7612640608730725
RUN 10 - FOLD 6 - Score: 1.7416364498622716
RUN 10 - FOLD 7 - Score: 1.7458448445564136
RUN 10 - FOLD 8 - Score: 1.733784867909737
RUN 10 - FOLD 9 - Score: 1.710636721428251
RUN 10 - FOLD 10 - Score: 1.7384394332906232
RUN 10 - FOLD 11 - Score: 1.742936234618537
RUN 10 - FOLD 12 - Score: 1.7383324570711702
RUN 10 - FOLD 13 - Score: 1.7418850100575947
RUN 10 - FOLD 14 - Score: 1.7484840980516747
RUN 10 - FOLD 15 - Score: 1.7325639225998892
RUN 10 - FOLD 16 - Score: 1.780222717535682
RUN 10 - FOLD 17 - Score: 1.7377553479680792
RUN 10 - FOLD 18 - Score: 1.7223075382271782
RUN 10 - FOLD 19 - Score: 1.7531426100088283
RUN 10 - FOLD 20 - Score: 1.7382479614801705
RUN 10 - FOLD 21 - Score: 1.7391082364078612
RUN 10 - FOLD 22 - Score: 1.



RUN 11 - FOLD 0 - Score: 1.7209308898532762
RUN 11 - FOLD 1 - Score: 1.7578936775508336
RUN 11 - FOLD 2 - Score: 1.7346417189636267
RUN 11 - FOLD 3 - Score: 1.7481450674338266
RUN 11 - FOLD 4 - Score: 1.7588483508406207
RUN 11 - FOLD 5 - Score: 1.7433668294204399
RUN 11 - FOLD 6 - Score: 1.7497806238827762
RUN 11 - FOLD 7 - Score: 1.7475916482778266
RUN 11 - FOLD 8 - Score: 1.7410806074859575
RUN 11 - FOLD 9 - Score: 1.7674635839993134
RUN 11 - FOLD 10 - Score: 1.7212561290450394
RUN 11 - FOLD 11 - Score: 1.7569845073306933
RUN 11 - FOLD 12 - Score: 1.758069977522362
RUN 11 - FOLD 13 - Score: 1.7530382259585895
RUN 11 - FOLD 14 - Score: 1.755620615543332
RUN 11 - FOLD 15 - Score: 1.757975988222286
RUN 11 - FOLD 16 - Score: 1.7396321902712808
RUN 11 - FOLD 17 - Score: 1.7545328380041756
RUN 11 - FOLD 18 - Score: 1.7366252258121968
RUN 11 - FOLD 19 - Score: 1.760552778155543
RUN 11 - FOLD 20 - Score: 1.7417975265663117
RUN 11 - FOLD 21 - Score: 1.7446104420535267
RUN 11 - FOLD 22 - Score

In [102]:
# Log loss of the run-averaged predictions on the training rows.
# NOTE(review): train_result averages models that were each fitted on most of
# these rows, so this is largely an in-sample score, not a cross-validated one.
log_loss(train_df['target'], train_result)

1.7283459834868942

In [103]:
# Save the averaged training-set predictions, one column per class plus the row id.
class_columns = [f'Class_{k}' for k in range(1, 10)]
submission = pd.DataFrame(train_result, columns=class_columns)
submission['id'] = train_df['id']

submission.to_csv("cnn_ensemble_oe_train.csv", index=False)

In [104]:
# Fill the sample submission's class columns with the averaged test predictions.
submission = pd.read_csv("./sample_submission.csv")
for position in range(9):
    submission[f'Class_{position + 1}'] = test_result[:, position]

submission.to_csv("submission_cnn_ensemble_oe_test.csv", index=False)