# Version 3 : LightAutoML
# Version 6 : Neural Network 

Reference notebook - https://www.kaggle.com/pourchot/simple-neural-network

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss

import gc
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations,callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers

from keras.models import Model

In [None]:
# Load the competition files; the sample submission is re-indexed by `id`
# as soon as it is read.
train = pd.read_csv('../input/tabular-playground-series-jun-2021/train.csv')
test = pd.read_csv("../input/tabular-playground-series-jun-2021/test.csv")
submission = pd.read_csv(
    "../input/tabular-playground-series-jun-2021/sample_submission.csv"
).set_index('id')

# EDA

In [None]:
# Preview the first rows of the training frame.
train.head()

# Checking null values

In [None]:
# Report the training frame's row/column counts and the total number of
# missing cells (per-column NaN counts summed over all columns).
print(f'Number of rows: {train.shape[0]};  Number of columns: {train.shape[1]}; No of missing values: {sum(train.isna().sum())}')

In [None]:
# Same report for the test frame: row/column counts and total missing cells.
print(f'Number of rows: {test.shape[0]};  Number of columns: {test.shape[1]}; No of missing values: {sum(test.isna().sum())}')

In [None]:
# Print pandas' structural summary of the training frame.
train.info()

In [None]:
# Show pandas' descriptive statistics for the training frame.
train.describe()

# Target Distribution

In [None]:
# Class balance of the target: a pie chart next to a bar chart of the same
# per-class row counts.
target_mass = train['target'].value_counts()
values = target_mass.values.tolist()
indexes = target_mass.index.tolist()

# plt.subplots returns (figure, axes) in that order. Draw on the returned
# axes directly rather than re-selecting them with plt.subplot.
fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(15, 6))
pie_ax.pie(values, labels=indexes)
bar_ax.bar(indexes, values)
plt.show()

# Correlation

In [None]:
# Feature-only view of the training data (identifier and label removed).
fet_set = train.drop(columns=['id', 'target'])


def plot_diag_heatmap(data):
    """Draw the pairwise correlations of `data` as a lower-triangle heatmap.

    A new 11x9 figure is opened and the heatmap is drawn on its axes.
    Nothing is returned.
    """
    corr_matrix = data.corr()
    # True cells are hidden by seaborn: mask the diagonal and everything
    # above it so each pair is shown only once.
    hidden = np.triu(np.ones(corr_matrix.shape, dtype=bool))
    plt.subplots(figsize=(11, 9))
    sns.heatmap(
        corr_matrix,
        mask=hidden,
        cmap='YlGnBu',
        center=0,
        square=True,
        linewidths=1,
        cbar_kws={"shrink": 1.0},
    )


plot_diag_heatmap(fet_set)

In [None]:
# Second copy of the data, this time indexed by `id` so the identifier is
# not among the columns.
train1 = pd.read_csv(
    '../input/tabular-playground-series-jun-2021/train.csv',
    index_col='id',
)
test1 = pd.read_csv(
    '../input/tabular-playground-series-jun-2021/test.csv',
    index_col='id',
)
# Every column but the last goes to x_cols; the last column name is y_col.
*x_cols, y_col = train1.columns

In [None]:
import plotly.express as px

# Grid of per-feature density plots, one filled KDE curve per target class.
target_column = 'target'
num_cols = 5
# Ceiling division: enough rows for every feature in x_cols to get a panel.
num_rows = -(-len(x_cols) // num_cols)
# squeeze=False keeps `axes` two-dimensional even for a single-row grid.
f, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(30, 60), squeeze=False)

# Class label -> index into plotly's qualitative G10 palette.
class_palette_index = {
    'Class_1': 1,
    'Class_2': 2,
    'Class_3': 9,
    'Class_4': 4,
    'Class_5': 5,
    'Class_6': 6,
    'Class_7': 7,
    'Class_8': 8,
    'Class_9': 3,
}
palette = px.colors.qualitative.G10

# Split the rows by class once, instead of re-filtering the whole frame for
# every (feature, class) pair.
rows_by_class = {
    label: train1.loc[train1[target_column] == label]
    for label in class_palette_index
}

for index, column in enumerate(x_cols):
    i, j = divmod(index, num_cols)
    for label, color_index in class_palette_index.items():
        # `fill` is the current name of the deprecated `shade` argument.
        sns.kdeplot(
            rows_by_class[label][column],
            color=palette[color_index],
            fill=True,
            ax=axes[i, j],
        )
plt.tight_layout()
plt.show()

In [None]:
# One-hot encode the class labels (one indicator column per class).
# The dtype is pinned explicitly because the get_dummies default depends on
# the pandas version (uint8 before 2.0, bool from 2.0 on).
targets = pd.get_dummies(train['target'], dtype=np.float32)

In [None]:
def custom_metric(y_true, y_pred):
    """Categorical cross-entropy computed on clipped predictions.

    Predictions are clipped to [1e-15, 1 - 1e-15] before being passed to
    the module-level `cce` loss object; the mean of that result is returned.
    """
    clipped = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped))

# Loss object evaluated inside custom_metric.
cce = keras.losses.CategoricalCrossentropy()

# Stop once the validation metric has not improved by at least 1e-5 for
# 5 epochs, and roll back to the best weights seen.
es = callbacks.EarlyStopping(
    monitor='val_custom_metric',
    mode='min',
    min_delta=1e-05,
    patience=5,
    baseline=None,
    restore_best_weights=True,
    verbose=0,
)

# Multiply the learning rate by 0.7 after 2 epochs without improvement of
# the validation metric.
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_custom_metric',
    mode='min',
    factor=0.7,
    patience=2,
    verbose=0,
)

In [None]:
def conv_model(n_features=75, vocab_size=354, n_classes=9):
    """Build the embedding + kernel-size-1 convolution classifier (uncompiled).

    Every input value is looked up in one shared embedding table, passed
    through a Conv1D with kernel size 1, and flattened. Three
    weight-normalized dense layers follow, each fed the concatenation of
    the flattened embedding and all earlier dense outputs. A softmax layer
    produces the class probabilities.

    Args:
        n_features: Number of input columns per sample.
        vocab_size: `input_dim` of the embedding layer. Keras requires the
            input values to be integers in the range [0, vocab_size).
        n_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras model mapping (batch, n_features) inputs to
        (batch, n_classes) probabilities.
    """
    # The shape must be a tuple; `(75)` without a trailing comma is the int 75.
    conv_inputs = layers.Input(shape=(n_features,))
    embed = layers.Embedding(input_dim=vocab_size,
                             output_dim=7,
                             embeddings_regularizer='l2')(conv_inputs)
    embed = layers.Conv1D(12, 1, activation='relu')(embed)
    embed = layers.Flatten()(embed)
    hidden = layers.Dropout(0.3)(embed)

    hidden = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(hidden)

    # Skip connections: each later block also sees the flattened embedding
    # and the outputs of the dense layers before it.
    output = layers.Dropout(0.3)(layers.Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)
    output = layers.Dropout(0.4)(layers.Concatenate()([embed, hidden, output]))
    output = tfa.layers.WeightNormalization(
        layers.Dense(
            units=32,
            activation='relu',
            kernel_initializer="lecun_normal"))(output)

    conv_outputs = layers.Dense(
        units=n_classes,
        activation='softmax',
        kernel_initializer="lecun_normal")(output)

    # Use the Model class from the same tensorflow.keras namespace as the
    # layers, not the one from the standalone `keras` package.
    return keras.Model(conv_inputs, conv_outputs)

In [None]:
N_FOLDS = 25
SEED = 2021
EPOCH = 75

# Out-of-fold predictions for the training rows and fold-averaged
# predictions for the test rows, one column per class.
oof_NN_a = np.zeros((train.shape[0], 9))
pred_NN_a = np.zeros((test.shape[0], 9))

# Model inputs: skip the first column of each frame (presumably `id`) and,
# for train, also the last column, which is used as the stratification label.
train_features = train.iloc[:, 1:-1]
test_features = test.iloc[:, 1:]

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

for fold, (tr_idx, ts_idx) in enumerate(skf.split(train, train.iloc[:, -1])):
    print(f"\n ====== TRAINING FOLD {fold} =======\n")

    X_train, y_train = train_features.iloc[tr_idx], targets.iloc[tr_idx]
    X_test, y_test = train_features.iloc[ts_idx], targets.iloc[ts_idx]

    # Reset Keras' global state before a fresh model is built for this fold.
    K.clear_session()

    print("\n-----Convolution model Training----\n")

    model_conv = conv_model()
    model_conv.compile(
        optimizer=keras.optimizers.Adam(learning_rate=2e-4),
        loss='categorical_crossentropy',
        metrics=custom_metric,
    )
    model_conv.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=EPOCH,
        batch_size=256,
        callbacks=[es, plateau],
        verbose=0,
    )

    # Held-out predictions fill this fold's slice of the OOF matrix.
    pred_a = model_conv.predict(X_test)
    oof_NN_a[ts_idx] += pred_a
    score_NN_a = log_loss(y_test, pred_a)
    print(f"\nFOLD {fold} Score convolution model: {score_NN_a}\n")

    # Each fold contributes 1/N_FOLDS of the final test prediction.
    pred_NN_a += model_conv.predict(test_features) / N_FOLDS

score_a = log_loss(targets, oof_NN_a)
print(f"\n=== FINAL SCORE CONVOLUTION MODEL : {score_a}===\n")

In [None]:
# Alias for the prediction array; both names refer to the same object.
pred_embedding = pred_NN_a

In [None]:
# Re-read the sample submission and overwrite each class column with the
# matching column of the prediction matrix.
submission = pd.read_csv("../input/tabular-playground-series-jun-2021/sample_submission.csv")
class_columns = [
    'Class_1', 'Class_2', 'Class_3',
    'Class_4', 'Class_5', 'Class_6',
    'Class_7', 'Class_8', 'Class_9',
]
for position, column in enumerate(class_columns):
    submission[column] = pred_embedding[:, position]

In [None]:
# Write the submission frame to Solution.csv without the row index.
submission.to_csv("Solution.csv", index=False)