In [None]:
import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file in input_files:
    print(input_file)

### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Directory that holds the competition CSV files.
main_dir = '../input/tabular-playground-series-jun-2021/'

# Read the training data, the test data and the sample submission, in that order.
train_df, test_df, sample_sub = (
    pd.read_csv(os.path.join(main_dir, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Preview the first rows of the training frame.
train_df.head()

In [None]:
# Preview the first rows of the test frame.
test_df.head()

In [None]:
# Preview the first rows of the sample submission frame.
sample_sub.head()

In [None]:
# Count the missing values in each column of the training frame.
train_df.isnull().sum()

In [None]:
# Bar chart of how many training rows carry each value of the "target" column.
fig, count_ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=train_df, x="target", ax=count_ax)

### Target Distribution

In [None]:
# Number of training rows per distinct target value, plus the parallel lists
# of counts and class labels used by both charts below.
target_mass = train_df['target'].value_counts()
values = target_mass.values.tolist()
indexes = target_mass.index.tolist()

# plt.subplots returns (figure, axes) in that order. Drawing on the returned
# axes directly avoids re-selecting them through the pyplot state machine.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
pie_ax, bar_ax = axes

# Left panel: share of each class.
pie_ax.pie(values, labels=indexes)
pie_ax.set_title('Share of each target class')

# Right panel: absolute row count of each class.
bar_ax.bar(indexes, values)
bar_ax.set_title('Rows per target class')
bar_ax.set_xlabel('target')
bar_ax.set_ylabel('count')

plt.show()

### Correlation

In [None]:
# Feature columns only: the training frame without the 'id' and 'target' columns.
fet_set = train_df.drop(columns=['id', 'target'])


def plot_diag_heatmap(data):
    """Draw a heatmap of the pairwise column correlations of ``data``.

    Cells on and above the main diagonal are masked, so only the part of the
    correlation matrix below the diagonal is drawn.

    Args:
        data: object exposing ``.corr()`` (a pandas DataFrame in this notebook).

    Returns:
        None. The heatmap is drawn on a newly created 11x9 figure.
    """
    correlations = data.corr()
    # True on and above the diagonal; seaborn leaves masked cells blank.
    hidden_cells = np.triu(np.ones_like(correlations, dtype=bool))
    plt.subplots(figsize=(11, 9))
    sns.heatmap(
        correlations,
        mask=hidden_cells,
        cmap='YlGnBu',
        center=0,
        square=True,
        linewidths=1,
        cbar_kws={"shrink": 1.0},
    )


plot_diag_heatmap(fet_set)

### One-hot encoding the target classes

In [None]:
# One-hot encode the target column: one indicator column per distinct target value.
labels= pd.get_dummies(train_df['target'])

In [None]:
# Display the one-hot encoded target frame.
labels

### Model Building

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import log_loss

import gc
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, Concatenate, Conv1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import initializers

from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.models import Model

In [None]:
def custom_metric(y_true, y_pred):
    """Categorical cross-entropy of the predictions after clipping.

    The predictions are clipped to [1e-15, 1 - 1e-15] and then scored with the
    module-level ``cce`` loss object; the mean of that result is returned.

    Args:
        y_true: target values passed on to ``cce``.
        y_pred: predicted values; clipped before scoring.

    Returns:
        The mean of ``cce(y_true, clipped predictions)``.
    """
    clipped_pred = K.clip(y_pred, 1e-15, 1-1e-15)
    return K.mean(cce(y_true, clipped_pred))

# Loss object used inside custom_metric (resolved by name when the metric is called).
cce = tf.keras.losses.CategoricalCrossentropy()

# Both callbacks watch the validation value of custom_metric, treat lower as
# better, and stay silent.
_watch_val_metric = dict(monitor='val_custom_metric', mode='min', verbose=0)

# Stop training after 10 epochs without an improvement of at least 1e-05 and
# roll the model back to its best weights.
earlystop = EarlyStopping(
    min_delta=1e-05,
    patience=10,
    baseline=None,
    restore_best_weights=True,
    **_watch_val_metric,
)

# Multiply the learning rate by 0.7 after 10 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    factor=0.7,
    patience=10,
    **_watch_val_metric,
)

In [None]:
def conv_model(n_features=75, vocab_size=354, n_classes=9, embed_dim=7):
    """Build the embedding + 1x1-convolution classifier (uncompiled).

    Architecture, in order:
      1. Every input position is looked up in one shared, L2-regularised
         embedding table.
      2. A kernel-size-1 ``Conv1D`` with 12 filters is applied, then flattened.
      3. Dropout(0.2) feeds a weight-normalised Dense(32, selu) layer.
      4. The flattened convolution output is concatenated with that dense
         output (skip connection), followed by Dropout(0.3) and a
         weight-normalised Dense(32, relu) layer.
      5. A softmax layer produces the class probabilities.

    Args:
        n_features: number of input columns per row. Defaults to 75.
        vocab_size: ``input_dim`` of the embedding. Inputs are expected to be
            non-negative integers below this value — verify against the data.
            Defaults to 354.
        n_classes: width of the softmax output. Defaults to 9.
        embed_dim: size of each embedding vector. Defaults to 7.

    Returns:
        An uncompiled Keras ``Model`` mapping ``(batch, n_features)`` inputs to
        ``(batch, n_classes)`` outputs.
    """
    # The shape must be a tuple: ``(75)`` is just the int 75, hence the comma.
    conv_in = Input(shape=(n_features,))
    embed = Embedding(
        input_dim=vocab_size,
        output_dim=embed_dim,
        embeddings_regularizer='l2'
    )(conv_in)
    embed = Conv1D(12, 1, activation='relu')(embed)
    embed = Flatten()(embed)
    hidden = Dropout(0.2)(embed)

    hidden = tfa.layers.WeightNormalization(
        Dense(32, activation='selu', kernel_initializer='lecun_normal'))(hidden)
    # Skip connection: the un-dropped flattened features rejoin the dense path.
    output = Dropout(0.3)(Concatenate()([embed, hidden]))
    output = tfa.layers.WeightNormalization(
        Dense(32, activation='relu', kernel_initializer='lecun_normal'))(output)
    conv_out = Dense(n_classes, activation='softmax', kernel_initializer='lecun_normal')(output)

    model = Model(conv_in, conv_out)

    return model

In [None]:
# Cross-validation and training settings.
folds = 25
Seed = 2021
epochs = 60

# Accumulators with 9 columns each: out-of-fold predictions for every training
# row, and the fold-averaged predictions for every test row.
oof_NN_a = np.zeros((len(train_df), 9))
pred_NN_a = np.zeros((len(test_df), 9))

In [None]:
# Stratified K-fold training: each fold trains a fresh model, stores its
# predictions for the held-out rows, and adds 1/folds of its test predictions
# to the running test average.
skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=Seed)

# Start from empty accumulators so that re-running this cell does not stack a
# second round of predictions on top of an earlier run.
oof_NN_a = np.zeros((train_df.shape[0], labels.shape[1]))
pred_NN_a = np.zeros((test_df.shape[0], labels.shape[1]))

# Loop-invariant slices: training features are every column between the first
# and the last one; test features are every column after the first one.
train_features = train_df.iloc[:, 1:-1]
test_features = test_df.iloc[:, 1:]

# The last training column is used as the stratification key.
for fold, (tr_idx, te_idx) in enumerate(skf.split(train_df, train_df.iloc[:, -1])):
    print(f"===Training Fold: {fold}===\n")

    X_train = train_features.iloc[tr_idx]
    y_train = labels.iloc[tr_idx]
    # The held-out fold doubles as the validation set for the callbacks.
    X_test = train_features.iloc[te_idx]
    y_test = labels.iloc[te_idx]

    # Drop the previous fold's graph/state before building a new model.
    K.clear_session()

    print("\n===CNN Training===\n")

    model = conv_model()
    model.compile(
        loss='categorical_crossentropy',
        # `learning_rate` is the supported argument name; `lr` is deprecated.
        optimizer=Adam(learning_rate=2e-4),
        # Keras documents `metrics` as a list, so wrap the single metric.
        metrics=[custom_metric],
    )

    model.fit(
        X_train, y_train,
        batch_size=256,
        epochs=epochs,
        validation_data=(X_test, y_test),
        callbacks=[earlystop, reduce_lr],
        verbose=1
    )

    pred_a = model.predict(X_test)
    oof_NN_a[te_idx] += pred_a

    score_NN_a = log_loss(y_test, pred_a)
    print(f"\n Fold: {fold} Score conv model: {score_NN_a}")
    # Each fold contributes an equal share to the final test prediction.
    pred_NN_a += model.predict(test_features) / folds

# Log loss of the out-of-fold predictions over the whole training set.
score_a = log_loss(labels, oof_NN_a)
print(f"\n===Final Score: {score_a}===\n")

In [None]:
# Alias for the fold-averaged test predictions (same array object, no copy is made).
pred_embedding = pred_NN_a

In [None]:
# Work on a copy so the sample_sub frame loaded earlier is not modified in
# place, which keeps this cell safe to re-run.
submission = sample_sub.copy()

# Column k of the prediction matrix is written to the 'Class_{k+1}' column.
# NOTE(review): assumes the model's output columns follow the Class_1..Class_9
# order of the one-hot `labels` columns — confirm.
for class_no in range(1, 10):
    submission[f'Class_{class_no}'] = pred_embedding[:, class_no - 1]

In [None]:
# Write the submission frame to attempt2.csv without the index column.
submission.to_csv("attempt2.csv", index=False)