In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from collections import Counter

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import *
from tensorflow.keras.models import Model
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from tensorflow.keras.callbacks import *

In [None]:
# Load the competition tables. "id" is dropped from both feature frames;
# `sub` keeps the full test table (including "id") so the ids can be attached
# to the submission file later.
train = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2021/train.csv").drop(columns=["id"])
sub = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2021/test.csv")
test = sub.drop(columns=["id"])

In [None]:
# Encode the target labels as integer codes and write them back into `train`.
lc = LabelEncoder().fit(train["target"])
enc = lc.transform(train["target"])
train["target"] = enc

# Positional split: every column but the last is a feature, the last column is
# the label (assumes "target" is the final column of train.csv).
x, y = train.iloc[:, :-1], train.iloc[:, -1]

# One-hot matrix of the encoded labels, one column per distinct class.
ys = pd.get_dummies(y).to_numpy()
ys.shape

In [None]:
# Min-max scale the features.
# The scaler's per-column min/max are learned from the training features only
# and then re-used for the test features, so both sets go through exactly the
# same mapping. Refitting on the test set would hand the models inputs on a
# different scale from the one they were trained on.
# Test values outside the training range may therefore land slightly outside
# [0, 1]; that is expected.
scale = MinMaxScaler()
train_scale = scale.fit_transform(x)
test_scale = scale.transform(test)
train_scale.shape

In [None]:
def nn_model():
    """Build the uncompiled feed-forward classifier.

    The input width is read from the module-level ``train_scale`` array
    (its second dimension). The network is a stack of ReLU dense layers
    (64 -> 128 -> 256 -> 256) with batch normalisation and 10% dropout after
    the first and third dense layers, followed by a 9-unit softmax output.

    Returns:
        A Keras ``Model`` mapping the input features to 9 class
        probabilities. The caller is responsible for compiling it.
    """
    inputs = layers.Input(shape=(train_scale.shape[1],))

    # First dense stage with normalisation and light dropout.
    hidden = layers.Dense(64, activation="relu")(inputs)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.1)(hidden)

    # Widening stage, again normalised and regularised.
    hidden = layers.Dense(128, activation="relu")(hidden)
    hidden = layers.Dense(256, activation="relu")(hidden)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.1)(hidden)

    hidden = layers.Dense(256, activation="relu")(hidden)

    # Softmax head: one probability per class.
    outputs = layers.Dense(9, activation="softmax")(hidden)

    return Model(inputs, outputs)

In [None]:
# Stratified 4-fold splitter used by the cross-validation loops below.
# A fixed random_state makes the shuffled folds reproducible across kernel
# restarts and gives every loop that calls kf.split() the same partitions, so
# their fold scores are directly comparable.
kf = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

In [None]:
# Cross-validate the neural network.
#
# A brand-new model, optimizer and checkpoint callback are created for every
# fold. Re-using one model across folds would let it keep training on rows
# that later serve as validation data, which leaks information and makes the
# fold scores (after the first) look better than they are.
#
# After training, the weights from the epoch with the lowest validation loss
# are restored from the checkpoint before the fold is scored. Note that this
# epoch selection uses the validation fold itself, so the score is slightly
# optimistic.
#
# `mod` is left pointing at the model of the last fold so later cells can use
# it for prediction.
logsnn = []
for tri, tei in kf.split(train_scale, y):
    x_tr, x_te = train_scale[tri], train_scale[tei]
    y_tr, y_te = ys[tri], ys[tei]

    # Fresh, untrained network for this fold.
    mod = nn_model()
    mod.compile(optimizer=RMSprop(momentum=0.03), loss="categorical_crossentropy", metrics=["acc"])

    # Fresh callback so "best" is judged within this fold only.
    cbs = [ModelCheckpoint("nnbest.h5", save_best_only=True, save_weights_only=True)]
    mod.fit(x_tr, y_tr, batch_size=256, epochs=7, validation_data=(x_te, y_te), callbacks=cbs)

    # Roll back to the best epoch of this fold before evaluating.
    mod.load_weights("nnbest.h5")

    lloss = log_loss(y_te, mod.predict(x_te))
    logsnn.append(lloss)
    print(f"logloss :  {lloss}")

print(np.mean(logsnn))

In [None]:
from catboost import CatBoostClassifier

# Cross-validate CatBoost on the integer-encoded labels.
# Each fold trains a new classifier with the validation fold as the
# early-stopping eval set, then records the multi-class log-loss on that same
# fold. `cb` is left pointing at the classifier of the last fold.
yca = y.to_numpy()
logscb = []

for tri, tei in kf.split(train_scale, y):
    x_tr, y_tr = train_scale[tri], yca[tri]
    x_te, y_te = train_scale[tei], yca[tei]

    cb = CatBoostClassifier(
        iterations=1600,
        loss_function="MultiClass",
        early_stopping_rounds=3,
        verbose=1,
    )
    cb.fit(x_tr, y_tr, eval_set=(x_te, y_te))

    lloss = log_loss(y_te, cb.predict_proba(x_te))
    logscb.append(lloss)
    print(f"logloss :  {lloss}")

print(np.mean(logscb))

In [None]:
# Predict class probabilities for the test set with both models (the ones
# left in scope by the preceding training cells).
predictions_nn = mod.predict(test_scale)
predictions_cb = cb.predict_proba(test_scale)

# Blend by averaging. Each model outputs rows that sum to 1, so the mean is
# again a valid probability distribution. Summing the two and clipping to
# [0, 1] would instead produce unnormalised rows and flatten every class
# whose combined score exceeds 1.
ensemble = (predictions_cb + predictions_nn) / 2

# Submission columns: Class_1 ... Class_9, in the same order as the encoded
# class indices.
names = [f"Class_{s}" for s in range(1, 10)]

subframe = pd.DataFrame(ensemble, columns=names)
subframe.insert(loc=0, column="id", value=sub["id"])
subframe.to_csv("submission.csv", index=False)