In [None]:
import os
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
import category_encoders as ce

from contextlib import contextmanager
from time import time
from tqdm import tqdm

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import EarlyStopping
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report, log_loss, accuracy_score, mean_squared_error

In [None]:
import tensorflow as tf
# Pick a distribution strategy: use a TPU when one can be resolved, otherwise
# fall back to TensorFlow's default strategy.
# NOTE(review): none of the cells visible here enter `strategy.scope()` when
# building the model - confirm whether the strategy is meant to be used.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # Catch Exception instead of using a bare `except:` so KeyboardInterrupt
    # and SystemExit still propagate, and report why the fallback happened.
    print('TPU not available, using default strategy:', tpu_error)
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)


In [None]:
# Load the training table and preview its first 12 rows, transposed so that
# every column of the file is shown as a row.
train0 = pd.read_csv("../input/tabular-playground-series-jun-2021/train.csv")
train0.iloc[:12].T

In [None]:
# Print the column dtypes, non-null counts and memory usage of the training frame.
train0.info()

In [None]:
# Distinct target labels (in order of appearance), their sorted form, and the
# number of distinct labels.
Name = train0['target'].unique()
Name2 = sorted(Name)
m = train0['target'].nunique()
print(Name2, m, sep='\n')

In [None]:
# Integer class ids 0..m-1, paired position-by-position with the sorted labels.
N = list(range(m))
normal_mapping = dict(zip(Name2, N))    # label -> integer class id
reverse_mapping = dict(zip(N, Name2))   # integer class id -> label

def mapper(value):
    """Translate an integer class id back into its target label.

    The lookup goes through the module-level ``reverse_mapping`` dict, so a
    ``value`` that is not one of its keys raises ``KeyError``.
    """
    label = reverse_mapping[value]
    return label

In [None]:
# Load the test table and preview its first 12 rows, transposed so that every
# column of the file is shown as a row.
test0 = pd.read_csv("../input/tabular-playground-series-jun-2021/test.csv")
test0.iloc[:12].T

In [None]:
# Display the column index of the raw training frame.
train0.columns

In [None]:
# Encode the target labels as integer class ids, and keep every other column
# as the training inputs.
trainy = train0['target'].map(normal_mapping)
train2 = train0.drop(columns=['target'])

In [None]:
# Stack the training rows (without the target) on top of the test rows, then
# use the `id` column as the row index.
data2 = pd.concat(objs=[train2, test0], axis=0)
data3 = data2.set_index(keys='id')
data3

In [None]:
# Number of training rows; used to split the stacked frame back apart.
n = train2.shape[0]
print(n)

In [None]:
# Positional split of the stacked frame: the first n rows are the training
# rows, everything after them is the test rows.
train = data3.iloc[:n]
test = data3.iloc[n:]

In [None]:
# Plain Python list of the feature column names (the `id` column is the index
# of data3, so it is not part of this list).
df_columns = data3.columns.tolist()
print(df_columns)

In [None]:
# Wrap the train / test slices in new DataFrame objects.
train_df=pd.DataFrame(train)
test_df=pd.DataFrame(test)

In [None]:
# Assign the feature column names to both frames (in-place attribute update).
train_df.columns=df_columns
test_df.columns=df_columns

In [None]:
def create_numeric_feature(input_df):
    """Return an independent copy of the feature columns of ``input_df``.

    The columns are those named in the module-level ``df_columns`` list, in
    that order; a missing column raises ``KeyError``.
    """
    selected = input_df.loc[:, df_columns]
    return selected.copy()

In [None]:
class Timer:
    """Context manager that measures the wall-clock time of a ``with`` block.

    On exit the elapsed time is formatted with ``format_str`` and either sent
    to ``logger.info`` (when a logger was given) or printed.

    Args:
        logger: optional object with an ``info(str)`` method; when falsy the
            message is printed instead.
        format_str: ``str.format`` template that receives the duration in
            seconds as its single positional argument.
        prefix: optional text placed before ``format_str``, joined by ``sep``.
        suffix: optional text placed after ``format_str``, joined by ``sep``.
        sep: separator used when attaching ``prefix`` / ``suffix``.
    """

    def __init__(self, logger=None, format_str='{:.3f}[s]', prefix=None, suffix=None, sep=' '):
        if prefix:
            format_str = str(prefix) + sep + format_str
        if suffix:
            format_str = format_str + sep + str(suffix)
        self.format_str = format_str
        self.logger = logger
        self.start = None
        self.end = None

    @property
    def duration(self):
        """Elapsed seconds of the last completed block, or 0 if none finished yet."""
        if self.end is None:
            return 0
        return self.end - self.start

    def __enter__(self):
        self.start = time()
        # Clear any end time left from a previous use so `duration` does not
        # report a stale (possibly negative) value while this block is running.
        self.end = None
        # Return the timer so `with Timer() as t:` binds the instance, not None.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time()
        out_str = self.format_str.format(self.duration)
        if self.logger:
            self.logger.info(out_str)
        else:
            print(out_str)

In [None]:
def to_feature(input_df):
    """Build the feature table for ``input_df``.

    Runs every feature processor on ``input_df``, timing each one with
    ``Timer``, and joins the processors' outputs column-wise.

    Args:
        input_df: DataFrame handed unchanged to each processor.

    Returns:
        DataFrame with all processor outputs concatenated along ``axis=1``,
        or an empty DataFrame when the processor list is empty.

    Raises:
        AssertionError: if a processor returns a different number of rows
            than ``input_df`` (the message is the processor's name).
    """
    processors = [
        create_numeric_feature,
    ]
    feature_frames = []

    for func in tqdm(processors, total=len(processors)):
        with Timer(prefix='create' + func.__name__ + ' '):
            _df = func(input_df)

        assert len(_df) == len(input_df), func.__name__
        feature_frames.append(_df)

    if not feature_frames:
        return pd.DataFrame()

    # Concatenate once at the end instead of growing a DataFrame inside the
    # loop, which re-copies all accumulated columns on every iteration and
    # starts from an empty frame.
    return pd.concat(feature_frames, axis=1)

In [None]:
# Build the feature tables for the training and test frames with the same pipeline.
train_feat_df = to_feature(train_df)
test_feat_df = to_feature(test_df)

In [None]:
# Lay each row's feature values out as a 5 x 15 single-channel grid for the
# Conv2D layers. reshape(-1, ...) only checks the total element count, so a
# different feature count could silently mix values from neighbouring rows;
# check the width explicitly first.
assert train_feat_df.shape[1] == 5 * 15 and test_feat_df.shape[1] == 5 * 15, (
    'expected 75 feature columns for a 5x15 grid, got {} (train) / {} (test)'.format(
        train_feat_df.shape[1], test_feat_df.shape[1]))
X = train_feat_df.values.reshape(-1, 5, 15, 1)
test = test_feat_df.values.reshape(-1, 5, 15, 1)

In [None]:
# Turn the integer class ids into a binarized label matrix and show its first
# five rows.
binencoder = LabelBinarizer()
y0 = trainy
y = binencoder.fit(y0).transform(y0)
print(y[:5])

In [None]:
# Hold out 20% of the samples; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=90)

In [None]:
# Small CNN over the 5 x 15 x 1 feature grid: two 2x2 convolutions (with batch
# normalisation in between and dropout after), then two dense layers and a
# 9-way softmax output.
model = Sequential([
    Conv2D(64, (2, 2), input_shape=(5, 15, 1), activation='relu'),
    BatchNormalization(),
    Conv2D(128, (2, 2), activation='relu'),
    Dropout(0.2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(9, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as an additional metric.
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

In [None]:
# Train on the full training split and validate on the hold-out produced by
# train_test_split. Previously `validation_split=0.2` carved a second hold-out
# from X_train while X_test / y_test were never used, so those rows served
# neither training nor validation.
result = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=92,
    verbose=2,
)

In [None]:
# Per-epoch training history as a table, plotted as two charts:
# accuracy vs. validation accuracy, then loss vs. validation loss.
hist = pd.DataFrame(result.history)
hist[['accuracy', 'val_accuracy']].plot()
hist[['loss', 'val_loss']].plot()

In [None]:
# Predict class probabilities for the reshaped test samples, keep an ndarray
# copy for the submission, and show the output shape plus the first row.
pred0 = model.predict(test)
pred1 = np.array(pred0)
print(pred0.shape, pred0[0], sep='\n')

In [None]:
# Load the sample submission file and display it.
sample=pd.read_csv('../input/tabular-playground-series-jun-2021/sample_submission.csv')
sample

In [None]:
# Fill the class columns of a copy of the sample submission with the predicted
# values and write it out. Copying keeps `sample` as loaded from disk, so
# re-running this cell (or inspecting `sample` afterwards) is not affected by
# the assignment below.
submit = sample.copy()
submit[Name2] = pred1
# index=False is the documented way to omit the row index from the CSV.
submit.to_csv('submission.csv', index=False)
submit