# Import Libraries

In [None]:
!pip install autokeras
!pip install keras-tuner
!pip install cloud-tpu-client

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from autokeras import StructuredDataClassifier
import kerastuner
import tensorflow_addons as tfa

# --- Notebook-wide configuration -------------------------------------------

# Accelerator to request; the strategy-selection cell compares this against
# "TPU" and "GPU".
DEVICE = 'GPU'

# Seed handed to the stratified splitter and to the AutoKeras search.
RS = 69420

# Competition CSV files.
train_path = '../input/tabular-playground-series-jun-2021/train.csv'
test_path = '../input/tabular-playground-series-jun-2021/test.csv'

# Plot styling for every seaborn figure in the notebook.
sns.set_style('darkgrid')

In [None]:
from cloud_tpu_client import Client
print(tf.__version__)

# Client().configure_tpu_version(tf.__version__, restart_type='always')

# Pick the tf.distribute strategy used for training.
# When DEVICE is "TPU" we try to resolve and initialise a TPU; on any failure
# DEVICE is reset to "GPU" so the default-strategy branch below always leaves
# `strategy` defined.
if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # Catch a real exception class and fall back, instead of leaving
            # `strategy` unset while DEVICE still says "TPU".
            print("failed to initialize TPU:", err)
            DEVICE = "GPU"
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


# Shorthand for tf.data autotuning and the number of replicas in the strategy.
AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

# Preprocess

In [None]:
# Load the training CSV, using its first column as the DataFrame index.
df = pd.read_csv(train_path, index_col=0)

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Replace the 'target' column with int32 codes produced by an ordinal encoder.
# The fitted encoder stays available as `oe`.
oe = OrdinalEncoder(dtype=np.int32)
df['target'] = oe.fit_transform(df[['target']].to_numpy())

**Class Value Counts**

In [None]:
# Bar chart of how many rows carry each encoded target value.
# The column is passed as `x=` by keyword: seaborn 0.11 warns about positional
# data vectors and later releases make them keyword-only.
sns.countplot(x=df['target']);

In [None]:
# Every column except the last becomes the feature matrix `X`;
# the last column becomes the label vector `y`.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

X.shape, y.shape

# Stratified K Fold Split

In [None]:
from sklearn.model_selection import StratifiedKFold

# Stratified, shuffled 10-fold splitter seeded with RS.
cv = StratifiedKFold(n_splits=10, random_state=RS, shuffle=True)
print(cv)

# Only a single train/hold-out split is used downstream. Looping over all ten
# folds just overwrote the variables on each pass and printed ten sets of
# index arrays, so take the last fold directly; it is the one the loop would
# have ended on.
train_index, test_index = list(cv.split(X, y))[-1]
print("TRAIN:", train_index, "TEST:", test_index)
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [None]:
# Sanity check: shapes of the training and hold-out features/labels.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

# Fix Massive Class Imbalance

**According to the data description, this dataset is a CTGAN-generated synthetic version of the shopping dataset, so I assume the samples are independent and identically distributed (IID).**

In [None]:
# Disabled experiment: SMOTETomek resampling of the training and hold-out folds.
# NOTE(review): the hold-out line below calls `fit_sample` while the training
# line calls `fit_resample` -- confirm which method the installed
# imbalanced-learn provides before re-enabling.
# NOTE(review): resampling X_test/y_test would change the class distribution
# that the model is evaluated on -- confirm this is intended.

# from imblearn.combine import SMOTETomek
# from collections import Counter

# sme = SMOTETomek(random_state=RS, n_jobs=-1)

# print('Original Train Set Shape %s' % Counter(y_train))
# X_train, y_train = sme.fit_resample(X_train, y_train)
# print('Resampled dataset shape %s' % Counter(y_train))

# print('Original Test Set Shape %s' % Counter(y_test))
# X_test, y_test = sme.fit_sample(X_test, y_test)
# print('Resampled dataset shape %s' % Counter(y_test))

In [None]:
# Disabled experiment: 'balanced' class weights computed from y_train and
# converted to a {position: weight} dict.
# NOTE(review): no visible cell consumes `class_weights` -- confirm where it
# was meant to be passed before re-enabling.

# from sklearn.utils.class_weight import compute_class_weight

# class_weights = compute_class_weight('balanced',
#                                      np.unique(y_train),
#                                      y_train)

# class_weights = dict(enumerate(class_weights))

**Min-max scale the features, as is the norm for neural networks.**

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training fold only, then apply the same fitted
# scaling to both the training and the hold-out fold.
sc = MinMaxScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

# AutoKeras Model

In [None]:
# Metric objects: area under the precision-recall curve, and AUC with defaults.
# NOTE(review): neither object is passed to the classifier below -- confirm
# whether they were meant to be used.
PR = tf.keras.metrics.AUC(curve='PR', name='prc')
AUC = tf.keras.metrics.AUC()

# AutoKeras structured-data search: up to 100 trials, with the objective set
# to minimise `val_loss`. Any earlier 'TPS_AK' project is overwritten.
clf = StructuredDataClassifier(
    project_name='TPS_AK',
    overwrite=True,
    objective=kerastuner.Objective("val_loss", direction="min"),
    max_trials=100,
    seed=RS,
)

**If you are training neural networks, you should be using early stopping.**

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping watches `val_loss` with a patience of 3 epochs and restores
# the best weights seen.
es = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Wall-clock budget of five hours, expressed in seconds.
# NOTE(review): `ts` is not in the callbacks list passed to clf.fit -- confirm
# whether it should be.
ts = tfa.callbacks.TimeStopping(seconds=5 * 60 * 60)

In [None]:
# One-hot encode the integer labels of the training and hold-out folds.
y_train_oh, y_test_oh = (
    tf.keras.utils.to_categorical(labels) for labels in (y_train, y_test)
)

I do not know much about TPUs, so please drop suggestions below.

In [None]:
%%time
with strategy.scope():
    clf.fit(X_train, y_train_oh,
            epochs=75,
            batch_size=1024,
            validation_split=0.2,
            callbacks=[es],
            verbose=1)

In [None]:
# Export the model produced by the search and print its layer summary.
model = clf.export_model()
model.summary()

In [None]:
# Score the fitted classifier on the hold-out fold (one-hot labels).
# The `with` statement needs its trailing colon; without it the cell is a
# SyntaxError.
with strategy.scope():
    clf.evaluate(X_test, y_test_oh)

# Predict New

In [None]:
# Load the competition test CSV, using its first column as the DataFrame index.
test = pd.read_csv(test_path, index_col=0)

In [None]:
# Load the sample submission as a template; its first column becomes the index.
submission = pd.read_csv('../input/tabular-playground-series-jun-2021/sample_submission.csv', index_col=0)

In [None]:
# Apply the scaler fitted on the training fold to the test features.
# This cell replaces `test` with a NumPy array, so the isinstance guard keeps
# it re-runnable: a second run would otherwise fail on `.values`, which an
# ndarray does not have.
if isinstance(test, pd.DataFrame):
    test = sc.transform(test.values)

In [None]:
# Run the exported model on the scaled test features.
preds = model.predict(test)

In [None]:
# Display the prediction array for a quick visual check.
preds

In [None]:
# Copy prediction column i into submission column 'Class_{i+1}'
# for the nine classes.
for col_idx in range(9):
    submission[f'Class_{col_idx + 1}'] = preds[:, col_idx]
submission.head()

In [None]:
# Write the filled-in submission (index included) to the working directory.
submission.to_csv('submission.csv')

In [None]:
# Disabled helper: save a notebook checkpoint, wait ten seconds, then issue a
# system shutdown command.
# NOTE(review): `shutdown -s` looks like the Windows form of the command --
# confirm the target platform before re-enabling.

# import time
# from IPython.display import display, Javascript
# display(Javascript('IPython.notebook.save_checkpoint();'))
# time.sleep(10)

# import os
# os.system('shutdown -s')