In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the competition train and test CSVs, then remove Soil_Type7 and
# Soil_Type15 from both so the two frames keep matching feature columns.
# NOTE(review): presumably these two columns carry no signal — confirm.
df = pd.read_csv("../input/tabular-playground-series-dec-2021/train.csv")
df = df.drop(columns=['Soil_Type7', 'Soil_Type15'])
test_df = pd.read_csv("../input/tabular-playground-series-dec-2021/test.csv")
test_df = test_df.drop(columns=['Soil_Type7', 'Soil_Type15'])

In [None]:
# Preview the first rows of the training frame.
df.head()

## Check for null values

In [None]:
# Count missing values per column of the training frame.
df.isnull().sum()

In [None]:
# Number of training rows per Cover_Type value (shows the class balance).
df.groupby('Cover_Type').size()

In [None]:
# Features: every column except the row identifier and the target.
X = df.drop(columns=['Id', 'Cover_Type'])
# Target: the Cover_Type column.
y = df['Cover_Type']
# Number of distinct target values (shown as the cell output).
len(y.unique())

In [None]:
# Peek at the first two rows of the feature frame.
X.head(2)

In [None]:
# Distribution of the Elevation feature.
sns.histplot(X['Elevation'])

In [None]:
# Distribution of the Aspect feature.
sns.histplot(X['Aspect'])

In [None]:
# Distribution of the Slope feature.
sns.histplot(X['Slope'])

In [None]:
from sklearn.preprocessing import StandardScaler,LabelEncoder

# Fit both transformers from the untouched columns of `df` instead of from
# `X`/`y`. The original cell rebound `X` and `y` from themselves, so running
# it a second time refit the LabelEncoder on already-encoded labels and
# `le.inverse_transform` no longer returned the original Cover_Type values.
# Reading from `df` makes the cell idempotent while producing the same
# `X`, `y`, `sc` and `le` on the first run.
sc = StandardScaler()
le = LabelEncoder()
# Encoded integer labels for the Cover_Type target.
y = le.fit_transform(df['Cover_Type'])
# Standardised feature matrix (NumPy array) for every non-Id, non-target column.
# NOTE(review): the scaler is fit before the train/validation split, so
# validation rows contribute to the scaling statistics — confirm this is acceptable.
X = sc.fit_transform(df.drop(columns=['Id', 'Cover_Type']))

In [None]:
# Distinct label values present in y after encoding.
set(y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 2% of the rows, then split that hold-out into two disjoint halves:
#   * (X_val, y_val)   - passed to model.fit for LR scheduling / early stopping
#   * (X_test, y_test) - used only for the final confusion matrix and accuracy
# Previously X_test was carved out of X_val itself (all but one row), so the
# reported test metrics were measured on the data used for model selection.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.02, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

In [None]:
# Row counts of the training, validation and test splits.
len(X_train),len(X_val),len(X_test)

In [None]:
# Shapes of the training feature matrix and label vector.
X_train.shape,y_train.shape

In [None]:
# Training configuration used by the callback and model.fit cells below.
# Maximum number of epochs passed to model.fit.
EPOCHS = 90 
# Verbosity level passed to model.fit and to both callbacks.
VERBOSE = 2 
# NOTE(review): RUNS is not referenced anywhere in the visible notebook — confirm it is still needed.
RUNS = 1 
# Mini-batch size passed to model.fit.
BATCH_SIZE = 512 

In [None]:
# Use a TPU when one can be reached; otherwise fall back to TensorFlow's
# default distribution strategy. The previous bare `except: pass` left
# `strategy` undefined on machines without a TPU, so the print below (and the
# later `with strategy.scope()`) failed with NameError.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:  # deliberately broad: TPU detection is best-effort
    print('TPU not available, using default strategy:', tpu_error)
    strategy = tf.distribute.get_strategy()
print('Num',strategy.num_replicas_in_sync)

In [None]:
def model_fun(X, n_classes=7):
    """Build the (uncompiled) dense classifier used in this notebook.

    The network is Dense(128) -> BatchNorm -> Dense(64) -> BatchNorm, whose
    output is concatenated with the first BatchNorm output (a skip
    connection), followed by Dropout(0.2), two Dense(64) + BatchNorm stages
    and a softmax output layer.

    Args:
        X: Array-like with a ``shape`` attribute; only ``X.shape[-1]`` is
            read, and it sets the number of input features.
        n_classes: Number of units in the softmax output layer. Defaults to
            7, the value that was previously hard-coded.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping a feature vector to
        ``n_classes`` class probabilities.
    """
    # `(n,)` is a 1-tuple; the original `(n)` was just the int `n`, which
    # Keras versions that require an iterable shape reject.
    il = tf.keras.layers.Input(shape=(X.shape[-1],))
    x = tf.keras.layers.Dense(128, activation='relu')(il)
    x1 = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x1)
    x = tf.keras.layers.BatchNormalization()(x)
    # Skip connection: merge the second stage with the first stage's output
    # before applying dropout.
    x = tf.keras.layers.Dropout(0.2)(tf.keras.layers.Concatenate()([x, x1]))
    x = tf.keras.layers.Dense(units=64, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    output = tf.keras.layers.Dense(n_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs = il,outputs = output)
    return model
    

In [None]:
# Halve the learning rate after 5 epochs without improvement in validation loss.
lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, verbose=VERBOSE)
# Stop after 10 epochs without improvement in validation accuracy and restore
# the best weights. The model is compiled with metrics=["accuracy"], which
# Keras logs as "val_accuracy"; the previous monitor name "val_acc" is never
# logged, so early stopping and weight restoration silently did nothing.
es = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=10, verbose=VERBOSE, mode="max", restore_best_weights=True)

In [None]:
# Build, compile and train the classifier under the selected distribution
# strategy. Labels are integer-encoded, hence sparse categorical cross-entropy.
with strategy.scope():
    model = model_fun(X_train)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(),
        metrics=["accuracy"],
    )
    # `use_multiprocessing=True` was dropped: Keras documents it as applying
    # to generator / Sequence inputs only, so it had no effect on these
    # in-memory arrays, and newer Keras releases reject the argument in fit().
    history = model.fit(
        X_train,
        y_train,
        verbose=VERBOSE,
        shuffle=True,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=[lr, es],
        validation_data=(X_val, y_val),
    )

In [None]:
# Predicted class index for each row of X_test (argmax over the softmax output).
# The indices are intentionally NOT passed through le.inverse_transform:
# y_test holds LabelEncoder-encoded labels, so predictions must stay in the
# same encoded space for the confusion matrix and accuracy below to be valid.
y_predict = np.argmax(model.predict(X_test), axis=1)

In [None]:
from sklearn.metrics import confusion_matrix,accuracy_score

# Confusion matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_test,y_predict)
fig, ax = plt.subplots(figsize=(10,10))
# Cells are integer counts, so format them as integers rather than '.2f'.
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix on X_test');

In [None]:
# Accuracy of the X_test predictions, expressed as a percentage.
accuracy_score(y_test,y_predict)*100

In [None]:
# Preview the first rows of the competition test frame.
test_df.head()

In [None]:
# Move the Id column out of test_df (needed later for the submission file) so
# that test_df holds feature columns only. The membership guard makes the cell
# safe to re-run: once Id has been removed, the previous `Ids` is kept instead
# of raising on the missing column.
if 'Id' in test_df.columns:
    Ids = test_df.pop('Id')

In [None]:
# Scale the test features with the statistics learned from the training data.
# Calling fit_transform here would refit the scaler on the test set, so the
# model would see inputs standardised differently from what it was trained on.
sub_x = sc.transform(test_df)

In [None]:
# Shape of the scaled test feature matrix.
sub_x.shape

In [None]:
# Predict on the scaled test features, take the most probable class index per
# row, and map the indices back to the original Cover_Type labels.
sub_predict = le.inverse_transform(
    np.argmax(model.predict(sub_x), axis=1)
)

In [None]:
# Start from an empty frame; the Id and Cover_Type columns are added in the next cell.
sub = pd.DataFrame()

In [None]:
# Attach the test-row identifiers and the predicted Cover_Type labels.
sub = sub.assign(Id=Ids, Cover_Type=sub_predict)

In [None]:
# Preview the first rows of the submission frame.
sub.head()

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv('submission_dec.csv',index = False)