In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.metrics import RootMeanSquaredError
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

## Load datasets

In [None]:
# Read both competition CSVs and discard the 'id' column, which is not used
# as a model input anywhere below.
train_dataset = pd.read_csv(
    "../input/tabular-playground-series-feb-2021/train.csv"
).drop(columns=['id'])
test_dataset = pd.read_csv(
    "../input/tabular-playground-series-feb-2021/test.csv"
).drop(columns=['id'])

# Report (rows, columns) for each frame.
for caption, frame in (
    ("shape of train_dataset: ", train_dataset),
    ("shape of test_dataset: ", test_dataset),
):
    print(caption, frame.shape)

In [None]:
# Preview the first rows of the training frame.
train_dataset.head()

In [None]:
# Summarise the training frame's columns (dtypes and non-null counts).
train_dataset.info()

In [None]:
# Preview the first rows of the test frame.
test_dataset.head()

In [None]:
# Summarise the test frame's columns (dtypes and non-null counts).
test_dataset.info()

In [None]:
def encoding(dataset, n_categorical=10):
    """Integer-encode the leading categorical columns of ``dataset`` in place.

    Each of the first ``n_categorical`` columns is replaced by integer codes
    assigned in sorted order of that column's distinct values (the smallest
    value becomes 0, the next 1, and so on).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose first ``n_categorical`` columns are to be encoded.
        The frame is modified in place. Column labels are assumed unique.
    n_categorical : int, default 10
        Number of leading columns to encode.

    Returns
    -------
    pandas.DataFrame
        The same ``dataset`` object that was passed in.

    Raises
    ------
    IndexError
        If ``dataset`` has fewer than ``n_categorical`` columns. The check
        happens before any column is touched, so a failing call leaves the
        frame unmodified.

    Notes
    -----
    Codes are derived only from the values present in ``dataset``; two frames
    share a mapping only if a column holds the same set of values in both.
    Missing values are encoded as -1.
    """
    if n_categorical > dataset.shape[1]:
        raise IndexError(
            f"cannot encode {n_categorical} columns: "
            f"dataset only has {dataset.shape[1]}"
        )

    for position in range(n_categorical):
        column = dataset.columns[position]
        codes, _ = pd.factorize(dataset[column], sort=True)
        # Assign by label so the column is *replaced* and takes the integer
        # dtype of the codes. Positional `iloc[:, i] = ...` writes into the
        # existing column and can leave it as object dtype.
        dataset[column] = codes
    return dataset

In [None]:
# Integer-encode the categorical columns of the training frame, then the
# test frame.
# NOTE(review): each call derives its codes from the frame it receives, so
# the train and test mappings only agree if every encoded column contains
# the same set of values in both files — confirm for this dataset.
train_dataset, test_dataset = encoding(train_dataset), encoding(test_dataset)

## Data Analysis

In [None]:
# Clustered heatmap of the pairwise correlations between all columns.
corr_matrix = train_dataset.corr()
cluster_grid = sns.clustermap(corr_matrix, annot=True, fmt=".2f")
# clustermap builds its own figure with several axes (dendrograms, heatmap,
# colour bar). plt.title() would label whichever of those happens to be the
# current axes, so set a figure-level title instead.
cluster_grid.ax_heatmap.figure.suptitle("Correlation Between Features", y=1.02)
plt.show()

In [None]:
# Correlation of every column with the target, sorted ascending.
# Computed once and reused for both the bar chart and the printed table
# (the full correlation matrix is not recomputed for each use).
target_corr = train_dataset.corr()['target'].sort_values()

ax = target_corr.plot(kind="bar")
ax.set_title("Correlation with target")
ax.set_ylabel("correlation")
plt.show()
print(target_corr)

## Separate features and target, then split train/validation

In [None]:
# Columns left out of the model inputs for both frames.
dropped_features = ['cont2', 'cont9', 'cont12', 'cat4']

# Target vector and feature matrices as plain NumPy arrays.
Y_train = train_dataset['target'].values
X_train = train_dataset.drop(columns=['target'] + dropped_features).values
X_test = test_dataset.drop(columns=dropped_features).values

for caption, array in (
    ("shape of X_train", X_train),
    ("shape of Y_train", Y_train),
    ("shape of X_test", X_test),
):
    print(caption, array.shape)

In [None]:
# One-hot encode the feature at position 1 and pass every other column
# through unchanged. The transformer is fitted on the training matrix and
# the fitted transformer is then applied to the test matrix.
one_hot = OneHotEncoder(dtype=float)
ohe = ColumnTransformer(
    [("ohe", one_hot, [1])],
    remainder="passthrough",
)
X_train = ohe.fit_transform(X_train)
X_test = ohe.transform(X_test)

In [None]:
# Hold out 20% of the training rows for validation; the fixed random_state
# keeps the split the same across runs.
x_train, x_val, y_train, y_val = train_test_split(
    X_train,
    Y_train,
    test_size=0.2,
    random_state=0,
)

## Scaling

In [None]:
# Scale every feature to [0, 1]. The scaler's per-feature min/max are learned
# from the training split only and then reused for the validation and test
# matrices, so all three are on the same scale the model is trained on.
# (Refitting on X_test would map test features with different min/max values
# than the ones used for the training data.)
scaler = MinMaxScaler(feature_range=(0, 1))
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)
X_test = scaler.transform(X_test)

In [None]:
# Shapes of the training/validation features followed by their targets.
for split in (x_train, x_val, y_train, y_val):
    print(split.shape)

## Create and fit Model

In [None]:
def create_model(input_dim=21):
    """Build and compile the feed-forward regression network.

    Architecture: Dense(44, tanh) -> Dropout(0.2) -> Dense(22, tanh) ->
    Dropout(0.2) -> Dense(11, relu) -> Dropout(0.2) -> Dense(1).
    The final layer has a single unit with no activation.

    Parameters
    ----------
    input_dim : int, default 21
        Number of input features, i.e. the number of columns of the matrix
        passed to ``fit``/``predict``. The default keeps the original
        hard-coded width.

    Returns
    -------
    keras.models.Sequential
        Model compiled with the Adam optimizer, mean-squared-error loss and
        mean absolute error ('mae') as a tracked metric.
    """
    model = Sequential()
    model.add(Dense(44, activation="tanh", input_dim=input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(22, activation="tanh"))
    model.add(Dropout(0.2))
    model.add(Dense(11, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mse", metrics=['mae'])
    return model

In [None]:
# Instantiate the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Train for 50 epochs in mini-batches of 128, evaluating on the validation
# split after each epoch. `hist.history` holds the per-epoch metrics.
hist = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_val, y_val),
)

In [None]:
# Per-epoch loss on the training and validation data.
fig, ax = plt.subplots()
ax.plot(hist.history['loss'], label='Training Loss')
ax.plot(hist.history['val_loss'], label='Validation Loss')
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
plt.show()

# Per-epoch mean absolute error. These history keys are 'mae'/'val_mae',
# so the curves are labelled as MAE (they were previously labelled "rmse").
fig, ax = plt.subplots()
ax.plot(hist.history['mae'], label='Training MAE')
ax.plot(hist.history['val_mae'], label='Validation MAE')
ax.set_xlabel("Epoch")
ax.set_ylabel("MAE")
ax.legend()
plt.show()

## Prediction

In [None]:
# Predict on the validation split.
y_pred = model.predict(x_val)

# Error on the raw (unrounded) predictions.
val_rmse = float(np.sqrt(np.mean((np.asarray(y_val, dtype=float) - y_pred.ravel()) ** 2)))
print("validation rmse: ", val_rmse)

# Bucket both the true values and the predictions to the nearest integer so
# they can be compared as classes. Both sides are *rounded*: casting the true
# values straight to int would truncate them while the predictions are
# rounded, putting e.g. 7.9 in bucket 7 on one side and 8 on the other.
# reshape(-1, 1) follows the actual validation size rather than a fixed count.
y_true = np.rint(y_val).astype(np.int64).reshape(-1, 1)
y_pred = y_pred.round()

cm = confusion_matrix(y_true, y_pred)
score = accuracy_score(y_true, y_pred)
print("accuracy_score: ", score)

f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(cm, annot=True, linewidths=0.01, cmap="Blues", linecolor='green', fmt=".2f", ax=ax)
plt.xlabel("Predict")
plt.ylabel("True")
plt.title("Confusion matrix")
plt.show()

In [None]:
# Predict on the test matrix and keep a DataFrame view of the raw output.
prediction = model.predict(X_test)
pred = pd.DataFrame(prediction)

# Start from the sample submission file, overwrite its 'target' column with
# the flattened predictions, and write the result without the index.
sub = pd.read_csv(
    "../input/tabular-playground-series-feb-2021/sample_submission.csv"
).assign(target=prediction.ravel())
sub.to_csv('sample_submission.csv', index=False)