In [None]:
%%time
# Data Wranglers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Helper libraries
from warnings import filterwarnings
from gc import collect
# Silence all Python warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by later cells.
filterwarnings('ignore')
# Display up to 125 columns when a DataFrame is rendered.
pd.set_option('display.max_columns', 125)

In [None]:
%%time

# Competition files, listed in the order they are unpacked below:
# training set, test set, sample submission.
file_paths = [
    '/kaggle/input/tabular-playground-series-sep-2021/train.csv',
    '/kaggle/input/tabular-playground-series-sep-2021/test.csv',
    '/kaggle/input/tabular-playground-series-sep-2021/sample_solution.csv',
]

# Read every CSV into its own DataFrame.
X_train, X_test, sample_sub = map(pd.read_csv, file_paths)

In [None]:
# Preview the first rows of the training frame as loaded from CSV.
X_train.head()

In [None]:
%%time

# Set the target column aside, then remove the target and the row
# identifier from the feature frames (both frames are modified in place).
y_train = X_train['claim']
X_train.drop(columns=['claim', 'id'], inplace=True)
X_test.drop(columns=['id'], inplace=True)

In [None]:
# Report both feature-frame shapes, one per line.
print(
    f'X_train.shape = {X_train.shape}',
    f'X_test.shape  = {X_test.shape}',
    sep='\n',
)

In [None]:
# Percentage of missing values in each column, largest first.
(
    X_train
    .isna()
    .sum()
    .mul(100)
    .div(len(X_train))
    .sort_values(ascending=False)
)

In [None]:
# Percentage of training rows that contain at least one missing value.
# A single `any(axis=1)` pass flags such rows, so the NaN mask is scanned only once.
int(X_train.isna().any(axis=1).sum()) * 100 / X_train.shape[0]

In [None]:
# Number of training rows in each target class.
y_train.value_counts()

In [None]:
# Share of each target class, expressed as a percentage.
y_train.value_counts(normalize=True).mul(100)

In [None]:
%%time

from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import torch
from torch import nn

def data_prep(X_train, y_train, X_test):
    """Split, scale, impute and convert the tabular data to torch tensors.

    The training data is split into training and validation parts first
    (stratified on the target, fixed ``random_state``). The scaler and the
    imputer are then fitted on the training part only and merely applied to
    the validation and test parts, so no validation statistics leak into
    the preprocessing.

    Parameters
    ----------
    X_train : feature table for training; may contain missing values.
    y_train : target aligned with ``X_train``; must expose ``.values``
        after splitting (e.g. a pandas Series).
    X_test : feature table to predict on, same columns as ``X_train``.

    Returns
    -------
    tuple
        ``(X_train, y_train), (X_val, y_val), X_test`` with every element a
        ``float32`` torch tensor.
    """
    # Split before fitting any transformer so validation rows stay unseen.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, stratify=y_train, random_state=6755
    )

    # Transformer instances
    std_scale   = StandardScaler()
    simp_impute = SimpleImputer()

    # Fit on the training part only; scaling runs first, then imputation.
    X_fit  = simp_impute.fit_transform(std_scale.fit_transform(X_fit))
    # Transform validation and test data with the already fitted transformers.
    X_val  = simp_impute.transform(std_scale.transform(X_val))
    X_test = simp_impute.transform(std_scale.transform(X_test))

    def _as_tensor(array):
        # Cast to float32 and wrap the result as a torch tensor.
        return torch.from_numpy(array.astype(np.float32))

    # Convert to torch tensors
    return (
        (_as_tensor(X_fit), _as_tensor(y_fit.values)),
        (_as_tensor(X_val), _as_tensor(y_val.values)),
        _as_tensor(X_test),
    )
     

In [None]:
%%time

# Rebinds X_train, y_train and X_test to the values returned by data_prep;
# the original DataFrames are no longer reachable under these names afterwards.
(X_train, y_train), (X_val, y_val), X_test = data_prep(X_train, y_train, X_test)
# Call the garbage collector to clear space
collect()

In [None]:
from torch.utils.data import Dataset, DataLoader

class DATASET(Dataset):
    """Map-style dataset over a feature container and an optional target container."""

    def __init__(self, X, y=None):
        # y stays None for unlabeled data; items are then features only.
        self.y = y
        self.X = X

    def __len__(self):
        """Return the number of samples, i.e. the first dimension of X."""
        return self.X.shape[0]

    def __getitem__(self, index):
        """Return ``X[index]`` alone when there are no targets, else ``(X[index], y[index])``."""
        features = self.X[index]
        if self.y is None:
            return features
        return features, self.y[index]

In [None]:
# Wrap the training tensors and serve them in shuffled mini-batches of 1,000 rows.
train_data = DATASET(X=X_train, y=y_train)
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=1_000)

In [None]:
# Network dimensions: one input unit per feature column and a single output unit.
input_size, output_size = X_train.shape[1], 1
print(
    f'input_size  = {input_size}',
    f'output_size = {output_size}',
    sep='\n',
)

In [None]:
%%time

import torch
from torch import nn
import torch.nn.functional as F

In [None]:
class clfModel(nn.Module):
    """Feed-forward binary classifier that outputs one probability per row.

    Layer order: Dropout -> Linear -> BatchNorm1d -> PReLU -> Linear ->
    BatchNorm1d -> PReLU -> Linear -> Sigmoid.

    Parameters
    ----------
    in_features : int, optional
        Number of input features. When omitted, the notebook-level
        ``input_size`` variable is used, so ``clfModel()`` keeps working.
    hidden_sizes : tuple of two ints
        Widths of the first and second hidden layers.
    dropout : float
        Drop probability applied to the raw inputs.
    """

    def __init__(self, in_features=None, hidden_sizes=(100, 50), dropout=0.2):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: fall back to the global set earlier in the notebook.
            in_features = input_size
        hidden_1, hidden_2 = hidden_sizes
        self.layers = nn.Sequential(
            # Layer 1
            nn.Dropout(p=dropout),
            nn.Linear(in_features, hidden_1),
            nn.BatchNorm1d(hidden_1),
            nn.PReLU(hidden_1),
            # Layer 2
            nn.Linear(hidden_1, hidden_2),
            nn.BatchNorm1d(hidden_2),
            nn.PReLU(hidden_2),
            # Final Layer 3
            nn.Linear(hidden_2, 1),
            nn.Sigmoid(),
        )

    def forward(self, X):
        """Return the sigmoid output of the stack, one column per input row."""
        return self.layers(X)

In [None]:
def ann_model(model, epoches=10, lr=0.001, loader=None, val_data=None):
    """Train ``model`` with Adam and binary cross-entropy, printing losses per epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose output is a probability with one column per row
        (``nn.BCELoss`` is applied to it directly).
    epoches : int
        Number of passes over the training batches.
    lr : float
        Adam learning rate.
    loader : iterable of (X_batch, y_batch), optional
        Training batches. Defaults to the notebook-level ``train_loader``.
    val_data : tuple (X_val, y_val), optional
        Validation tensors. Defaults to the notebook-level ``X_val``/``y_val``.

    Returns
    -------
    The same ``model`` object, trained in place.
    """
    if loader is None:
        loader = train_loader
    if val_data is None:
        val_data = (X_val, y_val)
    val_X, val_y = val_data

    Optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    Loss      = nn.BCELoss()

    for epoch in range(1, epoches+1):

        # Training mode only needs to be set once per epoch, not once per batch.
        model.train()
        loss_sum, n_seen = 0.0, 0
        for X_batch, y_batch in loader:
            y_pred = model(X_batch)
            loss   = Loss(y_pred, y_batch.view(-1,1))

            Optimizer.zero_grad()
            loss.backward()
            Optimizer.step()

            # Weight each batch by its size so a short final batch does not skew the mean.
            batch_rows = X_batch.shape[0]
            loss_sum  += loss.item() * batch_rows
            n_seen    += batch_rows

        # Report the epoch mean instead of the last batch's loss; NaN if no batch was seen.
        train_loss = loss_sum / n_seen if n_seen else float('nan')

        model.eval()
        with torch.no_grad():
            loss_val = Loss(model(val_X), val_y.view(-1,1))
        print(f'Epoch : {epoch}/{epoches},  Train Loss = {train_loss:.6f},  Val Loss = {loss_val.item():.6f}')

    return model

In [None]:
%%time

# Seed PyTorch's global RNG so weight initialisation, dropout masks and
# batch shuffling are repeatable across kernel restarts.
torch.manual_seed(6755)
ann_model_1 = ann_model(clfModel(), epoches=5, lr=1e-4)

In [None]:
def safe_predict(model, X):
    """Run ``model`` on ``X`` without gradient tracking and return a NumPy array.

    The model is switched to evaluation mode and left in that mode.

    Parameters
    ----------
    model : torch.nn.Module
    X : torch.Tensor
        Input accepted by ``model``.

    Returns
    -------
    numpy.ndarray
        The model output, copied to host memory if needed.
    """
    model.eval()
    with torch.no_grad():
        y_pred = model(X)
    # .cpu() is a no-op for CPU tensors and makes outputs on other devices convertible.
    return y_pred.cpu().numpy()

In [None]:
# Model outputs (sigmoid probabilities) for the validation split.
y_pred = safe_predict(ann_model_1, X_val)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the true class,
# columns the class predicted at a 0.5 probability threshold.
pd.DataFrame(
    confusion_matrix(
        y_val.numpy().astype(int),
        np.where(y_pred > 0.5, 1, 0).ravel(),
    )
)

In [None]:
# confusion_matrix expects (y_true, y_pred). With normalize='true' each row
# (true class) sums to 100 %, so the diagonal holds the per-class recall.
pd.DataFrame(
    confusion_matrix(
        y_val.numpy().astype(int),
        np.where(y_pred > 0.5, 1, 0).ravel(),
        normalize='true',
    ) * 100
).round(2)

In [None]:
# Model outputs for the test set; this overwrites the validation predictions held in y_pred.
y_pred = safe_predict(ann_model_1, X_test)

In [None]:
# Assign through [] so the values always land in a DataFrame column
# (attribute assignment would only set a plain attribute if 'claim' were missing),
# and flatten the (n, 1) model output to one value per row.
sample_sub['claim'] = y_pred.ravel()

In [None]:
# Write the submission to 'first.csv' without the DataFrame index column.
sample_sub.to_csv('first.csv', index=False)