In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MaxAbsScaler
from sklearn.metrics import roc_curve,auc,confusion_matrix,roc_auc_score
from scipy.stats import zscore

from collections import Counter

In [None]:
# Later cells call `.cuda()` directly, so stop right away when no GPU is visible.
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    raise ValueError("cuda isn't available")
print('cuda available')

In [None]:
# Read the three competition CSVs in one pass: training rows, test rows and the
# submission template.
train, test, sample_submission = (
    pd.read_csv(csvPath)
    for csvPath in (
        '../input/tabular-playground-series-nov-2021/train.csv',
        '../input/tabular-playground-series-nov-2021/test.csv',
        '../input/tabular-playground-series-nov-2021/sample_submission.csv',
    )
)

In [None]:
# Model inputs are the columns whose name begins with 'f'.
feature_cols = list(filter(lambda name: name.startswith('f'), train.columns))

In [None]:
# Pull plain NumPy arrays out of the frames. Note that `test` is rebound from a
# DataFrame to an array holding only the feature columns.
X = train[feature_cols].to_numpy()
y = train['target'].to_numpy()

test = test[feature_cols].to_numpy()

In [None]:
# Hold out 20% of the rows for validation (fixed seed), then fit the scaler on
# the training part only and apply that same scaling to all three splits.
xtrain, xvalid, ytrain, yvalid = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

scaler = MaxAbsScaler().fit(xtrain)
xtrain, xvalid, test = (scaler.transform(part) for part in (xtrain, xvalid, test))

In [None]:
# Draw 20 random row indices below 10,000 and show each of those training rows
# as an image with 10 columns, overlaid with its target value.
randIds = np.random.randint(low=0, high=10_000, size=20)
fig, ax = plt.subplots(nrows=2, ncols=10, figsize=(20, 5))

for axi, idx in zip(ax.ravel(), randIds):
    axi.imshow(xtrain[idx].reshape(-1, 10))
    axi.text(5, 5, ytrain[idx], ha='center', fontsize=12, backgroundcolor='magenta')
plt.show()

In [None]:
bSize = 32

# Features become float32 tensors; targets become float32 column vectors of
# shape (N, 1) so they line up with the single-logit output of the network.
xtrain = torch.tensor(xtrain, dtype=torch.float32)
ytrain = torch.tensor(ytrain, dtype=torch.float32).view(-1, 1)

xvalid = torch.tensor(xvalid, dtype=torch.float32)
yvalid = torch.tensor(yvalid, dtype=torch.float32).view(-1, 1)

# Pair features with targets.
trainData = TensorDataset(xtrain, ytrain)
validData = TensorDataset(xvalid, yvalid)

# Training: shuffled mini-batches, incomplete last batch dropped.
# Validation: the whole split delivered as one batch.
trainLoader = DataLoader(trainData, batch_size=bSize, shuffle=True, drop_last=True)
validLoader = DataLoader(validData, batch_size=len(validData))

print(f"No of traing samples : {len(trainLoader.dataset)}\nNo of valid samples: {len(validLoader.dataset)}")

In [None]:
def createModel():
    """Build a fresh classifier together with its loss function and optimizer.

    Returns
    -------
    tuple
        ``(model, criterion, optimizer)``: a newly initialised ``Net`` (moved to
        the GPU when ``train_on_gpu`` is true), an ``nn.BCEWithLogitsLoss`` and
        an Adam optimizer over the model's parameters with learning rate 1e-2.
    """

    class Net(nn.Module):
        """One 3x3 convolution over a 10x10 view of each row, then an MLP head."""

        def __init__(self):
            super().__init__()

            self.input_dim = len(feature_cols)
            self.batch_size = bSize

            # 1 -> 5 channels with an unpadded 3x3 kernel: 10x10 input -> 8x8 maps.
            self.conv = nn.Sequential(nn.Conv2d(1, 5, 3), nn.ReLU())

            # Linear -> ReLU -> BatchNorm stages, followed by a single-logit output.
            widths = [5 * 8 * 8, 80, 64, 32, 16]
            stages = []
            for nIn, nOut in zip(widths[:-1], widths[1:]):
                stages.extend([nn.Linear(nIn, nOut), nn.ReLU(), nn.BatchNorm1d(nOut)])
            stages.append(nn.Linear(widths[-1], 1))
            self.fc = nn.Sequential(*stages)

        def forward(self, x):
            """Return one raw logit per sample; each sample must hold 100 values."""
            images = x.view(-1, 1, 10, 10)
            featureMaps = self.conv(images)
            # Collapse channel and spatial dimensions into one vector per sample.
            return self.fc(featureMaps.flatten(start_dim=1))

    net = Net()
    if train_on_gpu:
        net.cuda()

    return net, nn.BCEWithLogitsLoss(), torch.optim.Adam(params=net.parameters(), lr=1e-2)

In [None]:
# Grab a single mini-batch to use as a smoke-test input for the network.
sampleX, sampleY = next(iter(trainLoader))

In [None]:
# Throwaway model instance, used only for the forward-pass check in the next cell.
model, lossfun, optimizer = createModel()

In [None]:
# Forward-pass check: the sample batch should yield one logit per row.
model(sampleX.to('cuda'))

In [None]:
def trainModel(epoch):
    """Train a fresh network from ``createModel`` and record per-epoch AUC.

    Parameters
    ----------
    epoch : int
        Number of full passes over ``trainLoader``.

    Returns
    -------
    tuple
        ``(model, validAUC, trainAUC)``: the trained network (left in eval mode
        after the last epoch) and two 1-D tensors of length ``epoch``.
        ``trainAUC[i]`` is the mean of the per-batch ROC AUC values of epoch
        ``i``; ``validAUC[i]`` is the ROC AUC of the raw logits on the first
        batch yielded by ``validLoader``.
    """
    # Accuracy is tracked alongside AUC but is not part of the return value.
    trainAccuracy = torch.zeros(epoch)
    validAccuracy = torch.zeros(epoch)
    trainAUC = torch.zeros(epoch)
    validAUC = torch.zeros(epoch)

    model, lossFunc, optimizer = createModel()
    # Send batches to wherever createModel placed the parameters.
    device = next(model.parameters()).device

    for i in range(epoch):
        model.train()
        # Collected over the WHOLE epoch, so the means below cover every batch
        # rather than only the last one.
        batchAccuracy = []
        batchAUC = []

        for X, y in trainLoader:
            X, y = X.to(device), y.to(device)

            # forward pass
            yHat = model(X)
            loss = lossFunc(yHat, y)

            # backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # batch metrics, computed from the logits of the forward pass above
            logits = yHat.detach()
            labels = y.cpu().numpy().ravel()
            # ROC AUC is undefined when a batch contains a single class; skip it.
            if labels.min() != labels.max():
                batchAUC.append(roc_auc_score(labels, logits.cpu().numpy().ravel()))
            batchAccuracy.append(100 * torch.mean(((logits > 0).float() == y).float()).item())

        trainAccuracy[i] = float(np.mean(batchAccuracy)) if batchAccuracy else float('nan')
        trainAUC[i] = float(np.mean(batchAUC)) if batchAUC else float('nan')

        # Validation on the first batch yielded by validLoader.
        model.eval()
        X, y = next(iter(validLoader))
        X, y = X.to(device), y.to(device)
        with torch.no_grad():
            logits = model(X)

        validAccuracy[i] = 100 * torch.mean((y == (logits > 0).float()).float()).item()
        # Score the continuous logits: thresholding them to 0/1 first would
        # collapse the ROC curve to a single operating point.
        validAUC[i] = roc_auc_score(y.cpu().numpy().ravel(), logits.cpu().numpy().ravel())

        print(f"Train AUC: {trainAUC[i].item():.4f}")
        print(f"Validation AUC: {validAUC[i].item():.4f}")
    return model, validAUC, trainAUC

In [None]:
# Train for 50 epochs, keeping the fitted network and both AUC histories.
net, validAUC, trainAUC = trainModel(epoch=50)

In [None]:
# AUC per epoch for the training and validation splits.
plt.figure(figsize=(12, 8))
plt.plot(trainAUC, marker='o', linestyle='--', label='Train')
plt.plot(validAUC, marker='o', linestyle='--', label='Validation')
plt.gca().set(xlabel='Training Epochs', ylabel='AUC')
plt.legend()
plt.show()

In [None]:
# Predicted probabilities for the validation split.
# eval() is set explicitly so BatchNorm uses its running statistics no matter
# which mode the model was left in, and no_grad() avoids building an autograd
# graph for the whole split.
net.eval()
with torch.no_grad():
    pred = torch.sigmoid(net(xvalid.cuda()))

In [None]:
# ROC curve points and area under the curve for the validation predictions.
# The area is computed with `roc_auc_score` rather than by calling the imported
# `auc` function: the result is bound to the name `auc` (the confusion-matrix
# plot formats it into its title), so calling `auc(...)` here would fail with a
# TypeError the second time this cell is run.
fpr, tpr, _ = roc_curve(yvalid, pred.detach().cpu(), pos_label=1)
auc = roc_auc_score(yvalid, pred.detach().cpu())

In [None]:
# Confusion matrix on the first batch yielded by validLoader.
# Separate names are used for the batch so the module-level `X` / `y` training
# arrays are not overwritten by this cell.
validX, validY = next(iter(validLoader))
net.eval()
with torch.no_grad():
    yHat = net(validX.cuda())
# A logit above 0 corresponds to a predicted probability above 0.5.
validPreds = (yHat > 0).float()

validConf = confusion_matrix(validY.numpy().ravel(), validPreds.cpu().numpy().ravel())

In [None]:
# Heat map of the validation confusion matrix with the raw counts written into
# the cells; the colour scale tops out at half the number of predictions.
plt.rcParams['font.size'] = 13

plt.figure(figsize=(10, 10))
plt.imshow(validConf, cmap='Greens', vmax=len(validPreds) / 2)
plt.title(f"Confusion metrix for valid dataset\n\n AUC: {auc:.3f}")
plt.xticks([0, 1])
plt.yticks([0, 1])
plt.xlabel("Predicted label")
plt.ylabel("True label")

# x is the predicted label (matrix column), y is the true label (matrix row).
for col in (0, 1):
    for row in (0, 1):
        plt.text(col, row, validConf[row, col])

plt.show()

In [None]:
# Score the test set in inference mode: eval() makes BatchNorm use its running
# statistics, and no_grad() avoids building an autograd graph for every test row.
# `as_tensor` keeps this cell re-runnable once `test` is already a tensor.
test = torch.as_tensor(test).float()
net.eval()
with torch.no_grad():
    finalPreds = net(test.cuda())
# finalPreds holds raw logits; no sigmoid is applied. The sigmoid is monotonic,
# so ranking-based metrics such as the AUC tracked above do not depend on it.

In [None]:
# The network output has shape (N, 1); flatten it to a 1-D NumPy array so the
# column assignment does not depend on how pandas coerces a 2-D tensor.
sample_submission['target'] = finalPreds.detach().cpu().numpy().ravel()
sample_submission.to_csv('submission01.csv', index=False)
sample_submission.head()