In [None]:
import os 
import pandas as pd 
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from math import factorial
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt

In [None]:
# Competition inputs: the training rows (with a `target` column), the test
# rows to predict, and the submission template that is filled in further down.
train = pd.read_csv('/kaggle/input/tabular-playground-series-feb-2022/train.csv')
test = pd.read_csv('/kaggle/input/tabular-playground-series-feb-2022/test.csv')
sample_submission = pd.read_csv('/kaggle/input/tabular-playground-series-feb-2022/sample_submission.csv')

In [None]:
# Submission files taken from two other Kaggle inputs.
# `sub1` supplies the pseudo-labels that are merged into the training data;
# `sub` is the reference the final predictions are compared against.
sub1 = pd.read_csv('../input/tpsfeb22-03-clustering-improves-the-predictions/submission.csv')
sub = pd.read_csv('../input/extra-blender-addition/submission.csv')

In [None]:
# The `row_id` column is not used as a feature: remove it from both frames.
# The drop happens in place, so this cell can only be run once per load.
for frame in (train, test):
    frame.drop(columns='row_id', inplace=True)

In [None]:
# Exact duplicate rows are removed from the training data in place.
train.drop_duplicates(inplace=True)

# Pseudo-labelled copies of the test set: each copy takes its `target` column
# from one of the external submissions and then drops fully duplicated rows.
temp = test.assign(target=sub.target).drop_duplicates()
temp1 = test.assign(target=sub1.target).drop_duplicates()

In [None]:
# Class balance of the de-duplicated training labels.
train['target'].value_counts()

In [None]:
# Fit the label encoder on the training targets (`fit` returns the encoder
# itself), then show the classes it learned.
LE = LabelEncoder().fit(train['target'])
list(LE.classes_)

In [None]:
# Training pool = real training rows + test rows pseudo-labelled from `sub1`.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported
# replacement, and `ignore_index=True` yields the same fresh 0..n-1 index that
# the former `reset_index(drop=True)` call produced.
new_data = pd.concat([train, temp1], ignore_index=True)
# Rows that are identical in every column (features and target) are dropped
# in place; surviving rows keep their index labels.
new_data.drop_duplicates(inplace=True)

In [None]:
# Features are every column except `target`; labels are encoded with the
# encoder fitted on the training targets.
X = new_data.drop(columns='target')
y = LE.transform(new_data['target'])

In [None]:
# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: Object exposing ``.values`` (for example a pandas DataFrame) with
            one sample per row. It is converted to a tensor once, keeping the
            dtype of the underlying array.
        y: Array-like of labels, one entry per row of ``X``.
    """

    def __init__(self, X, y):
        self.X, self.y = torch.tensor(X.values), torch.tensor(y)

    def __len__(self):
        # Number of samples equals the number of feature rows.
        return len(self.X)

    def __getitem__(self, idx):
        # Return the (features, label) pair stored at position ``idx``.
        return self.X[idx], self.y[idx]
    
class TestDataset(Dataset):
    """Map-style dataset of unlabelled feature rows, used for prediction.

    Args:
        X: Object exposing ``.values`` (for example a pandas DataFrame) with
            one sample per row. It is converted to a tensor once, keeping the
            dtype of the underlying array.
    """

    def __init__(self, X):
        features = torch.tensor(X.values)
        self.X = features

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Only features are returned; there is no label for these rows.
        row = self.X[idx]
        return row

In [None]:
# Build the training set from the training split only. The full `X` / `y`
# also contain every row of `X_test` / `y_test`, so training on them would
# leak the hold-out rows into the model and make `test_set` meaningless.
train_set = CustomDataset(X_train, y_train)
# Held-out 20% split, never seen during training.
test_set = CustomDataset(X_test, y_test)
# Unlabelled competition test rows used for the final predictions.
pred_set = TestDataset(test)

In [None]:
# Report how many samples each dataset holds.
for caption, dataset in (
    ('Length of train_set:', train_set),
    ('Length of test_set:', test_set),
    ('Length of pred_set:', pred_set),
):
    print(caption, len(dataset))

In [None]:
# Device configuration: use the first CUDA device when available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Hyper parameters
num_epochs = 100
batch_size = 512
learning_rate = 1e-4

# Mini-batch loaders; both are configured to reshuffle on every pass.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

In [None]:
class ResidualBlock(nn.Module):
    """Fully connected residual block that keeps the feature width unchanged.

    One ``nn.Linear(channel, channel)`` layer is applied twice, so both
    applications share the same weights, with a ReLU in between. The block
    input is added back to the result before the final ReLU.

    Args:
        channel: Width of the input and output feature vectors.
    """

    def __init__(self, channel):
        super().__init__()
        self.fc = nn.Linear(channel, channel)

    def forward(self, x):
        hidden = F.relu(self.fc(x))
        # Second pass through the *same* layer (shared parameters).
        residual = self.fc(hidden)
        # Skip connection followed by the output non-linearity.
        return F.relu(x + residual)

class Net(nn.Module):
    """Fully connected classifier with residual blocks.

    The trunk narrows the representation 512 -> 256 -> 128 -> 64. Each of the
    first three stages is Linear -> ReLU -> BatchNorm1d -> ResidualBlock; the
    last stage is Linear -> ReLU. A final linear layer maps the 64 features to
    one logit per class.

    Args:
        in_features: Number of input features per sample. Defaults to 286,
            the width that was previously hard-coded.
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, in_features=286, num_classes=10):
        super().__init__()
        # The attribute is called `conv` although it contains only linear
        # layers; the name and the layer order are kept so parameter names
        # in the state dict do not change.
        self.conv = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            ResidualBlock(512),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            ResidualBlock(256),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            ResidualBlock(128),

            nn.Linear(128, 64),
            nn.ReLU(),
        )
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        features = self.conv(x)
        return self.fc(features)

In [None]:
# Network (moved to the selected device), loss function and optimiser.
ResNet_model = Net().to(device)
ResNet_criterion = nn.CrossEntropyLoss()
ResNet_optimizer = torch.optim.AdamW(
    ResNet_model.parameters(),
    lr=learning_rate,
)

# Residual Net

In [None]:
# Train the model
# Select training mode explicitly: BatchNorm behaves differently in eval mode,
# and this cell may be re-run after the model has been used for inference.
ResNet_model.train()
step = len(train_loader)  # number of mini-batches per epoch
for epoch in range(num_epochs):
    epoch_loss = 0
    for x, label in tqdm(train_loader):
        x = x.to(device)
        label = label.to(device)

        # Forward pass (`.float()` casts the batch to float32 for the model)
        output = ResNet_model(x.float())
        loss = ResNet_criterion(output, label)
        epoch_loss += loss.item()

        # Backward and optimize
        ResNet_optimizer.zero_grad()
        loss.backward()
        ResNet_optimizer.step()

    # Mean of the per-batch losses over this epoch.
    print(f'Epoch:[{epoch + 1}/{num_epochs}], Average Loss in ResNet: {epoch_loss / step:.6f}')

# Predict on the test set and measure agreement with the reference submission

In [None]:
# Keep the original row order (no shuffling) so predictions line up with the
# rows of the submission template.
pred_loader = DataLoader(dataset=pred_set, batch_size=batch_size, shuffle=False)
pred_label = []
# Switch to inference mode: in training mode BatchNorm normalises with the
# statistics of the current batch and keeps updating its running averages,
# which makes predictions depend on how the rows happen to be batched.
ResNet_model.eval()
with torch.no_grad():
    for x in pred_loader:
        # The prediction set has no labels, so only the features are moved.
        x = x.to(device)
        outputs = ResNet_model(x.float())
        # Most likely class id per row, collected on the CPU.
        pred_label.extend(torch.argmax(outputs, dim=1).cpu().numpy())

In [None]:
# Map the predicted class ids back to the original labels, show the result
# and write the submission file (without the index column).
sample_submission['target'] = LE.inverse_transform(pred_label)
print(sample_submission)
sample_submission.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Agreement between this notebook's predictions (passed as `y_true`) and the
# reference submission `sub` (passed as `y_pred`); neither is ground truth.
print(confusion_matrix(y_true=sample_submission['target'], y_pred=sub['target']))

In [None]:
# Number of rows where the prediction differs from the reference submission.
sum(sample_submission['target'].ne(sub['target']))

In [None]:
# Per-class precision/recall of the reference submission measured against
# this notebook's predictions (used as `y_true`), reported to 5 digits.
print(
    classification_report(
        y_true=sample_submission['target'],
        y_pred=sub['target'],
        digits=5,
    )
)