### Otto Group Product Classification

- __Author__ - [Nandeshwar Gupta](https://nandeshwar.in/)
- __Date__ - 04Feb2022
- __Link__ - [Kaggle](https://www.kaggle.com/nandeshwar) || [Github](https://github.com/nandesh553)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim

from torch import nn
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from tqdm.notebook import tqdm

### Checklist

- [x] Read Dataset
- [ ] Scale Dataset
    - [x] Z-Score Normalization
    - [ ] MinMax Scaling
    - [ ] Without Scaling
- [X] Data Splitting
- [X] Define Evaluation metric
- [X] Define Optimizer
- [X] Model Training
- [ ] Gridsearch
- [ ] KFold

### Read Data

In [None]:
# Locations of the competition CSV files (train first, then test).
_csv_paths = (
    "/kaggle/input/otto-group-product-classification-challenge/train.csv",
    "/kaggle/input/otto-group-product-classification-challenge/test.csv",
)

# Load both files, using the `id` column as the index of each frame.
train_df, test_df = [pd.read_csv(path, index_col='id') for path in _csv_paths]

### Label Encode classes

In [None]:
# Distinct labels, in order of first appearance in the last column of train_df.
classes = train_df.iloc[:, -1].unique()

# Forward (label -> integer) and reverse (integer -> label) lookup tables.
class_to_idx = {label: position for position, label in enumerate(classes)}
idx_to_class = {position: label for label, position in class_to_idx.items()}

print(len(idx_to_class))
print(idx_to_class)

### Scaling input features
To make training the network easier, we'll standardize each of the continuous feature columns. That is, we'll shift and scale each feature so that it has zero mean and a standard deviation of 1. This is also called __Z-Score Normalization__.

In [None]:
# Preprocessing [Z Score Normalization]
#
# The mean and standard deviation are computed once, on the training features,
# and the same statistics are applied to the test features so that both splits
# go through an identical transformation.

# Every column of train_df except the trailing label column.
feature_cols = train_df.columns[:-1]

print("Scaling train_df .....")
feature_mean = train_df[feature_cols].mean()
# A constant column has std == 0; dividing by 1 instead maps it to 0 rather than NaN/inf.
feature_std = train_df[feature_cols].std().replace(0, 1)
train_df[feature_cols] = (train_df[feature_cols] - feature_mean) / feature_std

print("Scaling test_df  ..... ")
# test_df is fed to the model in full (it has no label column), so every feature
# column must be scaled, including the last one.
# NOTE(review): assumes test_df carries the same feature column names as train_df - confirm.
test_df[feature_cols] = (test_df[feature_cols] - feature_mean) / feature_std

print("Done")

In [None]:
# Loader and split settings used by the cells below.
validation_size = 0.2  # fraction of the training rows held out for validation
batch_size = 64        # number of samples per mini-batch
num_workers = 2        # worker count passed to each DataLoader

In [None]:
# Detect once whether a CUDA device can be used; later cells branch on this flag.
train_on_gpu = torch.cuda.is_available()

_cuda_status = (
    'CUDA is available!  Training on GPU ...'
    if train_on_gpu
    else 'CUDA is not available.  Training on CPU ...'
)
print(_cuda_status)

In [None]:
class OttoDataset(Dataset):
    """Tensor-backed dataset built from an Otto competition DataFrame.

    Args:
        df: Source frame. With ``train=True`` the last column is the class
            label and all preceding columns are features; with ``train=False``
            every column is a feature.
        train: Whether ``df`` carries labels. Controls both how ``df`` is
            split into ``X``/``y`` and what ``__getitem__`` returns.
        normalized: Placeholder flag; any truthy value raises
            ``NotImplementedError``.

    Attributes set by the constructor:
        df: The frame backing the tensors (shuffled copy when ``train=True``).
        X: Feature tensor built from ``df``.
        y: Label-index tensor (empty tensor when ``train=False``).

    Raises:
        NotImplementedError: If ``normalized`` is truthy.
        KeyError: If a label is missing from the module-level ``class_to_idx``
            mapping (training mode only).
    """

    def __init__(self, df, train=True, normalized=False):
        if normalized:
            raise NotImplementedError("Normaization is not yet implemented. Implement and see change in results")

        self.train = train

        if self.train:
            # Reshuffle the labelled rows; features and labels are both read
            # from the shuffled frame so they stay aligned.
            self.df = df.sample(frac=1)
            self.X = torch.from_numpy(np.array(self.df.iloc[:, :-1]))
            labels = [class_to_idx[x] for x in self.df.iloc[:, -1]]
            self.y = torch.from_numpy(np.array(labels))
        else:
            # Keep unlabelled rows in their original order: predictions are
            # matched back to the rows of `df` by position, so shuffling here
            # would attach every prediction to the wrong id.
            self.df = df
            self.X = torch.from_numpy(np.array(self.df))
            self.y = torch.tensor([])

        print(f"Shape of X {self.X.shape} and Y is {self.y.shape}")

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(features, label)`` in training mode, else ``features`` only."""
        if self.train:
            return self.X[idx], self.y[idx]
        return self.X[idx]

In [None]:
# Load pytorch dataset
train_dataset = OttoDataset(train_df, train=True)
test_dataset = OttoDataset(test_df, train=False)

In [None]:
# script to create train and validation set with shuffle
num_train = len(train_df)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(validation_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = DataLoader(train_dataset, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = DataLoader(train_dataset, batch_size=batch_size, 
    sampler=valid_sampler, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, 
    num_workers=num_workers)

In [None]:
class MLP(nn.Module):
    """Three-layer fully connected classifier with ReLU activations and dropout.

    Args:
        in_features: Size of each input vector (default 93).
        hidden1: Width of the first hidden layer (default 256).
        hidden2: Width of the second hidden layer (default 512).
        num_classes: Number of output logits (default 9).
        dropout: Dropout probability applied after each hidden layer
            (default 0.25).

    The defaults reproduce the original fixed architecture, so ``MLP()``
    builds the same layers under the same attribute names as before.
    """

    def __init__(self, in_features=93, hidden1=256, hidden2=512, num_classes=9, dropout=0.25):
        super().__init__()

        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

        # One dropout module is reused after both hidden layers.
        self.dropout1 = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return the output of the last linear layer (no softmax applied) for a batch ``x``."""
        x = self.dropout1(F.relu(self.fc1(x)))
        x = self.dropout1(F.relu(self.fc2(x)))
        return self.fc3(x)
    
# Instantiate the network and print its layer summary.
model = MLP()
print(model)

# Move the parameters to the GPU when CUDA was detected.
if train_on_gpu:
    model = model.cuda()

### Specify Loss Function and Optimizer
Decide on a loss function and an optimizer that are best suited for this classification task. The linked code examples from above may be a good starting point: this PyTorch classification example, or this more complex Keras example. Pay close attention to the value of the learning rate, as it determines how your model converges to a small error.

In [None]:
# specify loss function
criterion = nn.CrossEntropyLoss()

# specify optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.6)

### Train the network

In [None]:
%%time

n_epochs = 100
# Lowest validation loss seen so far (`np.inf`: the `np.Inf` alias was removed in NumPy 2.0).
valid_loss_min = np.inf

# Number of samples each loader actually visits per epoch. This is the sampler
# length rather than len(loader.dataset), which would count the whole dataset
# even when a sampler restricts the loader to a subset.
n_train_samples = len(train_loader.sampler)
n_valid_samples = len(valid_loader.sampler)

for epoch in range(n_epochs):

    # ---- training pass (dropout active) ----
    model.train()

    train_loss = 0.0
    valid_loss = 0.0

    for data, target in train_loader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()

        # clear the gradients of all optimized variables
        optimizer.zero_grad()

        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data.float())

        # calculate the batch loss
        loss = criterion(output, target)

        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer.step()

        # accumulate the summed (not averaged) loss of this batch
        train_loss += loss.item() * data.size(0)

    # ---- validation pass (dropout disabled, no gradient tracking) ----
    model.eval()

    with torch.no_grad():
        for data, target in valid_loader:
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()

            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data.float())

            # calculate the batch loss
            loss = criterion(output, target)

            # accumulate the summed (not averaged) loss of this batch
            valid_loss += loss.item() * data.size(0)

    # calculate per-sample average losses over the samples each loader visited
    train_loss = train_loss / n_train_samples
    valid_loss = valid_loss / n_valid_samples

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
        torch.save(model.state_dict(), 'model_otto-1.pt')
        valid_loss_min = valid_loss

### Load the saved model
Check whether we are running on a GPU or a CPU, then load the saved weights onto that device.

In [None]:
# Pick the device that matches the earlier CUDA check.
device = torch.device("cuda" if train_on_gpu else "cpu")

# Read the saved weights onto that device, then restore them into the model.
_saved_state = torch.load('model_otto-1.pt', map_location=device)
model.load_state_dict(_saved_state)

In [None]:
# Submission skeleton: one row per test sample, an `id` column followed by one column per class.
submission_columns = np.concatenate([np.array(["id"]), classes])

# Initialise with a float so the class columns can receive probabilities later
# without depending on a silent integer -> float dtype upcast.
sub_df = pd.DataFrame(0.0, index=np.arange(test_df.shape[0]), columns=submission_columns)

# Replace the placeholder ids with the real ones from the test frame's index.
sub_df['id'] = test_df.index
sub_df

In [None]:
model.eval()

# Rounded class probabilities of every batch, kept in the order the loader yields them.
batch_probs = []

with torch.no_grad():
    for data in test_loader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data = data.cuda()

        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data.float())

        # Softmax over the class axis; `dim` is passed explicitly because
        # calling softmax without it is deprecated.
        probs = F.softmax(output, dim=1)

        # NOTE(review): rounding to one decimal is kept from the original code; it
        # discards precision and rows may no longer sum to 1 - confirm it is intended.
        batch_probs.append(np.around(probs.cpu().numpy(), decimals=1))

if batch_probs:
    # Fill all class columns (everything after `id`) in a single assignment.
    # Rows are matched by position, so this relies on test_loader yielding the
    # samples in the same order as the rows of sub_df.
    sub_df[list(sub_df.columns[1:])] = np.concatenate(batch_probs, axis=0)

In [None]:
# Notebook display: show the submission frame after the prediction cell has run.
sub_df

In [None]:
# Write the submission frame to CSV, leaving out the positional row index.
sub_df.to_csv(path_or_buf='submission.csv', index=False)