##  Otto Group Tabular dataset
- [x] Create Custom Dataset
    - [x] Extract classes 
- [x] Train a model

###  Create Custom dataset

In [1]:
import numpy as np
import pandas as pd

In [2]:
kind = 'train'
data = pd.read_csv(f'./data/otto-group-product-classification-challenge/{kind}.csv', delimiter=',')
data.head()

Unnamed: 0,id,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_85,feat_86,feat_87,feat_88,feat_89,feat_90,feat_91,feat_92,feat_93,target
0,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,Class_1
1,2,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,Class_1
2,3,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,Class_1
3,4,1,0,0,1,6,1,5,0,0,...,0,1,2,0,0,0,0,0,0,Class_1
4,5,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,Class_1


### Extract the classes 

In [3]:
import re
def split_it(name):
    x = re.findall('\d+', name)
    return int(x[0])

In [4]:
split_it('class_2312x')

2312

## Deep Learning part

In [5]:
import torch
from torch import nn, optim, tensor
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

### Custom Dataset 

In [6]:
import re
def split_it(name):
    x = re.findall('\d+', name)
    return int(x[0])

class OttoDatset(Dataset):
    def __init__(self):
        
        data = pd.read_csv(f'./data/otto-group-product-classification-challenge/train.csv', delimiter=',')
        data['target'] = data['target'].apply(split_it)
        self.len = data.shape[0]
        self.labels = tensor(data['target'].values-1, dtype=torch.long) # classes must start from 0 for Crossentropy to work
        self.features = tensor(data.drop('target', axis=1).values, dtype = torch.float)
        
    def __getitem__(self, index):
        return self.features[index], self.labels[index]
    
    def __len__(self):
        return self.len

In [7]:
otto = OttoDatset()

In [8]:
train_loader = DataLoader(otto,
                         batch_size=128, shuffle=True)

In [9]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(94, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 9)
            
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [10]:
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.1)

In [11]:
# Training loop
for epoch in range(10):
    for i, data in enumerate(train_loader):
        # Get the data
        inputs, labels = data

        # Forward pass
        y_preds = model(inputs)

        # Loss
        loss = criterion(y_preds, labels)
        
        # Zero grad, backward, upgrade
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f"Epoch {epoch+1} | Loss: {loss.item():.4f}")

Epoch 1 | Loss: 2.0827
Epoch 2 | Loss: 1.9857
Epoch 3 | Loss: 1.9674
Epoch 4 | Loss: 2.0354
Epoch 5 | Loss: 1.5983
Epoch 6 | Loss: 1.5143
Epoch 7 | Loss: 1.6607
Epoch 8 | Loss: 1.5427
Epoch 9 | Loss: 1.6378
Epoch 10 | Loss: 1.5489
