# Otto Group Product Classification Challenge
### Classify products into the correct category

## Package

In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.optim as optim

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Prepare Dataset

In [16]:
csv_path='train.csv'
class MyDataset(Dataset):
    """Training set read from a CSV file and held in memory as tensors.

    Every column except the first and the last is used as a feature
    (presumably the first is a row id and the last is ``target`` -- confirm
    against the CSV). The ``target`` column must contain the strings
    ``Class_1`` .. ``Class_9``; they are converted to zero-based class
    indices 0..8.

    Args:
        transforms: Optional callable applied to the feature tensor of each
            sample returned by ``__getitem__``. ``None`` returns the features
            unchanged.
        csv_path: Path of the CSV file to load. Defaults to the module-level
            ``csv_path`` as it was when the class was defined.

    Raises:
        ValueError: If ``target`` contains a value outside
            ``Class_1`` .. ``Class_9`` (including missing values).
    """

    # String label -> 1-based class number, as stored back into ``df_train``.
    _LABEL_TO_NUMBER = {'Class_1': 1, 'Class_2': 2,
                        'Class_3': 3, 'Class_4': 4,
                        'Class_5': 5, 'Class_6': 6,
                        'Class_7': 7, 'Class_8': 8,
                        'Class_9': 9}

    def __init__(self, transforms=None, csv_path=csv_path):
        self.transforms = transforms
        self.df_train = pd.read_csv(csv_path)

        features = self.df_train.iloc[:, 1:-1].values
        self.X_train = torch.as_tensor(features, dtype=torch.float32)

        mapped = self.df_train['target'].map(self._LABEL_TO_NUMBER)
        unmapped = mapped.isna()
        if unmapped.any():
            # Fail loudly: casting NaN to an integer tensor would silently
            # produce meaningless class indices.
            unknown = sorted(set(self.df_train.loc[unmapped, 'target'].astype(str)))
            raise ValueError('unexpected target labels: %s' % ', '.join(unknown))
        self.df_train['target'] = mapped.astype('int64')

        # Shift the 1-based class numbers to the 0-based indices 0..8.
        self.Y_train = torch.as_tensor(self.df_train['target'].values,
                                       dtype=torch.long) - 1
        self.len = self.Y_train.shape[0]

    def __getitem__(self, index):
        """Return ``(features, class_index)`` for the sample at ``index``."""
        features = self.X_train[index]
        if self.transforms is not None:
            features = self.transforms(features)
        return features, self.Y_train[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

In [17]:
# Build the training set and serve it in shuffled mini-batches of 32.
dataset = MyDataset()
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)

## Design Model using Class

In [18]:
class Net(nn.Module):
    """Fully connected classifier: 93 inputs -> 512 -> 256 -> 128 -> 64 -> 9 outputs."""

    def __init__(self):
        super().__init__()
        # Layers are created in input-to-output order under the names l1..l5.
        self.l1 = nn.Linear(in_features=93, out_features=512)
        self.l2 = nn.Linear(in_features=512, out_features=256)
        self.l3 = nn.Linear(in_features=256, out_features=128)
        self.l4 = nn.Linear(in_features=128, out_features=64)
        self.l5 = nn.Linear(in_features=64, out_features=9)

    def forward(self, x):
        """Return the nine raw class scores for ``x``.

        ReLU follows each of the first four layers; the output of the last
        layer is returned as-is (no softmax is applied here).
        """
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(hidden(x))
        return self.l5(x)


model = Net()

## Construct Loss and Optimizer

In [19]:
# Cross-entropy loss on the network's outputs, minimised by SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)

## Train

In [20]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``. After
    every 300 batches the mean loss of those 300 batches is printed and the
    accumulator is reset.

    Args:
        epoch: Zero-based epoch number; printed as ``epoch + 1``.
    """
    loss_sum = 0.0
    for step, (inputs, target) in enumerate(train_loader, start=1):
        optimizer.zero_grad()
        # forward + backward + update
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        if step % 300 != 0:
            continue
        print('[%d, %5d] loss: %.3f' % (epoch + 1, step, loss_sum / 300))
        loss_sum = 0.0

In [60]:
# Run six passes over the training data; train() prints the mean loss every 300 batches.
for epoch in range(6):
    train(epoch)

[1,   300] loss: 0.048
[1,   600] loss: 0.041
[1,   900] loss: 0.043
[1,  1200] loss: 0.044
[1,  1500] loss: 0.047
[1,  1800] loss: 0.052
[2,   300] loss: 0.042
[2,   600] loss: 0.050
[2,   900] loss: 0.047
[2,  1200] loss: 0.041
[2,  1500] loss: 0.054
[2,  1800] loss: 0.058
[3,   300] loss: 0.046
[3,   600] loss: 0.039
[3,   900] loss: 0.054
[3,  1200] loss: 0.049
[3,  1500] loss: 0.049
[3,  1800] loss: 0.056
[4,   300] loss: 0.057
[4,   600] loss: 0.057
[4,   900] loss: 0.066
[4,  1200] loss: 0.057
[4,  1500] loss: 0.049
[4,  1800] loss: 0.070
[5,   300] loss: 0.049
[5,   600] loss: 0.044
[5,   900] loss: 0.042
[5,  1200] loss: 0.051
[5,  1500] loss: 0.050
[5,  1800] loss: 0.067
[6,   300] loss: 0.045
[6,   600] loss: 0.053
[6,   900] loss: 0.060
[6,  1200] loss: 0.048
[6,  1500] loss: 0.046
[6,  1800] loss: 0.045


## Test

In [61]:
# Load the test features (every column after the first) as a float32 tensor.
df_test = pd.read_csv('test.csv')
X_test = df_test.iloc[:, 1:]
X_test = torch.from_numpy(X_test.values.astype(np.float32))

# Score the whole test set in one forward pass without tracking gradients.
with torch.no_grad():
    test_result = model(X_test)

# Take the highest-scoring output per row: `values` holds the scores,
# `labels` the zero-based class indices.
values, labels = test_result.max(dim=1)
result = labels.numpy()

In [62]:
# Notebook display of the predicted class indices held in `result`.
result

array([1, 7, 5, ..., 1, 3, 2])

In [63]:
# Take the row ids from submit.csv and attach the predicted class index to each.
df_sub = pd.read_csv('submit.csv')
submission = df_sub[['id']].assign(Class=result)

In [64]:
# Notebook display of `submission`: one row per id with its predicted class index.
submission

Unnamed: 0,id,Class
0,1,1
1,2,7
2,3,5
3,4,2
4,5,8
...,...,...
144363,144364,5
144364,144365,2
144365,144366,1
144366,144367,3


In [65]:
# One-hot encode the predicted class indices. Declaring all nine categories
# (0..8) up front guarantees nine indicator columns even when the model never
# predicts some class, and the fixed integer dtype keeps the indicators as
# 0/1 instead of booleans on newer pandas releases.
submission_1 = pd.get_dummies(
    submission['Class'].astype(pd.CategoricalDtype(categories=list(range(9)))),
    dtype=np.uint8,
)

In [66]:
# Preview the first five rows (head() defaults to five).
print(submission.head())

   id  Class
0   1      1
1   2      7
2   3      5
3   4      2
4   5      8


In [67]:
# Rename the indicator columns, in order, to Class_1 .. Class_9.
submission_1.columns = [
    "Class_1",
    "Class_2",
    "Class_3",
    "Class_4",
    "Class_5",
    "Class_6",
    "Class_7",
    "Class_8",
    "Class_9",
]

In [68]:
# Notebook display of `submission`, which at this point still has the id and Class columns.
submission

Unnamed: 0,id,Class
0,1,1
1,2,7
2,3,5
3,4,2
4,5,8
...,...,...
144363,144364,5
144364,144365,2
144365,144366,1
144366,144367,3


In [69]:
# Remove the integer Class column in place, leaving the remaining columns untouched.
submission.drop(columns=['Class'], inplace=True)

In [70]:
# Notebook display of `submission` after the Class column has been dropped.
submission

Unnamed: 0,id
0,1
1,2
2,3
3,4
4,5
...,...
144363,144364
144364,144365
144365,144366
144366,144367


In [71]:
# Append the one-hot class columns to the right of the existing columns.
submission = pd.concat([submission, submission_1], axis='columns')

In [72]:
# Notebook display of the combined table: the id column followed by the Class_* indicator columns.
submission

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4,Class_5,Class_6,Class_7,Class_8,Class_9
0,1,0,1,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,1,0
2,3,0,0,0,0,0,1,0,0,0
3,4,0,0,1,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
144363,144364,0,0,0,0,0,1,0,0,0
144364,144365,0,0,1,0,0,0,0,0,0
144365,144366,0,1,0,0,0,0,0,0,0
144366,144367,0,0,0,1,0,0,0,0,0


In [73]:
# Write the table to submission.csv; index=False leaves the DataFrame index out of the file.
submission.to_csv('submission.csv', index=False)