In [10]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import os
# Show which data files are available before anything is loaded.
input_files = os.listdir("../input")
print(input_files)

['train.csv', 'sample_submission.csv', 'test.csv']


Read the training and test datasets

In [11]:
%%time
# Load both CSVs in one expression; the unpacking order (train first,
# test second) follows the order of the path tuple.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/test.csv')
)

# Print (rows, columns) of each frame as a quick sanity check.
print(train.shape, test.shape)

(200000, 202) (200000, 201)
CPU times: user 15.6 s, sys: 812 ms, total: 16.4 s
Wall time: 16.4 s


Split the training dataset for training and validation

In [12]:
# Features: every column except the row identifier and the label.
feature_frame = train.drop(columns=['ID_code', 'target'])
X = feature_frame.values

# Labels: the 'target' column as a NumPy array.
y = train['target'].values

# Hold out 33% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [13]:
# Report the train/validation sizes: first for the features, then for the labels.
for train_part, val_part in ((X_train, X_val), (y_train, y_val)):
    print(len(train_part), len(val_part))

134000 66000
134000 66000


Construct a 2-Layer NN

In [14]:
# Seed torch's RNG so the layer weights are initialised identically on every run.
torch.manual_seed(1234)

# Hyperparameters
hl = 10          # number of units in the hidden layer
lr = 0.01        # learning rate passed to SGD
num_epoch = 100  # number of training epochs


class Net(nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Args:
        n_features: length of each input row (default 200).
        hidden: number of hidden units (default 10).
        n_classes: number of output scores per row (default 2).

    ``forward`` returns the raw output of the last linear layer; no softmax
    is applied inside the model.
    """

    def __init__(self, n_features=200, hidden=10, n_classes=2):
        super(Net, self).__init__()
        # Layers are created in the same order as before so that, for the
        # default sizes, the seeded initial weights are unchanged.
        self.fc1 = nn.Linear(n_features, hidden)
        self.fc2 = nn.Linear(hidden, n_classes)

    def forward(self, x):
        """Map a (rows, n_features) float tensor to (rows, n_classes) scores."""
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Size the hidden layer from the `hl` hyperparameter so that changing it
# above actually changes the model.
net = Net(hidden=hl)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

Train the NN

In [15]:
%%time
#train
# Every epoch uses the whole training set as a single batch, so the
# NumPy -> tensor conversion is done once here instead of once per epoch.
X_train_t = torch.as_tensor(X_train, dtype=torch.float32)
y_train_t = torch.as_tensor(y_train, dtype=torch.long)  # integer class indices for the loss

net.train()
for epoch in range(num_epoch):
    #feedforward - backprop
    optimizer.zero_grad()                # clear gradients left from the previous step
    out = net(X_train_t)
    loss = criterion(out, y_train_t)
    loss.backward()
    optimizer.step()

    # Log the loss on the first epoch and every 10th epoch after it.
    if epoch % 10 == 0:
        print('Epoch [%d/%d] Loss: %.4f'
              % (epoch + 1, num_epoch, loss.item()))

Epoch [1/100] Loss: 1.1303
Epoch [11/100] Loss: 0.3364
Epoch [21/100] Loss: 0.3324
Epoch [31/100] Loss: 0.3302
Epoch [41/100] Loss: 0.3288
Epoch [51/100] Loss: 0.3276
Epoch [61/100] Loss: 0.3267
Epoch [71/100] Loss: 0.3260
Epoch [81/100] Loss: 0.3253
Epoch [91/100] Loss: 0.3247
CPU times: user 26.3 s, sys: 3.77 s, total: 30 s
Wall time: 18.7 s


Validate the NN

In [16]:
%%time

#Validation
X_val_t = torch.as_tensor(X_val, dtype=torch.float32)
y_val_t = torch.as_tensor(y_val, dtype=torch.long)

# Inference only: switch to eval mode and skip building the autograd graph.
net.eval()
with torch.no_grad():
    out = net(X_val_t)

# Index of the larger of the two output scores = predicted class.
_, predicted = torch.max(out, 1)

#get accuracy
# Count matches as a plain Python int so the percentage does not depend on
# how the installed torch version divides integer tensors.
num_correct = (predicted == y_val_t).sum().item()
print('Accuracy of the network %d %%' % (100 * num_correct / len(y_val)))

Accuracy of the network 89 %
CPU times: user 68 ms, sys: 4 ms, total: 72 ms
Wall time: 52.3 ms


Perform prediction on test dataset

In [17]:
%%time

#Test
X_test = test.drop(['ID_code'], axis=1).values

X_test_t = torch.as_tensor(X_test, dtype=torch.float32)

# Inference only: switch to eval mode and skip building the autograd graph.
net.eval()
with torch.no_grad():
    out = net(X_test_t)

# Hard class label per row (index of the larger output score).
_, predicted = torch.max(out, 1)

# Softmax probability of class 1 per row, kept alongside the hard labels.
# NOTE(review): if the submission is scored with a ranking metric such as
# ROC AUC, this is the column to submit rather than `predicted` - confirm
# against the competition rules.
predicted_proba = F.softmax(out, dim=1)[:, 1]

CPU times: user 276 ms, sys: 352 ms, total: 628 ms
Wall time: 589 ms


Output prediction to CSV

In [18]:
# Pair every test-set ID with the value predicted for it.
ID_code = test['ID_code']
target = predicted.detach().numpy()

# NOTE(review): `target` holds whatever `predicted` contains (hard class
# labels in the cell above) - confirm that is what the scoring expects.
my_submission = pd.DataFrame(dict(ID_code=ID_code, target=target))

# index=False keeps the DataFrame index out of the CSV.
my_submission.to_csv('submission.csv', index=False)

# Preview the first rows of the submission.
my_submission.head()

Unnamed: 0,ID_code,target
0,test_0,0
1,test_1,0
2,test_2,0
3,test_3,0
4,test_4,0
