In [1]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
from torch.autograd import Variable

In [2]:
class Net(nn.Module):
    """Small convolutional classifier returning log-probabilities over 10 classes.

    Two 5x5 convolution stages (convolution -> batch norm -> 2x2 max-pool ->
    ReLU, with channel dropout in the second stage) feed a fully connected
    head of 1024 -> 50 -> 10 units. The 1024 input features of ``fc1``
    correspond to single-channel 28x28 images (64 channels x 4 x 4 after the
    second pooling step).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(1024, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities.

        Args:
            x: float tensor of shape ``(batch, 1, H, W)``; ``H = W = 28`` is
                the size that yields the 1024 features ``fc1`` expects.

        Returns:
            Tensor of shape ``(batch, 10)`` holding ``log_softmax`` scores.

        Raises:
            RuntimeError: if the spatial size does not flatten to 1024
                features per sample.
        """
        x = F.relu(F.max_pool2d(self.conv1_bn(self.conv1(x)), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2_bn(self.conv2(x))), 2))
        # Flatten per sample rather than with view(-1, 1024): the latter
        # silently regroups elements across the batch dimension when the
        # feature count is wrong but the total happens to divide by 1024.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told the mode explicitly.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

In [3]:
def train(model, dataloader, optimizer, epoch, log_interval=1000):
    """Run one training epoch over ``dataloader``.

    Switches ``model`` to training mode and, for every batch, computes the
    negative log-likelihood loss of the model output, back-propagates it and
    applies one optimizer step.

    Args:
        model: module whose output is passed to ``F.nll_loss`` (so it is
            expected to return log-probabilities).
        dataloader: iterable of ``(x, y)`` batches that supports ``len()``
            and exposes ``.dataset``; both are used for the progress message.
        optimizer: optimizer holding the parameters of ``model``.
        epoch: epoch number, used only in the progress message.
        log_interval: print a progress line every this many batches
            (batch 0 is always reported).
    """
    model.train()

    for batch_idx, (x, y) in enumerate(dataloader):
        optimizer.zero_grad()
        prediction = model(x)

        loss = F.nll_loss(prediction, y)
        loss.backward()

        optimizer.step()

        if batch_idx % log_interval == 0:
            # .item() extracts the Python float from the 0-dim loss tensor;
            # indexing it with [0] is an error in current PyTorch.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(x), len(dataloader.dataset),
                100. * batch_idx / len(dataloader), loss.item()))

In [4]:
def test(model, dataloader):
    model.eval()
    
    
    val_loss = 0
    correct = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            x, y = Variable(x), Variable(y)
            
            prediction = model(x)
            val_loss += F.nll_loss(prediction, y, size_average=False).item()
            
            y_hat = prediction.max(1, keepdim=True)[1]
            correct += y_hat.eq(y.view_as(y_hat)).sum().item()
    
    val_loss /= len(dataloader.dataset)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, len(dataloader.dataset),
        100. * correct / len(dataloader.dataset)))
            

In [5]:
import pandas as pd

In [6]:
# Read the training CSV (a 'label' column plus the pixel columns) into a DataFrame.
data = pd.read_csv('data/train.csv')

In [7]:
# Every column except 'label' is pixel data; each row becomes one
# single-channel 28x28 image, giving an array of shape (N, 1, 28, 28).
X = data.drop(columns=['label']).values.reshape((-1, 1, 28, 28))
# Class labels as a flat array aligned with the rows of X.
y = data.label.values

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 20% of the labelled data for validation. The split is seeded so
# that Restart & Run All reproduces the same partition and metrics.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Instantiate the network with newly constructed (untrained) layers.
model = Net()

In [12]:
# Adam over all model parameters, starting learning rate 3e-3.
opt = optim.Adam(model.parameters(), lr=3e-03)

In [13]:
# as_tensor accepts both ndarrays and tensors, so re-running this cell is
# harmless (from_numpy raises once X_train is already a tensor).
X_train = torch.as_tensor(X_train).float()
# nll_loss needs int64 class indices; make that explicit rather than relying
# on the platform's default NumPy integer width.
y_train = torch.as_tensor(y_train, dtype=torch.long)

In [14]:
# Pair images with labels and iterate them in shuffled mini-batches of 128.
train_data = data_utils.TensorDataset(X_train, y_train)
train_loader = data_utils.DataLoader(train_data, batch_size=128, shuffle=True)

In [15]:
# as_tensor accepts both ndarrays and tensors, so re-running this cell is
# harmless (from_numpy raises once X_val is already a tensor).
X_val = torch.as_tensor(X_val).float()
# nll_loss needs int64 class indices.
y_val = torch.as_tensor(y_val, dtype=torch.long)

In [16]:
val_data = data_utils.TensorDataset(X_val, y_val)
# Evaluation sums loss and correct counts over the whole set, so batch order
# is irrelevant; keep it fixed instead of reshuffling on every pass.
val_loader = data_utils.DataLoader(val_data, batch_size=128, shuffle=False)

In [17]:
# Multiply the learning rate by 0.8 after every epoch via the built-in
# scheduler instead of editing opt.param_groups by hand.
scheduler = optim.lr_scheduler.ExponentialLR(opt, gamma=0.8)

for epoch in range(1, 5 + 1):
    train(model, dataloader=train_loader, optimizer=opt, epoch=epoch)
    test(model, dataloader=val_loader)
    scheduler.step()




Test set: Average loss: 0.0705, Accuracy: 8235/8400 (98%)


Test set: Average loss: 0.0487, Accuracy: 8275/8400 (99%)


Test set: Average loss: 0.0425, Accuracy: 8290/8400 (99%)


Test set: Average loss: 0.0426, Accuracy: 8287/8400 (99%)


Test set: Average loss: 0.0358, Accuracy: 8302/8400 (99%)



In [18]:
# Convert the full labelled set (train + validation rows) to tensors.
# as_tensor keeps the cell re-runnable: from_numpy raises once X is a tensor.
X = torch.as_tensor(X).float()
# nll_loss needs int64 class indices.
y = torch.as_tensor(y, dtype=torch.long)

In [19]:
# Rebind train_data / train_loader to the full labelled set (X, y), which
# also contains the rows that were held out as X_val / y_val.
train_data = data_utils.TensorDataset(X, y)
train_loader = data_utils.DataLoader(train_data, batch_size=128, shuffle=True)

In [20]:
# New Adam optimizer for the second training phase, starting at 3e-4.
# Being a new object, it does not carry over the previous optimizer's state.
opt = optim.Adam(model.parameters(), lr=3e-04)

In [21]:
# Multiply the learning rate by 0.8 after every epoch via the built-in
# scheduler instead of editing opt.param_groups by hand.
scheduler = optim.lr_scheduler.ExponentialLR(opt, gamma=0.8)

for epoch in range(1, 3 + 1):
    train(model, dataloader=train_loader, optimizer=opt, epoch=epoch)
    # NOTE: train_loader now iterates the full (X, y) set, which includes the
    # validation rows, so the figures printed here are optimistic and no
    # longer estimate performance on unseen data.
    test(model, dataloader=val_loader)
    scheduler.step()




Test set: Average loss: 0.0302, Accuracy: 8315/8400 (99%)


Test set: Average loss: 0.0272, Accuracy: 8328/8400 (99%)


Test set: Average loss: 0.0249, Accuracy: 8334/8400 (99%)



In [25]:
# torch.save does not create missing parent directories.
os.makedirs('models', exist_ok=True)
# Persist only the parameters/buffers; loading requires rebuilding Net() first.
torch.save(model.state_dict(), 'models/digit-recognizer-pytorch.pt')

In [7]:
# Rebuild the architecture and restore the saved weights into it.
# A newly constructed nn.Module starts in training mode, so code that uses
# this model for inference must switch it to eval mode.
model = Net()
model.load_state_dict(torch.load('models/digit-recognizer-pytorch.pt'))

In [8]:
# Read the test pixels and reshape each row to a single-channel 28x28 image,
# giving an array of shape (N, 1, 28, 28).
X_test = pd.read_csv('data/test.csv').values.reshape(-1, 1, 28, 28)

In [9]:
# as_tensor accepts both ndarrays and tensors, so re-running this cell is
# harmless (from_numpy raises once X_test is already a tensor).
X_test = torch.as_tensor(X_test).float()

In [10]:
# The model was just re-created and is therefore in training mode. Switch to
# eval mode so dropout is disabled and batch norm uses its running statistics;
# otherwise the predictions are randomly perturbed.
model.eval()
# No gradients are needed for inference; skipping autograd bookkeeping avoids
# holding activations for the whole test set in memory.
with torch.no_grad():
    prediction = model(X_test)

In [30]:
# Predicted digit = index of the largest log-probability in each row.
# detach() makes the NumPy conversion valid even if `prediction` still
# carries autograd history.
pp = prediction.detach().argmax(dim=1).numpy()

In [33]:
# Make sure pp is a NumPy array (np.array copies if it already is one).
pp = np.array(pp)

In [34]:
# Sanity check: expect one predicted label per test row.
pp.shape

(28000,)

In [36]:
# NOTE(review): this rebinds X_test from the tensor used for inference to the
# raw DataFrame and reads data/test.csv a second time; below it is only used
# for head() and len().
X_test = pd.read_csv('data/test.csv')

In [38]:
# Peek at the first rows of the raw test table.
X_test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# Image ids in the submission are 1-based, one per test row.
ids = list(range(1, len(X_test) + 1))

In [43]:
# Two-column submission table: 1-based image id and predicted digit.
submit = pd.DataFrame(dict(ImageId=ids, Label=pp))

In [44]:
# Preview the submission table before writing it out.
submit.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,2
4,5,3


In [46]:
# index=False keeps the DataFrame index out of the file, so it contains only
# the ImageId and Label columns.
submit.to_csv('submit_pytorch.csv', index=False)