In [2]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv('./train.csv', dtype=np.float32)

In [4]:
labels = df.label.values
features = df.loc[:,df.columns != 'label'].values/255

In [5]:
features_train, features_val, labels_train, labels_val = train_test_split(features,labels,train_size=0.95,random_state=32)



In [6]:
trainset = TensorDataset(torch.from_numpy(features_train), torch.from_numpy(labels_train))
valset  = TensorDataset(torch.from_numpy(features_val),torch.from_numpy(labels_val))

In [7]:
train_loader = DataLoader(trainset, batch_size=5, shuffle=True)
val_loader = DataLoader(valset, batch_size=5, shuffle=False)

In [None]:
data,label = iter(train_loader).next()
data.view(data.shape[0],1,28,28).shape

In [17]:
sample = torch.rand(5,4)
print(sample, sample.view(5,1,2,2))
sample

tensor([[0.8813, 0.7645, 0.5779, 0.4326],
        [0.5049, 0.5723, 0.5248, 0.1349],
        [0.7977, 0.2768, 0.3070, 0.1509],
        [0.4805, 0.3083, 0.0849, 0.9293],
        [0.4123, 0.5943, 0.2272, 0.4570]]) tensor([[[[0.8813, 0.7645],
          [0.5779, 0.4326]]],


        [[[0.5049, 0.5723],
          [0.5248, 0.1349]]],


        [[[0.7977, 0.2768],
          [0.3070, 0.1509]]],


        [[[0.4805, 0.3083],
          [0.0849, 0.9293]]],


        [[[0.4123, 0.5943],
          [0.2272, 0.4570]]]])


tensor([[0.8813, 0.7645, 0.5779, 0.4326],
        [0.5049, 0.5723, 0.5248, 0.1349],
        [0.7977, 0.2768, 0.3070, 0.1509],
        [0.4805, 0.3083, 0.0849, 0.9293],
        [0.4123, 0.5943, 0.2272, 0.4570]])

In [24]:
#CONV
class CNN(nn.Module):
    def __init__(self,in_channel, out_features):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=16, kernel_size=5)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.relu = nn.ReLU(inplace=True)
        self.linear = nn.Linear(in_features=32*4*4,out_features=out_features)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)      
        x = self.linear(x.view(x.shape[0],-1))
        return x

net = CNN(1,10)

In [33]:
# crossentropy loss
criteria = nn.CrossEntropyLoss()
#optimizer
optimizer = optim.SGD(net.parameters(),lr=0.01)
epochs = 5

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.1)

In [34]:
optimizer.param_groups

[{'dampening': 0,
  'initial_lr': 0.01,
  'lr': 0.01,
  'momentum': 0,
  'nesterov': False,
  'params': [Parameter containing:
   tensor([[[[-0.2005, -0.2751, -0.3513, -0.2686, -0.3787],
             [-0.0227, -0.2122, -0.2005, -0.0603,  0.0370],
             [ 0.3719,  0.4050,  0.1132,  0.3245,  0.3656],
             [ 0.3362,  0.2775,  0.3624,  0.3378,  0.0929],
             [-0.0174, -0.0900, -0.1158, -0.2666, -0.2614]]],
   
   
           [[[ 0.1593,  0.2734,  0.1950, -0.0528, -0.0230],
             [-0.0022, -0.0146,  0.4762,  0.3333, -0.0166],
             [-0.0092,  0.3799,  0.5382,  0.4692,  0.2406],
             [ 0.0706,  0.0594, -0.0528,  0.2356, -0.0244],
             [-0.0851, -0.2195, -0.0810,  0.1310,  0.2529]]],
   
   
           [[[ 0.1281,  0.0627, -0.0802,  0.0005, -0.1805],
             [ 0.1058, -0.0301,  0.1865, -0.1160, -0.1786],
             [ 0.0512,  0.0418,  0.1054, -0.0877, -0.0091],
             [-0.1857, -0.0497,  0.0562, -0.1712, -0.1867],
             

In [35]:
net.train()
train_loss = []
for e in range(epochs):
    for i, (img, label) in enumerate(train_loader):
        scheduler.step()

        #wrapper
        img = Variable(img.view(img.shape[0],1,28,28))
        label = Variable(label).type(torch.LongTensor)
        optimizer.zero_grad()

        out = net(img)
        loss = criteria(out, label)
        train_loss.append(loss.item())

        loss.backward()
        optimizer.step()
        if i %1000 ==0:
            accuracy,correct = 0,0
            net.eval()
            for val_img, val_label in val_loader:
                
                #wrapper
                val_img = Variable(val_img.view(img.shape[0],1,28,28))
                
                outs = net(val_img)
                
                predictions = torch.max(outs.data,1)[1]
                correct += torch.sum(predictions==val_label.type(torch.LongTensor))
            print(correct)

            accuracy = 1.*correct.item()/float(len(valset))
            print('Epoch {} iteration {} Valaccuracy {} loss {}'.format(e, i, accuracy, loss.item()))
            net.train()

tensor(2057)
Epoch 0 iteration 0 Valaccuracy 0.9795238095238096 loss 0.00160303118173033
tensor(2047)
Epoch 0 iteration 1000 Valaccuracy 0.9747619047619047 loss 0.27358168363571167
tensor(2049)
Epoch 0 iteration 2000 Valaccuracy 0.9757142857142858 loss 0.0014261245960369706
tensor(2051)
Epoch 0 iteration 3000 Valaccuracy 0.9766666666666667 loss 0.11215362697839737
tensor(2051)
Epoch 0 iteration 4000 Valaccuracy 0.9766666666666667 loss 0.004348563961684704
tensor(2045)
Epoch 0 iteration 5000 Valaccuracy 0.9738095238095238 loss 0.23737621307373047
tensor(2054)
Epoch 0 iteration 6000 Valaccuracy 0.9780952380952381 loss 0.00463790912181139
tensor(2052)
Epoch 0 iteration 7000 Valaccuracy 0.9771428571428571 loss 0.007993126288056374
tensor(2053)
Epoch 1 iteration 0 Valaccuracy 0.9776190476190476 loss 0.00016136169142555445
tensor(2057)
Epoch 1 iteration 1000 Valaccuracy 0.9795238095238096 loss 0.01728811301290989
tensor(2056)
Epoch 1 iteration 2000 Valaccuracy 0.979047619047619 loss 0.000469

In [37]:
testdf = pd.read_csv('./test.csv', dtype=np.float32)
test_features = torch.from_numpy(testdf.values)
out = net(test_features.view(test_features.shape[0],1,28,28))

In [38]:
test_predictions = torch.max(out,1)[1]

In [39]:
#save to csv
testdf = pd.DataFrame({'ImageId': testdf.index.values+1,'Label':test_predictions.numpy()})
testdf.to_csv('./pred.csv', index=False)

In [40]:
torch.load('./ckpt.pth')

OrderedDict([('base.conv1.weight',
              tensor([[[[ 9.0361e-03,  9.5278e-03, -1.8638e-02,  ..., -4.2024e-02,
                         -4.3860e-02, -6.9401e-02],
                        [ 4.6278e-03,  5.9695e-03,  1.4206e-02,  ...,  3.3915e-03,
                         -1.8355e-02, -3.4532e-02],
                        [ 2.0534e-02,  2.1520e-02,  1.4590e-02,  ...,  9.6053e-02,
                          5.9016e-02,  4.9596e-02],
                        ...,
                        [-6.2068e-04,  2.6295e-02, -8.3619e-03,  ..., -1.1547e-01,
                         -6.9002e-02,  9.4092e-03],
                        [ 4.2704e-03,  4.5313e-02,  5.8362e-02,  ...,  2.4599e-02,
                         -2.9172e-02, -1.2651e-02],
                        [-7.1744e-02, -2.7744e-02, -1.4561e-02,  ...,  3.5631e-02,
                          2.3831e-02,  4.4740e-03]],
              
                       [[-2.1213e-02,  6.3990e-03,  1.8122e-02,  ...,  4.6272e-02,
                          3