In [1]:
import numpy as np
import pandas as pd 

import torch
import torch.nn.functional as F
from torch import nn
import torch.optim as optim
import torch.nn.init as init
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torchvision import transforms
from torch.optim import lr_scheduler
from PIL import Image

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
read_train = pd.read_csv('csv/mnist/train.csv')
read_test = pd.read_csv('csv/mnist/test.csv')

train_df = pd.DataFrame(read_train)
test_df = pd.DataFrame(read_test)

In [3]:
batch_size = 64
num_epochs = 2
learning_rate = 0.001
dropout_p = 0.5
log_interval = 1
num_hidden_units = 50
num_classes = 10
decay_rate = 0.9999
max_grad_norm = 5.0
n_test = len(test_df)
n_pixels = len(test_df.columns)

cuda = True
cuda = cuda and  torch.cuda.is_available()

seed = 42
torch.cuda.manual_seed(seed)



In [4]:
from torch.utils import data

class MnistDataset(data.Dataset):
    def __init__(self, file_path, transform = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor(), 
                 transforms.Normalize(mean=(0.5,), std=(0.5,))])):
        
        df = pd.read_csv(file_path)
        
        if len(df.columns) == n_pixels:
            # test data
            self.X = df.values.reshape((-1,28,28)).astype(np.uint8)[:,:,:,None]
            self.y = None
        else:
            # training data
            self.X = df.iloc[:,1:].values.reshape((-1,28,28)).astype(np.uint8)[:,:,:,None]
            self.y = torch.from_numpy(df.iloc[:,0].values)
            
        self.transform = transform
    
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.y is not None:
            return self.transform(self.X[idx]), self.y[idx]
        else:
            return self.transform(self.X[idx])
    
    
    
train_set = MnistDataset(file_path='csv/mnist/train.csv',
                         transform=transforms.ToTensor(),)

test_set = MnistDataset(file_path='csv/mnist/test.csv',
                        transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [1]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
          
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p = 0.25),
            nn.Conv2d(32, 64, kernel_size=3, stride=1,padding=1),
             nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
             nn.BatchNorm2d(64),
            
            nn.Conv2d(64, 128, kernel_size=3, stride=1,padding=1),
            nn.Conv2d(128, 128, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),         
            nn.Dropout(p = 0.25),
        )
          
        self.classifier = nn.Sequential(
            nn.Dropout(p = 0.25),
            nn.Linear(128 * 7 * 7, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p = 0.25),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1) 
        x = self.classifier(x)
        
        return x  

NameError: name 'nn' is not defined

In [6]:
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.003)
criterion = nn.CrossEntropyLoss()
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()

In [7]:
def train(epoch):
    model.train()
    exp_lr_scheduler.step()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        
        loss.backward()
        optimizer.step()
        
        if (batch_idx + 1)% 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.data))

            
            
def test(data_loader):
    model.eval()
    loss = 0
    correct = 0
    
    for data, target in data_loader:
        data, target = Variable(data, requires_grad=True), Variable(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        output = model(data)
        
        loss += F.cross_entropy(output, target).data

        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
    loss /= len(data_loader.dataset)
        
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))


In [8]:
n_epochs = 20

for epoch in range(n_epochs):
    train(epoch)
    test(train_loader)


Average loss: 0.0011, Accuracy: 41076/42000 (97.000%)


Average loss: 0.0008, Accuracy: 41373/42000 (98.000%)


Average loss: 0.0006, Accuracy: 41523/42000 (98.000%)


Average loss: 0.0008, Accuracy: 41347/42000 (98.000%)


Average loss: 0.0004, Accuracy: 41677/42000 (99.000%)


Average loss: 0.0005, Accuracy: 41617/42000 (99.000%)


Average loss: 0.0004, Accuracy: 41694/42000 (99.000%)


Average loss: 0.0002, Accuracy: 41827/42000 (99.000%)


Average loss: 0.0002, Accuracy: 41855/42000 (99.000%)


Average loss: 0.0002, Accuracy: 41866/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41886/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41891/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41897/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41899/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41918/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41922/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41921/42000 (99.000%)


Average loss: 0.0001, Accuracy: 41926/42000 (99

In [9]:
def prediciton(data_loader):
    model.eval()
    test_pred = torch.LongTensor()
    
    for i, data in enumerate(data_loader):
        data = Variable(data, requires_grad=True)
        if torch.cuda.is_available():
            data = data.cuda()
            
        output = model(data)
        
        pred = output.cpu().data.max(1, keepdim=True)[1]
        test_pred = torch.cat((test_pred, pred), dim=0)
        
    return test_pred


test_pred = prediciton(test_loader)
out_df = pd.DataFrame(np.c_[np.arange(1, len(test_set)+1)[:,None], test_pred.numpy()], 
                      columns=['ImageId', 'Label'])

In [10]:
out_df.head(10)

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
5,6,7
6,7,0
7,8,3
8,9,0
9,10,3


In [11]:
out_df.to_csv('submission.csv', index=False)