# Pytorch Tutorial

Pytorch is a popular deep learning framework and it's easy to get started.

In [10]:
import torch
import torch.nn as nn
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
import time
import os
 
# third-party library
from torch.autograd import Variable
import matplotlib.pyplot as plt

BATCH_SIZE = 128
NUM_EPOCHS = 10
LR=0.01

First, we read the mnist data, preprocess them and encapsulate them into dataloader form.

In [6]:
# preprocessing
normalize = transforms.Normalize(mean=[.5], std=[.5])
transform = transforms.Compose([transforms.ToTensor(), normalize])

# download and load the data
train_dataset = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transform, download=False)

# encapsulate them into dataloader form
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_loader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

test_x = torch.unsqueeze(test_dataset.test_data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_dataset.test_labels[:2000]

Then, we define the model, object function and optimizer that we use to classify.

In [8]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,      # input height
                out_channels=16,    # n_filters
                kernel_size=5,      # filter size
                stride=1,           # filter movement/step
                padding=2,      # 如果想要 con2d 出来的图片长宽没有变化, padding=(kernel_size-1)/2 当 stride=1
            ),      # output shape (16, 28, 28)
            nn.ReLU(),    # activation
            nn.MaxPool2d(kernel_size=2),    # 在 2x2 空间里向下采样, output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(  # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),  # output shape (32, 14, 14)
            nn.ReLU(),  # activation
            nn.MaxPool2d(2),  # output shape (32, 7, 7)
        )
        self.out = nn.Linear(32 * 7 * 7, 10)   # fully connected layer, output 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)   # 展平多维的卷积图成 (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output

model = CNN()
print(model)  # net architecture

# TODO:define loss function and optimiter
#optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
#loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted
# optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)   # optimize all cnn parameters
# loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)


Next, we can start to train and evaluate!

In [15]:
# train and evaluate
step=0
# training and testing
for epoch in range(NUM_EPOCHS):
    for step, (x, y) in enumerate(train_loader):   # 分配 batch data, normalize x when iterate train_loader
        b_x=Variable(x)
        b_y =Variable(y)

        output = model(b_x)
        loss = criterion(output,b_y)   # cross entropy loss

 
        pred_y = torch.max (output, 1)[1].data.squeeze ()   ###这里不是很懂？？
        train_accuracy = (pred_y == b_y).sum().numpy() / b_y.size(0)

        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients
        if step % 128 == 0:
            torch.save(model.state_dict(), 'params.pkl')
            test_output = model (test_x)  
            pred_y = torch.max (test_output, 1)[1].data.squeeze ()   ###这里不是很懂？？
            test_accuracy = (pred_y == test_y).sum().numpy() / test_y.size(0)
            print ('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| train accuracy: %.4f' % train_accuracy,'| test accuracy: %.4f' % test_accuracy)
        
    model.load_state_dict(torch.load('params.pkl'))
         
        
    # evaluate
    # TODO:calculate the accuracy using traning and testing dataset
    
    
    
    




Epoch:  0 | train loss: 0.0067 | train accuracy: 1.0000 | test accuracy: 0.8105
Epoch:  1 | train loss: 0.0352 | train accuracy: 0.9922 | test accuracy: 0.8025
Epoch:  2 | train loss: 0.0404 | train accuracy: 0.9844 | test accuracy: 0.8275
Epoch:  3 | train loss: 0.0023 | train accuracy: 1.0000 | test accuracy: 0.8425
Epoch:  4 | train loss: 0.0425 | train accuracy: 0.9844 | test accuracy: 0.8605
Epoch:  5 | train loss: 0.0501 | train accuracy: 0.9844 | test accuracy: 0.8880
Epoch:  6 | train loss: 0.0047 | train accuracy: 1.0000 | test accuracy: 0.8885
Epoch:  7 | train loss: 0.0222 | train accuracy: 0.9922 | test accuracy: 0.8840
Epoch:  8 | train loss: 0.0035 | train accuracy: 1.0000 | test accuracy: 0.8920
Epoch:  9 | train loss: 0.0179 | train accuracy: 0.9922 | test accuracy: 0.8805


#### Q5:
Please print the training and testing accuracy.