In [2]:
# !wget -nc "http://labfile.oss.aliyuncs.com/courses/1081/MNIST.zip"
# !unzip -o "MNIST.zip"

In [1]:
# Load MNIST through torchvision; the ToTensor() transform converts each image to a PyTorch tensor
import torchvision

# MNIST training split (train=True): 60000 items, each image converted by ToTensor()
train = torchvision.datasets.MNIST(
    root='.',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
# MNIST test split (train=False): 10000 items, same transform
test = torchvision.datasets.MNIST(
    root='.',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

In [2]:
# Shapes of the raw image and label tensors for both splits, in the order
# (train images, train labels, test images, test labels)
tuple(t.shape for t in (train.data, train.targets, test.data, test.targets))

(torch.Size([60000, 28, 28]),
 torch.Size([60000]),
 torch.Size([10000, 28, 28]),
 torch.Size([10000]))

In [3]:
import torch

# Mini-batches of 64 samples: the training loader shuffles, the test loader keeps dataset order
train_loader = torch.utils.data.DataLoader(
    dataset=train,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test,
    batch_size=64,
    shuffle=False,
)

train_loader, test_loader

(<torch.utils.data.dataloader.DataLoader at 0x11dc6bdd0>,
 <torch.utils.data.dataloader.DataLoader at 0x11dc6be10>)

### 传统方法建模 (Building the model the traditional way: subclassing `nn.Module`)

Input (784) → Fully connected layer 1 (784, 512) + ReLU → Fully connected layer 2 (512, 128) + ReLU → Fully connected layer 3 (128, 10) → Output (10)

In [4]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The layer chain is ``in_features -> hidden1 -> hidden2 -> num_classes``.
    The default widths (784 -> 512 -> 128 -> 10) are the ones this notebook
    uses for flattened 28x28 MNIST images and 10 digit classes.

    Args:
        in_features: Size of each (flattened) input sample.
        hidden1: Output width of the first linear layer.
        hidden2: Output width of the second linear layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, in_features=784, hidden1=512, hidden2=128, num_classes=10):
        super(Net, self).__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the random number generator in the same sequence as before.
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return raw class scores for ``x``.

        Args:
            x: Tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor of unnormalised scores with last dimension ``num_classes``;
            no softmax is applied here.
        """
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [5]:
# Instantiate the fully connected classifier; the bare name below displays its layer summary
model = Net()
model

Net(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)

In [6]:
# Loss function and optimizer: cross-entropy on the model outputs, Adam with learning rate 0.002
loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.002)

In [7]:
def fit(epochs, model, opt, train_data=None, test_data=None, criterion=None, log_every=100):
    """Train ``model`` and print its test accuracy after every epoch.

    Every batch of images is flattened to one row per sample before it is
    passed to the model.

    Args:
        epochs: Number of passes over the training batches.
        model: ``nn.Module`` that maps flattened images to class scores.
        opt: Optimizer that updates the parameters of ``model``.
        train_data: Iterable of ``(images, labels)`` batches that supports
            ``len()``. Defaults to the notebook-level ``train_loader``.
        test_data: Iterable of ``(images, labels)`` batches used for the
            per-epoch accuracy. Defaults to the notebook-level ``test_loader``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss
            tensor. Defaults to the notebook-level ``loss_fn``.
        log_every: Print the training loss every ``log_every`` batches;
            a falsy value disables the per-batch log.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Fall back to the notebook globals only when no explicit object is given,
    # so existing calls fit(epochs=..., model=..., opt=...) keep working.
    if train_data is None:
        train_data = train_loader
    if test_data is None:
        test_data = test_loader
    if criterion is None:
        criterion = loss_fn

    was_training = model.training
    for epoch in range(epochs):
        # Training mode matters for layers such as dropout / batch norm.
        model.train()
        for i, (images, labels) in enumerate(train_data):
            images = images.reshape(images.size(0), -1)
            outputs = model(images)
            loss = criterion(outputs, labels)
            opt.zero_grad()
            loss.backward()
            opt.step()

            if log_every and (i + 1) % log_every == 0:
                print('Epoch [{}/{}], Batch [{}/{}], Train loss: {:.3f}'.format(epoch+1,
                       epochs, i+1, len(train_data), loss.item()))

        # After every epoch, measure accuracy on the test batches.
        model.eval()
        correct = 0
        total = 0
        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for images, labels in test_data:
                images = images.reshape(images.size(0), -1)
                outputs = model(images)
                # torch.max returns (max value, index); the index is the predicted class.
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
        # An empty test set has no defined accuracy; report nan instead of dividing by zero.
        accuracy = correct / total if total else float('nan')
        print('============ Test accuracy: {:.3f} ============='.format(
            accuracy))

    # Leave the model in the mode the caller handed it over in.
    model.train(was_training)

In [8]:
# Train the class-based model for one epoch; fit() prints the loss every 100 batches and the test accuracy per epoch
fit(epochs=1, model=model, opt=opt)

Epoch [1/1], Batch [100/938], Train loss: 0.384
Epoch [1/1], Batch [200/938], Train loss: 0.226
Epoch [1/1], Batch [300/938], Train loss: 0.175
Epoch [1/1], Batch [400/938], Train loss: 0.348
Epoch [1/1], Batch [500/938], Train loss: 0.280
Epoch [1/1], Batch [600/938], Train loss: 0.049
Epoch [1/1], Batch [700/938], Train loss: 0.105
Epoch [1/1], Batch [800/938], Train loss: 0.035
Epoch [1/1], Batch [900/938], Train loss: 0.108


### Sequential 方法建模 (Building the same model with `nn.Sequential`)

In [12]:
# The 784 -> 512 -> 128 -> 10 network written as a flat list of layers;
# nn.Sequential applies them in the order given.
mlp_layers = [
    nn.Linear(784, 512), nn.ReLU(),  # hidden layer 1
    nn.Linear(512, 128), nn.ReLU(),  # hidden layer 2
    nn.Linear(128, 10),              # class scores
]
model_s = nn.Sequential(*mlp_layers)

model_s

Sequential(
  (0): Linear(in_features=784, out_features=512, bias=True)
  (1): ReLU()
  (2): Linear(in_features=512, out_features=128, bias=True)
  (3): ReLU()
  (4): Linear(in_features=128, out_features=10, bias=True)
)

In [13]:
opt_s = torch.optim.Adam(model_s.parameters(), lr=0.002)  # Adam optimizer
fit(epochs=1, model=model_s, opt=opt_s)  # train for one epoch

Epoch [1/1], Batch [100/938], Train loss: 0.156
Epoch [1/1], Batch [200/938], Train loss: 0.166
Epoch [1/1], Batch [300/938], Train loss: 0.432
Epoch [1/1], Batch [400/938], Train loss: 0.137
Epoch [1/1], Batch [500/938], Train loss: 0.211
Epoch [1/1], Batch [600/938], Train loss: 0.124
Epoch [1/1], Batch [700/938], Train loss: 0.059
Epoch [1/1], Batch [800/938], Train loss: 0.035
Epoch [1/1], Batch [900/938], Train loss: 0.214
