## Mnist

In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch import nn, optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
train_dataset = datasets.MNIST(root='/Users/qi/Documents/cood/ML/dataBase/', 
                               train=True,
                               transform = transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='/Users/qi/Documents/cood/ML/dataBase/', 
                               train=False,
                               transform = transforms.ToTensor(),
                               download=True)

In [3]:
batch_size = 64

In [4]:
train_loader = DataLoader(dataset=train_dataset, 
                          batch_size = batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size = batch_size,
                         shuffle=True)

### 定义网络结构

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 10)
        # 64，10 dim=1 代表着用第一个维度进行 softmax 转换，即 10
        self.softmax = nn.Softmax(dim=1)
        
        
    def forward(self, x):
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.softmax(x)
        return x

In [6]:
LR = 0.5
model = Net()
mse_loss = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), LR)

In [7]:
def train():
    for i, data in enumerate(train_loader):
        inputs, labels = data
        # 独热编码
        labels = labels.reshape(-1,1)
        # tensor.scatter(dim, index, src)
        # dim 对哪个维度进行独热编码
        # index 要将 src 中的值放到 tensor 的哪个位置
        # src 插入 index 的数值
        one_hot = torch.zeros(inputs.shape[0],10).scatter(1, labels, 1)
        out = model(inputs)
        # 这两个数值的维度要保持一致
        loss = mse_loss(out, one_hot)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [8]:
def test():
    corrent = 0
    for i, data in enumerate(test_loader):
        inputs, labels = data
        out = model(inputs)
        # 返回最大值，及最大值的索引 1代表需要计算的维度
        _, predict = torch.max(out, 1)
        corrent += (predict == labels).sum(0)
    print('Test acc {0}'.format(corrent.item()/len(test_dataset)))

In [9]:
for epoch in range(10):
    print('epoch ',epoch)
    train()
    test()

epoch  0
Test acc 0.8897
epoch  1
Test acc 0.9039
epoch  2
Test acc 0.9069
epoch  3
Test acc 0.9117
epoch  4
Test acc 0.9146
epoch  5
Test acc 0.915
epoch  6
Test acc 0.918
epoch  7
Test acc 0.9186
epoch  8
Test acc 0.919
epoch  9
Test acc 0.9199


#### 交叉熵

In [10]:
LR = 0.5
model = Net()
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), LR)

In [11]:
def train():
    for i, data in enumerate(train_loader):
        inputs, labels = data
        out = model(inputs)
        loss = ce_loss(out, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [12]:
for epoch in range(10):
    print('epoch ',epoch)
    train()
    test()

epoch  0
Test acc 0.903
epoch  1
Test acc 0.9123
epoch  2
Test acc 0.9173
epoch  3
Test acc 0.9182
epoch  4
Test acc 0.9189
epoch  5
Test acc 0.9226
epoch  6
Test acc 0.9219
epoch  7
Test acc 0.9244
epoch  8
Test acc 0.9241
epoch  9
Test acc 0.9241


### 添加Dropout
修改网络结构

In [17]:
class Net_1(nn.Module):
    def __init__(self):
        super(Net_1, self).__init__()
        self.layers1 = nn.Sequential(
            nn.Linear(784, 500), 
            nn.Dropout(0.5),
            nn.Tanh())
        
        self.layers2 = nn.Sequential(
            nn.Linear(500, 200), 
            nn.Dropout(0.5),
            nn.Tanh())
        
        self.layers3 = nn.Sequential(
            nn.Linear(200, 10), 
            nn.Softmax(dim=1))
        
    def forward(self, x):
        x = x.view(x.size()[0], -1)
        x = self.layers1(x)
        x = self.layers2(x)
        x = self.layers3(x)
        return x

In [18]:
LR = 0.5
model = Net_1()
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), LR)

In [21]:
def train():
    # 模型的训练状态 dropout起作用
    model.train()
    for i, data in enumerate(train_loader):
        inputs, labels = data
        out = model(inputs)
        loss = ce_loss(out, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
def test():
    # 测试状态 dropout不起作用
    model.eval()
    corrent = 0
    for i, data in enumerate(test_loader):
        inputs, labels = data
        out = model(inputs)
        # 返回最大值，及最大值的索引 1代表需要计算的维度
        _, predict = torch.max(out, 1)
        corrent += (predict == labels).sum(0)
    print('Test acc {0}'.format(corrent.item()/len(test_dataset)))
    
    corrent = 0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        out = model(inputs)
        # 返回最大值，及最大值的索引 1代表需要计算的维度
        _, predict = torch.max(out, 1)
        corrent += (predict == labels).sum(0)
    print('Train acc {0}'.format(corrent.item()/len(train_dataset)))

In [22]:
for epoch in range(10):
    print('epoch ',epoch)
    train()
    test()

epoch  0
Test acc 0.9359
Train acc 0.9391
epoch  1
Test acc 0.9438
Train acc 0.9439833333333333
epoch  2
Test acc 0.9452
Train acc 0.9468833333333333
epoch  3
Test acc 0.9511
Train acc 0.9556
epoch  4
Test acc 0.9536
Train acc 0.9566833333333333
epoch  5
Test acc 0.9546
Train acc 0.9606333333333333
epoch  6
Test acc 0.9551
Train acc 0.9608333333333333
epoch  7
Test acc 0.9595
Train acc 0.9641
epoch  8
Test acc 0.9587
Train acc 0.9657833333333333
epoch  9
Test acc 0.9594
Train acc 0.9673666666666667


### 正则化

In [23]:
LR = 0.5
model = Net_1()
ce_loss = nn.CrossEntropyLoss()
# 设置L2正则参数
optimizer = optim.SGD(model.parameters(), LR, weight_decay=0.001)

In [24]:
for epoch in range(10):
    print('epoch ',epoch)
    train()
    test()

epoch  0
Test acc 0.9063
Train acc 0.9032166666666667
epoch  1
Test acc 0.9182
Train acc 0.9152333333333333
epoch  2
Test acc 0.9262
Train acc 0.9241833333333334
epoch  3
Test acc 0.9095
Train acc 0.9123333333333333
epoch  4
Test acc 0.9226
Train acc 0.92465
epoch  5
Test acc 0.9242
Train acc 0.9233
epoch  6
Test acc 0.9337
Train acc 0.9339666666666666
epoch  7
Test acc 0.9294
Train acc 0.9296666666666666
epoch  8
Test acc 0.9096
Train acc 0.9101666666666667
epoch  9
Test acc 0.9282
Train acc 0.9291166666666667


可以看到用了正则化反而正确率不是很高了

#### 不同的优化器

In [25]:
LR = 0.001
model = Net_1()
ce_loss = nn.CrossEntropyLoss()
# 设置L2正则参数
optimizer = optim.Adam(model.parameters(), LR)

In [26]:
for epoch in range(10):
    print('epoch ',epoch)
    train()
    test()

epoch  0
Test acc 0.9259
Train acc 0.9299833333333334
epoch  1
Test acc 0.9436
Train acc 0.94675
epoch  2
Test acc 0.9521
Train acc 0.9563166666666667
epoch  3
Test acc 0.9569
Train acc 0.9600833333333333
epoch  4
Test acc 0.9584
Train acc 0.9629666666666666
epoch  5
Test acc 0.9616
Train acc 0.9665333333333334
epoch  6
Test acc 0.9631
Train acc 0.968
epoch  7
Test acc 0.9654
Train acc 0.9704666666666667
epoch  8
Test acc 0.9632
Train acc 0.9701833333333333
epoch  9
Test acc 0.9653
Train acc 0.9721666666666666
