<a href="https://colab.research.google.com/github/liuxiao916/Dive_into_DL_PyTorch/blob/main/3_13_%E4%B8%A2%E5%BC%83%E6%B3%95.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 3.13 丢弃法

### 3.12.2 从零开始实现

In [1]:
import torch
import numpy as np

def dropout(X, drop_prob):
    assert 0<=drop_prob<=1
    keep_prob = 1- drop_prob
    if keep_prob==0:
        return torch.zeros_like(X)
    mask = torch.tensor(np.random.uniform(0,1,X.shape)<keep_prob)
    return mask*X/keep_prob

In [2]:
X = torch.arange(16).reshape((2,8))
dropout(X, 0)

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])

In [3]:
dropout(X, 0.5)

tensor([[ 0.,  0.,  0.,  6.,  0.,  0.,  0., 14.],
        [ 0.,  0.,  0., 22., 24.,  0.,  0., 30.]])

In [4]:
dropout(X, 1)

tensor([[0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0]])

1. 定义模型参数

In [5]:
num_inputs, num_outputs, num_hiddens1,num_hiddens2 = 784, 10, 256, 256

W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)

params = [W1, b1, W2, b2, W3, b3]

2. 定义模型

In [6]:
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X, is_training=True):
    X = X.view(-1,num_inputs)
    H1 = (torch.mm(X,W1)+b1).relu()
    if is_training:
        H1 = dropout(H1, drop_prob1)
    H2 = (torch.mm(H1,W2)+b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)
    return torch.mm(H2,W3) + b3

3. 训练和测试模型

In [7]:
num_epochs, lr, Batch_size = 5, 0.5, 256
loss = torch.nn.CrossEntropyLoss()
import torchvision
fashionmnist_train = torchvision.datasets.FashionMNIST(root = './data/fashionMNIST', train = True, transform=torchvision.transforms.ToTensor(),download=True)
fashionmnist_train_dataloader = torch.utils.data.DataLoader(dataset= fashionmnist_train,batch_size = Batch_size, shuffle = True)
fashionmnist_test = torchvision.datasets.FashionMNIST(root = './data/fashionMNIST', train = False, transform=torchvision.transforms.ToTensor(),download=True)
fashionmnist_test_dataloader = torch.utils.data.DataLoader(dataset= fashionmnist_test,batch_size = Batch_size, shuffle = True)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [8]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0 
    for X,y in data_iter:
        acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
        n += y.shape[0]
    return acc_sum/n

In [9]:
def sgd(params, lr):
    for param in params:
        param.data = param.data - lr*param.grad

In [10]:
def train_ch3_1(net, train_iter,test_iter, loss, num_epochs, batch_size, params = None, lr = None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n =0.0, 0.0, 0
        for X,y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            l.backward()
            sgd(params, lr)
            
            for param in params:
                param.grad.data.zero_()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3_1(net, fashionmnist_train_dataloader, fashionmnist_test_dataloader, loss, num_epochs, Batch_size, params, lr) 

epoch 1, loss 0.0048, train acc 0.515, test acc 0.754
epoch 2, loss 0.0024, train acc 0.768, test acc 0.805
epoch 3, loss 0.0023, train acc 0.788, test acc 0.822
epoch 4, loss 0.0019, train acc 0.824, test acc 0.839
epoch 5, loss 0.0017, train acc 0.838, test acc 0.823


### 3.13.3 简介实现

In [11]:
net = torch.nn.Sequential()
net.add_module('Linear1', torch.nn.Linear(784,256))
net.add_module('ReLU1',torch.nn.ReLU())
net.add_module('Drop1',torch.nn.Dropout(drop_prob1))
net.add_module('Linear2', torch.nn.Linear(256,256))
net.add_module('ReLU2',torch.nn.ReLU())
net.add_module('Drop2',torch.nn.Dropout(drop_prob2))
net.add_module('Linear3', torch.nn.Linear(256,10))

In [12]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

def evaluate_accuracy_torch(data_iter, net):
    acc_sum, n = 0.0, 0 
    for X,y in data_iter:
        net.eval()
        acc_sum += (net(X.reshape(-1,num_inputs)).argmax(dim=1) == y).float().sum().item() 
        net.train()
        n += y.shape[0]
    return acc_sum/n

In [13]:
num_epochs = 5

for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n =0.0, 0.0, 0
    for X,y in fashionmnist_train_dataloader:
        y_hat = net(X.reshape(-1,num_inputs))
        l = loss(y_hat, y).sum()
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    test_acc = evaluate_accuracy_torch(fashionmnist_test_dataloader, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

epoch 1, loss 0.0034, train acc 0.675, test acc 0.747
epoch 2, loss 0.0021, train acc 0.805, test acc 0.810
epoch 3, loss 0.0018, train acc 0.832, test acc 0.829
epoch 4, loss 0.0017, train acc 0.843, test acc 0.803
epoch 5, loss 0.0016, train acc 0.853, test acc 0.781
