In [6]:
# 从零开始实现 dropout
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

def dropout(x, drop_prob):
    """Apply inverted dropout to ``x``.

    Each element is zeroed independently with probability ``drop_prob``;
    surviving elements are divided by ``1 - drop_prob`` so the expected
    value of every element is unchanged.

    Args:
        x: Input tensor of any shape. It is converted with ``.float()``
            before masking, so integer inputs are accepted.
        drop_prob: Probability of dropping an element, in ``[0, 1]``.

    Returns:
        A float tensor with the same shape (and device) as ``x``.

    Raises:
        AssertionError: If ``drop_prob`` is outside ``[0, 1]``.
    """
    X = x.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Everything is dropped; return early to avoid dividing by zero.
        # Build the zeros from the float copy so the result dtype matches
        # the regular path even when the caller passed an integer tensor.
        return torch.zeros_like(X)
    # Sample the mask on the input's device so GPU tensors work as well.
    mask = (torch.rand(X.shape, device=X.device) < keep_prob).float()
    return mask * X / keep_prob

# a = torch.tensor([1,2,3])
# print(torch.zeros_like(a))
# print(dropout(torch.arange(16).view((4,4)),0.3))

# Define the model parameters: a 784 -> 256 -> 256 -> 10 multilayer perceptron.
num_inputs = 784
num_outputs = 10
num_hidden1 = 256
num_hidden2 = 256

# Weights are drawn from a normal distribution (mean 0, std 0.01) with NumPy
# and converted to trainable float32 tensors; biases start at zero.
w1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hidden1)),
    dtype=torch.float,
    requires_grad=True,
)
b1 = torch.zeros(num_hidden1, requires_grad=True)

w2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hidden1, num_hidden2)),
    dtype=torch.float,
    requires_grad=True,
)
b2 = torch.zeros(num_hidden2, requires_grad=True)

w3 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hidden2, num_outputs)),
    dtype=torch.float,
    requires_grad=True,
)
b3 = torch.zeros(num_outputs, requires_grad=True)

# Flat list of every trainable tensor, in layer order.
params = [w1, b1, w2, b2, w3, b3]

# Define the model
drop_prob1 = 0.2
drop_prob2 = 0.5


def net(x, is_training=True):
    """Forward pass of the two-hidden-layer MLP with dropout.

    Args:
        x: Input batch; it is reshaped to ``(-1, num_inputs)`` first.
        is_training: When true, dropout is applied after each hidden
            layer (``drop_prob1`` after the first, ``drop_prob2`` after
            the second). When false, dropout is skipped.

    Returns:
        The output of the final linear layer (no softmax applied), one
        row per flattened input sample.
    """
    flat = x.view((-1, num_inputs))

    hidden1 = torch.relu(flat @ w1 + b1)
    if is_training:
        hidden1 = dropout(hidden1, drop_prob1)

    hidden2 = torch.relu(hidden1 @ w2 + b2)
    if is_training:
        hidden2 = dropout(hidden2, drop_prob2)

    return hidden2 @ w3 + b3

def evaluate_accuracy(data_iter, net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds
            the integer class labels for the samples in ``X``.
        net: Either a ``torch.nn.Module`` or a plain callable. A module is
            switched to eval mode for the duration of the evaluation and
            then returned to whatever mode it was in before. A plain
            callable that accepts an ``is_training`` parameter is called
            with ``is_training=False``; any other callable is called with
            the batch only.

    Returns:
        Fraction of samples whose arg-max prediction (over ``dim=1``)
        equals the label, as a Python float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    import inspect  # local import: only needed for the signature check

    is_module = isinstance(net, torch.nn.Module)

    # Decide once how to call a hand-written model function.
    takes_flag = False
    if not is_module:
        try:
            takes_flag = 'is_training' in inspect.signature(net).parameters
        except (TypeError, ValueError):
            # No introspectable signature; fall back to calling net(X).
            takes_flag = False

    if is_module:
        was_training = net.training
        net.eval()  # evaluation mode disables dropout layers

    acc_sum, n = 0.0, 0
    try:
        # Gradients are not needed to measure accuracy.
        with torch.no_grad():
            for X, y in data_iter:
                if takes_flag:
                    # Hand-written model: explicitly turn training mode off.
                    y_hat = net(X, is_training=False)
                else:
                    y_hat = net(X)
                acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            # Restore the mode the caller had, rather than forcing train().
            net.train(was_training)
    return acc_sum / n


# Train and evaluate the model
num_epochs = 5
# NOTE(review): lr=100.0 is unusually large. Presumably d2l.train_ch3's SGD
# step divides the gradient by batch_size while CrossEntropyLoss already
# averages over the batch -- confirm against d2lzh_pytorch before changing.
lr = 100.0
batch_size = 256

loss = nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(
    net, train_iter, test_iter, loss,
    num_epochs, batch_size, params, lr,
)

epoch 1, loss 0.0046, train acc 0.540, test acc 0.721
epoch 2, loss 0.0023, train acc 0.784, test acc 0.777
epoch 3, loss 0.0019, train acc 0.821, test acc 0.809
epoch 4, loss 0.0018, train acc 0.836, test acc 0.830
epoch 5, loss 0.0016, train acc 0.847, test acc 0.837
