In [1]:
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import nn ,loss as gloss, data as gdata

def dropout(X, drop_prob):
    """Apply inverted dropout to ``X``.

    Each element of ``X`` is zeroed with probability ``drop_prob``; the
    elements that survive are divided by ``1 - drop_prob``.

    Args:
        X: NDArray to apply dropout to.
        drop_prob: Probability of dropping an element; must lie in [0, 1].

    Returns:
        ``X.zeros_like()`` when ``drop_prob`` is 1, otherwise the masked and
        rescaled array ``mask * X / (1 - drop_prob)``.

    Raises:
        AssertionError: If ``drop_prob`` is outside [0, 1].
    """
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Everything is dropped; returning early also avoids dividing by zero.
        return X.zeros_like()
    # Sample the mask on the same device as X so the element-wise product
    # also works when X does not live on the default context (e.g. a GPU).
    mask = nd.random.uniform(0, 1, X.shape, ctx=X.context) < keep_prob
    return mask * X / keep_prob

In [2]:
# Model parameters: layer sizes for the input, two hidden layers and the output.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256

# Weights are drawn from a normal distribution with standard deviation 0.01.
W1 = nd.random.normal(shape=(num_inputs, num_hiddens1), scale=0.01)
W2 = nd.random.normal(shape=(num_hiddens1, num_hiddens2), scale=0.01)
W3 = nd.random.normal(shape=(num_hiddens2, num_outputs), scale=0.01)

# Biases start at zero, one entry per unit of the corresponding layer.
b1 = nd.zeros(num_hiddens1)
b2 = nd.zeros(num_hiddens2)
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
# Attach a gradient buffer to every parameter so autograd can fill it in.
for param in params:
    param.attach_grad()


In [3]:
# Model with dropout.
# Hyperparameters: drop probabilities for the first and second hidden layer.
drop_prob1 = 0.2
drop_prob2 = 0.5

def net(X):
    """Run the forward pass of the two-hidden-layer network.

    ``X`` is reshaped to ``(-1, num_inputs)``, passed through two
    ReLU-activated layers (``W1``/``b1`` and ``W2``/``b2``) and a final linear
    layer (``W3``/``b3``). Dropout is applied to each hidden activation only
    while ``autograd.is_training()`` is true.

    Returns:
        The output of the final linear layer (no softmax is applied here).
    """
    # Dropout is only used during training, never at evaluation time.
    training = autograd.is_training()

    flat = X.reshape((-1, num_inputs))

    H1 = nd.relu(nd.dot(flat, W1) + b1)
    if training:
        H1 = dropout(H1, drop_prob1)

    H2 = nd.relu(nd.dot(H1, W2) + b2)
    if training:
        H2 = dropout(H2, drop_prob2)

    return nd.dot(H2, W3) + b3

In [4]:
# Train and test the model

In [5]:
# Training configuration.
num_epochs = 5
batch_size = 256
lr = 0.5

train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()

# Train the from-scratch network, handing over the raw parameter list and the
# learning rate.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params, lr)
# Dropout is a commonly used technique against overfitting in deep multi-layer
# networks; in the recorded run below the test accuracy stays at or above the
# training accuracy in every epoch.

epoch 1, loss 1.1519, train acc 0.546, test acc 0.790
epoch 2, loss 0.5916, train acc 0.780, test acc 0.830
epoch 3, loss 0.4983, train acc 0.816, test acc 0.843
epoch 4, loss 0.4545, train acc 0.834, test acc 0.859
epoch 5, loss 0.4264, train acc 0.845, test acc 0.851


In [7]:
# Concise version: the model built from Gluon layers.
# Two 256-unit ReLU layers, each followed by dropout, then a 10-unit output layer.
net = nn.Sequential()
net.add(nn.Dense(256, activation="relu"))
net.add(nn.Dropout(drop_prob1))
net.add(nn.Dense(256, activation="relu"))
net.add(nn.Dropout(drop_prob2))
net.add(nn.Dense(10))

# Initialise the parameters with a normal initialiser (sigma=0.01).
net.initialize(init.Normal(sigma=0.01))

In [9]:
# SGD trainer over all parameters collected from the Gluon network.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': lr},
)
# The two None arguments sit where the from-scratch run passed `params` and
# `lr`; here the trainer is supplied instead.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, trainer)

epoch 1, loss 1.2911, train acc 0.511, test acc 0.750
epoch 2, loss 0.6151, train acc 0.772, test acc 0.832
epoch 3, loss 0.5124, train acc 0.814, test acc 0.845
epoch 4, loss 0.4666, train acc 0.828, test acc 0.857
epoch 5, loss 0.4342, train acc 0.842, test acc 0.858
