权重衰减是减轻过拟合的一种常用方法，等价于L2范数正则化  
具体做法是在损失函数中加入L2范数惩罚项，对绝对值较大的参数施加惩罚，从而限制模型的复杂度  
以下是从零实现权重衰减、缓解过拟合的一个实践

In [20]:
#这里用了一个高维线性回归来模拟过拟合的情况
%matplotlib inline
import torch
import torch.nn as nn
import numpy as np
import sys
import d2lzh_pytorch as d2l
# Training-set size, test-set size and feature dimension. There are far fewer
# training samples (20) than features (200), which makes overfitting easy.
n_train, n_test, num_inputs = 20, 100, 200
# Ground-truth parameters of the linear model y = X w + b + noise.
true_w, true_b = torch.ones(num_inputs, 1) * 0.01, 0.05
features = torch.randn((n_train + n_test, num_inputs))
labels = torch.matmul(features, true_w) + true_b
# Add Gaussian noise (mean 0, std 0.01) to the linear response. The noise must
# be added in place rather than assigned, otherwise the labels would be pure
# noise with no dependence on the features.
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float)
# The first n_train rows form the training set; the rest form the test set.
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]


In [21]:
# Initialize the model parameters.
def init_params():
    """Create fresh trainable parameters for the linear model.

    Returns:
        A two-element list ``[w, b]``: ``w`` is a ``(num_inputs, 1)`` tensor
        drawn from a standard normal distribution and ``b`` is a one-element
        zero tensor. Both have ``requires_grad=True``.
    """
    weight_shape = (num_inputs, 1)
    weight = torch.randn(weight_shape, requires_grad=True)
    bias = torch.zeros(1, requires_grad=True)
    return [weight, bias]

In [22]:
# Define the L2-norm penalty term.
def l2_penalty(w):
    """Return half of the sum of the squared entries of ``w``."""
    squared = w.pow(2)
    return squared.sum() / 2

In [23]:
# Training configuration and data pipeline.
batch_size = 1
num_epochs = 100
lr = 0.003
# Model and loss callables taken from the d2l helper module.
net, loss = d2l.linreg, d2l.squared_loss
# Wrap the training tensors so they can be iterated in shuffled mini-batches.
dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_iter = torch.utils.data.DataLoader(
    dataset, batch_size=batch_size, shuffle=True
)
def fit_and_plot(lambd):
    """Train the linear model with L2 weight decay and plot the loss curves.

    Parameters are re-initialized via ``init_params`` on every call, trained
    for ``num_epochs`` passes over ``train_iter``, and the mean training and
    test losses recorded after each epoch are handed to ``d2l.semilogy``.
    Finally the L2 norm of the learned weight vector is printed.

    Args:
        lambd: Coefficient multiplying the L2 penalty on ``w``. ``0`` disables
            weight decay. The bias ``b`` is never penalized.
    """
    w, b = init_params()
    params = [w, b]
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # Data loss plus the weight-decay term on w only.
            l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l = l.sum()
            # backward() accumulates into .grad, so clear stale gradients
            # first. .grad is still None before the first backward pass.
            for param in params:
                if param.grad is not None:
                    param.grad.data.zero_()
            l.backward()
            # NOTE(review): d2l.sgd presumably updates the parameters in place
            # from their gradients using lr and batch_size; confirm against
            # the installed d2lzh_pytorch.
            d2l.sgd(params, lr, batch_size)
        # Evaluate without autograd tracking and store plain Python floats, so
        # the recorded history holds no graph references and the plotting
        # helper receives numbers rather than tensors that require grad.
        with torch.no_grad():
            train_ls.append(
                loss(net(train_features, w, b), train_labels).mean().item())
            test_ls.append(
                loss(net(test_features, w, b), test_labels).mean().item())
    # NOTE(review): requires a d2lzh_pytorch build that provides `semilogy`;
    # the notebook output recorded an AttributeError for it in one environment.
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
        

In [24]:
# With lambd set to 0 the penalty term vanishes, i.e. no weight decay is used.
fit_and_plot(lambd=0)

AttributeError: module 'd2lzh_pytorch' has no attribute 'semilogy'