In [26]:
import torch
import torch.nn as nn

net=nn.Sequential(
    nn.Linear(20,256),
    nn.ReLU(),
    nn.Linear(256,10)
)
x=torch.randn(2,20)
y=net(x)
print(y)
#自定义块
#不继承nn.Module,模型失去PyTorch的很多功能,比如参数管理、保存和加载等
#没有__init_()方法,forward()方法，无法定义可学习的参数，连基本的网络骨架都无法搭建

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden=nn.Linear(20,256)
        self.out=nn.Linear(256,10)
    def forward(self,x):
        y=self.hidden(x)
        y=torch.relu(y)
        y=self.out(y)
        return y
net=MLP()#实例化,类是模板,对象才是实体，类是设计图，对象是盖出来的房子
#实例化在内存中分配独立空间，初始化专属属性，获得类的方法使用权
y=net(x)
print("y=",y)
#自定义顺序块
#enumerate()函数,同时获取索引和值
#self._modules属性,存储子模块的有序字典
#self._modules.values()方法,按添加顺序返回所有子模块
class MySequential(nn.Module):
    def __init__(self,*args):
        super().__init__()
        for idx,module in enumerate(args):
            self._modules[str(idx)]=module
            #self.add_module(str(idx),module)等价于上面一句
    def forward(self,x):
        for module in self._modules.values():
            x=module(x)
        return x

net=MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(x)
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.rand_weight=torch.rand((20,20),requires_grad=False)
        self.linear=nn.Linear(20,20)
    def forward(self,x):
        x=self.linear(x)
        x=torch.mm(x,self.rand_weight)+1
        x=torch.relu(x)
        #x=torch.relu(torch.mm(x,self.rand_weight)+1)
        x=self.linear(x)
        while x.abs().sum()>1:
            x/=2
        return x.sum()
net=FixedHiddenMLP()
print("FixedHiddenMLP=",net(x) )
#混合搭配
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net=nn.Sequential(nn.Linear(20,64),nn.ReLU(),nn.Linear(64,32),nn.ReLU())
        self.linear=nn.Linear(32,16)
    def forward(self,x):
        x=self.net(x)
        x=self.linear(x)
        return x
chimera=nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
print("chimera=",chimera(x))
#从嵌套块收集参数
def block1():
    return nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,4),nn.ReLU())
def block2():
    net=nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}',block1())
    return net
rgnet=nn.Sequential(block2(),nn.Linear(4,1))
x=torch.randn(2,4)
y=rgnet(x)
print("rgnet=",y)
print(rgnet)
print("rgnet[0]=",rgnet[0])
print("rgnet[0][1]=",rgnet[0][1])
print("rgnet[0][1][0]=",rgnet[0][1][0])
print("rgnet[0][1][0].weight=",rgnet[0][1][0].weight)
print(rgnet[0][1][0].bias.data)

#初始化参数
def init_normal(m):
    if type(m)==nn.Linear:
        nn.init.normal_(m.weight,mean=0,std=0.01)
        nn.init.zeros_(m.bias)
def init_constant(m):
    if type(m)==nn.Linear:
        nn.init.constant_(m.weight,1)
        nn.init.zeros_(m.bias)
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
net.apply(init_normal)

print("net[0].weight=",net[0].weight.data[0])

def init_xavier(m):
    if type(m)==nn.Linear:
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    if type(m)==nn.Linear:
        nn.init.constant_(m.weight,42)
net[0].apply(init_xavier)
net[2].apply(init_42)
print("net[0].weight=",net[0].weight.data[0])
print("net[2].weight=",net[2].weight.data)
def my_init(m):
    if type(m)==nn.Linear:
        print("Init",*[(name,param.shape) for name,param in m.named_parameters()][0])
        nn.init.uniform_(m.weight,-10,10)
        m.weight.data *=m.weight.data.abs()>=5
net.apply(my_init)
net[0].weight[:2]

tensor([[-0.1281, -0.1629,  0.2090, -0.0678, -0.5984, -0.1955,  0.3527,  0.0543,
          0.0136, -0.2148],
        [ 0.0316,  0.0547,  0.1158, -0.3020, -0.1892, -0.1744,  0.3216, -0.1953,
          0.2379, -0.1289]], grad_fn=<AddmmBackward0>)
y= tensor([[-0.1403, -0.0502, -0.2725, -0.1243,  0.3843, -0.1088,  0.0984,  0.0310,
         -0.2574,  0.2286],
        [-0.3636, -0.0896,  0.0646,  0.0351,  0.4914,  0.1473,  0.1491, -0.2169,
          0.3327, -0.0761]], grad_fn=<AddmmBackward0>)
FixedHiddenMLP= tensor(-0.0090, grad_fn=<SumBackward0>)
chimera= tensor(0.1423, grad_fn=<SumBackward0>)
rgnet= tensor([[0.3622],
        [0.3622]], grad_fn=<AddmmBackward0>)
Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2

tensor([[-8.7140,  0.0000,  0.0000, -7.2266],
        [ 7.0398,  5.1997, -0.0000,  0.0000]], grad_fn=<SliceBackward0>)

**5.2  参数管理**

In [27]:
net=nn.Sequential(
    nn.Linear(4,8),
    nn.ReLU(), 
    nn.Linear(8,1)
)
x=torch.randn(2,4)
y=net(x)
print(y)
print(net[2].state_dict())#访问模型参数
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)
net[2].weight.grad==None
print([(name,param.shape) for name,param in net[0].named_parameters()])#访问所有模型参数
print(*[(name,param.shape) for name,param in net[0].named_parameters()])#访问所有模型参数
print((name,param.shape) for name,param in net.named_parameters())#访问所有模型参数

#参数绑定
shared=nn.Linear(8,8)
net=nn.Sequential(
    nn.Linear(4,8),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.Linear(8,1)
)
print(net[2].weight.data[0]==net[4].weight.data[0])

tensor([[ 0.4812],
        [-0.3488]], grad_fn=<AddmmBackward0>)
OrderedDict([('weight', tensor([[ 0.2345,  0.3185, -0.2397, -0.2912, -0.2853, -0.3319,  0.0909,  0.1739]])), ('bias', tensor([-0.0305]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.0305], requires_grad=True)
tensor([-0.0305])
[('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
<generator object <genexpr> at 0x7a706aa735a0>
tensor([True, True, True, True, True, True, True, True])


**5.4  自定义层**

In [2]:
import torch
import torch.nn.functional as F
from torch import nn
 
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self,x):
        return x-x.mean()
layer=CenteredLayer()
print(layer(torch.FloatTensor([1,2,3,4,5])))
class MyLinear(nn.Module):
    def __init__(self,in_units,out_units):
        super().__init__()
    
        self.weight=nn.Parameter(torch.randn(in_units,out_units))
        self.bias=nn.Parameter(torch.randn(out_units,))
    def forward(self,x):
        x=torch.matmul(x,self.weight)+self.bias
        return F.relu(x)
linear=MyLinear(5,3)
print(linear.weight)


tensor([-2., -1.,  0.,  1.,  2.])
Parameter containing:
tensor([[-2.1974,  0.6721,  1.8472],
        [ 0.6637,  0.7572, -1.7577],
        [ 0.9021,  1.1366, -1.1014],
        [-0.2501,  2.3138,  0.3873],
        [-1.0540, -0.5400, -0.2885]], requires_grad=True)


**5.5  读写文件**

In [10]:
import torch,os
from torch import nn
from torch.nn import functional as F


class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden=MyLinear(20,256)
        self.out=MyLinear(256,10)
    def forward(self,x):
        return self.out(self.hidden(x))


net=MLP()
x=torch.randn(2,20)
y=net(x)
path=r"/home/pumengyu/2025_9python/limu/d2l"
torch.save(net.state_dict(),os.path.join(path,'mlp.params'))

clone=MLP()
clone.load_state_dict(torch.load(os.path.join(path,'mlp.params')))
clone.eval()
y_clone=clone(x)
print(y_clone==y)
print(torch.cuda.device_count())
torch.cuda.device_count()
x=torch.randn(2,20)
print(x.device)
def try_gpu(i=0):
    if torch.cuda.device_count()>=i+1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

x1=torch.ones(2,3,device=try_gpu())
print("x1.device=",x1.device)
x2=torch.randn(2,3,device=try_gpu(1))
print("x2.device=",x2.device)
net=nn.Sequential(nn.Linear(3,1))
print("net.device=",net[0].weight.data.device)

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])
2
cpu
x1.device= cuda:0
x2.device= cuda:1
net.device= cpu
