In [1]:
import torch
from torch import nn

In [2]:
# Evaluate the class object itself; the output shows its fully qualified module path.
nn.Transformer

torch.nn.modules.transformer.Transformer

In [3]:
# Linear layer that maps 3 input features to 8 output features.
linear = nn.Linear(in_features=3, out_features=8)

In [4]:
# Random input of shape (2, 6, 3); the last dimension matches in_features=3.
x = torch.randn(2, 6, 3)

In [5]:
# Only the last dimension is transformed (3 -> 8); the leading dimensions (2, 6) are kept.
linear(x).shape

torch.Size([2, 6, 8])

In [6]:
class Model(nn.Module):
    """
    Pass the input through N fully connected steps that share one set of weights.

    A single nn.Linear is created and re-applied N times in forward, so all
    N "layers" are the very same layer and the model owns only one weight
    matrix and one bias vector.

    Args:
        num_features: Size of both the input and the output feature dimension.
        N: Number of times the shared layer is applied in forward.
    """
    def __init__(self, num_features=128, N=3):
        super().__init__()
        # The one and only layer; forward reuses it on every step.
        self.linear = nn.Linear(num_features, num_features)
        self.N = N

    def forward(self, x):
        out = x
        # Apply the shared layer N times in a row.
        for _step in range(self.N):
            out = self.linear(out)
        return out

In [7]:
# Random input of shape (2, 128); the feature size matches the num_features=128 used for the model below.
x = torch.randn(2, 128)

In [8]:
# Build the weight-sharing model: one 128 -> 128 Linear that forward applies N=3 times.
model = Model(num_features=128, N=3)

In [9]:
# Display the module tree; only a single Linear is registered.
model

Model(
  (linear): Linear(in_features=128, out_features=128, bias=True)
)

In [10]:
# Forward pass: the shared layer is applied to x three times.
model(x)

tensor([[ 2.6769e-01,  1.4558e-01,  1.2385e-01, -2.7182e-01, -4.2483e-01,
          1.6585e-01, -8.7145e-02, -3.7725e-01, -2.0215e-01, -3.2071e-01,
         -4.3592e-01,  2.0643e-02,  6.8118e-02,  1.8769e-01, -3.6750e-02,
         -4.0643e-01,  9.7860e-02,  9.3092e-04, -1.5914e-01, -1.5615e-01,
         -3.0598e-01,  1.3864e-01, -2.0336e-01, -3.0274e-01, -1.4094e-01,
          1.5895e-01, -6.3669e-02, -4.5859e-02, -2.1142e-01, -2.2416e-01,
          4.1219e-01,  1.1700e-01, -2.3621e-01, -3.7172e-01, -4.5366e-02,
          1.0757e-02, -9.0981e-02,  1.9621e-01,  4.0823e-01,  5.9777e-02,
          8.0034e-02, -4.9043e-02, -3.3780e-01,  3.2282e-01, -7.8119e-02,
          3.9792e-01,  9.1148e-02,  5.1778e-01, -1.8462e-01, -8.8155e-02,
          7.2463e-02,  3.6203e-01, -1.8076e-02, -3.3294e-02,  3.8355e-01,
         -2.7667e-02,  3.6559e-01, -8.6156e-02,  4.2546e-01,  2.2265e-01,
          1.7133e-01,  3.1340e-01,  3.5756e-02, -4.9017e-01,  2.0911e-02,
         -2.7597e-01, -1.2504e-01, -4.

In [11]:
# Each item yielded by named_parameters() is a (name, parameter) tuple.
# The shared layer contributes just one weight and one bias.
for param in model.named_parameters():
    print(param)

('linear.weight', Parameter containing:
tensor([[-0.0144,  0.0310, -0.0470,  ...,  0.0622, -0.0232,  0.0271],
        [-0.0570,  0.0240,  0.0192,  ...,  0.0238, -0.0520,  0.0597],
        [ 0.0149,  0.0244,  0.0356,  ..., -0.0478, -0.0111, -0.0418],
        ...,
        [-0.0584, -0.0271, -0.0282,  ...,  0.0800,  0.0787,  0.0704],
        [ 0.0752,  0.0387, -0.0335,  ...,  0.0662, -0.0850, -0.0816],
        [-0.0089, -0.0844, -0.0538,  ...,  0.0750, -0.0276,  0.0557]],
       requires_grad=True))
('linear.bias', Parameter containing:
tensor([-0.0394,  0.0297,  0.0663, -0.0800,  0.0257,  0.0824, -0.0620, -0.0535,
        -0.0659,  0.0789,  0.0157, -0.0686, -0.0830, -0.0074,  0.0628, -0.0086,
         0.0182,  0.0396, -0.0374, -0.0253, -0.0815,  0.0682, -0.0336, -0.0431,
        -0.0276, -0.0577,  0.0135, -0.0667, -0.0246, -0.0456,  0.0650,  0.0446,
        -0.0282, -0.0454,  0.0163, -0.0423, -0.0154,  0.0534,  0.0425, -0.0862,
         0.0273,  0.0449, -0.0834,  0.0128, -0.0577, -0.0401

In [12]:
class Model(nn.Module):
    """
    Pass the input through N separate fully connected layers (no weight sharing).

    Every layer is its own nn.Linear, so N independent sets of weights are
    created.

    NOTE: the layers are stored in a plain Python list, which nn.Module does
    not register. As a result the model's repr shows no submodules and
    named_parameters() yields nothing, even though forward still runs.
    Wrapping the list in nn.ModuleList is what makes the layers register.

    Args:
        num_features: Size of both the input and the output feature dimension.
        N: Number of independent linear layers.
    """
    def __init__(self, num_features=128, N=3):
        super().__init__()
        self.N = N
        # Build the layers one by one and keep them in an ordinary list.
        layers = []
        for _ in range(self.N):
            layers.append(nn.Linear(num_features, num_features))
        self.linears = layers

    def forward(self, x):
        out = x
        # Feed the running result through each layer in order.
        for layer in self.linears:
            out = layer(out)
        return out

In [13]:
# Instantiate with the defaults (num_features=128, N=3).
model = Model()

In [14]:
# The repr lists no submodules: layers stored in a plain Python list are not registered on the module.
model

Model()

In [15]:
# Prints nothing: the layers kept in a plain list contribute no registered parameters.
for param in model.named_parameters():
    print(param)

In [16]:
# Fresh random input of shape (2, 128).
x = torch.randn(2, 128)

In [17]:
# Inspect the random input tensor.
x

tensor([[-1.4687e+00,  6.2691e-01,  5.2719e-01, -2.4077e+00, -3.8215e-01,
         -1.0551e+00, -5.2441e-01,  4.1311e-01, -1.1686e+00, -6.4743e-01,
          4.1329e-01,  1.9177e+00, -8.2949e-01,  1.0678e+00,  4.7738e-01,
         -6.0747e-01,  1.7272e+00, -7.2716e-02,  7.7429e-01,  1.6697e-01,
          9.7245e-01, -1.3016e-01,  1.1373e+00, -7.4864e-01,  6.7882e-01,
         -6.2937e-01,  4.1224e-01, -1.5911e+00, -7.8197e-01,  8.6239e-02,
          1.7513e-01,  2.5594e+00,  7.2966e-01,  9.2092e-01, -1.9970e+00,
          8.3723e-01,  1.2103e+00, -1.3043e+00, -5.0547e-01, -5.6307e-01,
         -6.5003e-01, -1.0620e+00,  1.5667e+00,  2.3700e-02,  1.0671e+00,
         -7.0273e-01,  9.9557e-01,  2.0732e+00,  5.6930e-02, -1.1228e-01,
          9.5193e-02, -4.5813e-01, -2.6242e-01, -1.1765e+00,  2.3634e-01,
         -1.2327e+00, -1.6928e+00,  1.2956e+00, -6.0877e-01, -4.4051e-01,
         -3.2587e-01,  2.1848e+00,  1.3145e+00,  4.7540e-01,  3.1825e-03,
         -1.6771e-01, -2.0422e+00,  6.

In [18]:
# The forward pass still runs, because forward() iterates over the list directly even though its layers are unregistered.
model(x)

tensor([[ 0.4230, -0.1167, -0.0566, -0.2212,  0.4592,  0.2585, -0.0661,  0.0102,
         -0.0538, -0.1215, -0.3220, -0.3241,  0.0849,  0.3456,  0.0489, -0.0511,
          0.0487, -0.3052,  0.0173, -0.1965, -0.1257, -0.1657,  0.0845, -0.0301,
          0.2379,  0.0729,  0.0195, -0.0351,  0.0581, -0.0852, -0.0505, -0.1403,
         -0.2223, -0.3813, -0.1564, -0.0938, -0.2062, -0.0737,  0.3475,  0.0738,
          0.1530,  0.2731, -0.0547,  0.1088,  0.1309,  0.4943, -0.0899,  0.1657,
         -0.5540, -0.1240,  0.3403,  0.1100,  0.4108, -0.0398,  0.1532,  0.1937,
          0.0442, -0.0676, -0.0619,  0.1584, -0.4202,  0.0474, -0.1579, -0.0055,
          0.0335,  0.1530,  0.0554, -0.2404, -0.1138, -0.1308,  0.1335, -0.2923,
         -0.1868,  0.0788,  0.1943,  0.0696,  0.4930, -0.0752, -0.2762, -0.1908,
          0.3630,  0.0518,  0.2555, -0.0113, -0.0793, -0.0065,  0.0185,  0.2607,
          0.1768, -0.0294,  0.1236,  0.0240, -0.1956,  0.4975, -0.1174, -0.0738,
         -0.1587,  0.3928, -

In [19]:
class Model(nn.Module):
    """
    Pass the input through N separate fully connected layers (no weight sharing).

    Every layer is its own nn.Linear, so the model owns N independent sets of
    weights. The layers live in an nn.ModuleList, which registers them as
    submodules of this model.

    Args:
        num_features: Size of both the input and the output feature dimension.
        N: Number of independent linear layers.
    """
    def __init__(self, num_features=128, N=3):
        super().__init__()
        self.N = N
        # Start from an empty ModuleList and add the layers one at a time.
        self.linears = nn.ModuleList()
        for _ in range(self.N):
            self.linears.append(nn.Linear(num_features, num_features))

    def forward(self, x):
        out = x
        # Feed the running result through each registered layer in order.
        for layer in self.linears:
            out = layer(out)
        return out

In [20]:
# Instantiate with the defaults (num_features=128, N=3).
model = Model()

In [21]:
# The repr now shows the three Linear layers registered under the ModuleList.
model

Model(
  (linears): ModuleList(
    (0-2): 3 x Linear(in_features=128, out_features=128, bias=True)
  )
)

In [22]:
# Random tensor with 5 elements, created without passing requires_grad.
a = torch.randn(5)

In [23]:
# The repr carries no requires_grad flag.
a

tensor([-1.5191, -0.5966, -0.3786,  1.4897,  0.8624])

In [24]:
# Random tensor with 5 elements that is flagged as requiring gradients.
b = torch.randn(5, requires_grad=True)

In [25]:
# The repr now includes requires_grad=True.
b

tensor([ 1.1839,  1.7705,  0.2289, -0.9460,  0.6993], requires_grad=True)

In [26]:
# torch.ones accepts the same requires_grad flag.
torch.ones(5, requires_grad=True)

tensor([1., 1., 1., 1., 1.], requires_grad=True)

In [84]:
class Model(nn.Module):
    """
    Minimal module that holds a single hand-written learnable parameter.

    w is a (2, 5) tensor of ones wrapped in nn.Parameter, which registers it
    on the module under the name "w". forward right-multiplies the input by w.
    """
    def __init__(self):
        super().__init__()
        # Wrapping the tensor in nn.Parameter is what registers it on the module.
        initial_weight = torch.ones(2, 5)
        self.w = nn.Parameter(initial_weight)

    def forward(self, x):
        weight = self.w
        return x @ weight

In [85]:
# Instantiate the model that holds a single hand-written parameter w.
model = Model()

In [86]:
# Random input of shape (12, 2); its last dimension matches the 2 rows of w.
x = torch.randn(12, 2)

In [87]:
# (12, 2) @ (2, 5) -> (12, 5).
model(x).shape

torch.Size([12, 5])

In [88]:
# The repr shows no submodules: w is an nn.Parameter, not a child module.
model

Model()

In [89]:
# The tensor wrapped in nn.Parameter is listed under the name 'w'.
for param in model.named_parameters():
    print(param)

('w', Parameter containing:
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True))
