# **Pytorch Note**

In [1]:
import torch

构建一个未初始化的矩阵（其中的值是内存中的任意残留值，并非随机数） size为5*3

In [3]:
# Allocate a 5x3 tensor without initialising it; the printed values are
# whatever happened to be in that memory.
x = torch.empty((5, 3))
x

tensor([[-2.9580e-33,  3.0822e-41,  3.3631e-44],
        [ 0.0000e+00,         nan,  0.0000e+00],
        [ 4.4721e+21,  1.5956e+25,  4.7399e+16],
        [ 3.7293e-08,  1.4838e-41,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00]])

构建一个在[0, 1)区间内均匀分布随机初始化的矩阵

In [14]:
# 5x3 tensor of samples drawn uniformly from [0, 1).
x = torch.rand((5, 3))
x

tensor([[0.7348, 0.1253, 0.7434],
        [0.1478, 0.0506, 0.2457],
        [0.6034, 0.5065, 0.4468],
        [0.0773, 0.0470, 0.1988],
        [0.5263, 0.9421, 0.1840]])

构建一个全部为0 类型为long的矩阵

In [9]:
# 5x3 tensor filled with zeros, stored as 64-bit integers (torch.long).
x = torch.zeros((5, 3), dtype=torch.long)
x


tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

从数据直接构建tensor

In [7]:
# Build a tensor directly from Python data; integer input yields an int64 tensor.
x = torch.tensor((5, 5, 3))
x

tensor([5, 5, 3])

利用原有tensor构建新tensor

In [8]:
# new_ones() builds a tensor of ones that inherits dtype/device from the source
# tensor unless they are overridden. The source `x` was created from Python
# ints (int64), so the float64 result shown below requires the explicit
# dtype override.
x = x.new_ones(5, 3, dtype=torch.double)
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

得到形状相同的tensor

In [10]:
# Same shape as the previous x, filled with standard-normal samples; the dtype
# is overridden to 32-bit float instead of being copied from x.
x = torch.randn_like(x, dtype=torch.float32)
x

tensor([[-0.8279, -0.2552,  0.9544],
        [-0.0385,  0.5376, -0.5223],
        [-1.4302, -1.2234, -1.1382],
        [ 0.7680,  0.1852, -0.6463],
        [-0.2558, -1.1903,  1.0131]])

获得tensor的size

In [11]:
# size() returns a torch.Size describing the tensor's dimensions.
x.size()

torch.Size([5, 3])

**Operation介绍**

加法

In [15]:
# Two independent 5x3 operands, each sampled uniformly from [0, 1).
x = torch.rand((5, 3))
y = torch.rand((5, 3))
# Only the final expression of a cell is echoed, so the bare `x` shows nothing
# and the output below is `y` alone.
x
y

tensor([[4.8947e-01, 7.5013e-01, 8.1698e-01],
        [6.9753e-01, 5.8730e-01, 3.1334e-04],
        [9.1243e-02, 5.0363e-01, 7.7099e-01],
        [5.8149e-01, 9.4084e-03, 9.1270e-01],
        [4.2103e-01, 6.2405e-01, 4.8425e-01]])

In [16]:
# Element-wise addition using the + operator; produces a new tensor.
x + y

tensor([[0.6305, 1.0551, 1.6968],
        [1.0692, 1.5418, 0.9777],
        [0.7846, 1.2300, 1.0307],
        [1.4538, 0.0449, 1.2372],
        [1.4084, 1.2771, 1.4291]])

In [17]:
# Same element-wise addition written as a function call.
torch.add(x,y)

tensor([[0.6305, 1.0551, 1.6968],
        [1.0692, 1.5418, 0.9777],
        [0.7846, 1.2300, 1.0307],
        [1.4538, 0.0449, 1.2372],
        [1.4084, 1.2771, 1.4291]])

In [18]:
# Pre-allocate the destination, then have torch.add write the sum into it
# through the out= argument instead of returning a fresh tensor.
result = torch.empty((5, 3))
torch.add(x, y, out=result)
result

tensor([[0.6305, 1.0551, 1.6968],
        [1.0692, 1.5418, 0.9777],
        [0.7846, 1.2300, 1.0307],
        [1.4538, 0.0449, 1.2372],
        [1.4084, 1.2771, 1.4291]])

in-place加法

In [19]:
# In-place addition: y itself is overwritten with y + x.
# Re-running this cell adds x to y again, so the result is not idempotent.
y.add_(x)
y

tensor([[0.6305, 1.0551, 1.6968],
        [1.0692, 1.5418, 0.9777],
        [0.7846, 1.2300, 1.0307],
        [1.4538, 0.0449, 1.2372],
        [1.4084, 1.2771, 1.4291]])

任何in-place的运算都会以 _ 结尾

如果希望resize（改变形状）一个tensor 可以使用Tensor.view方法（例如 x.view(...)）:

In [23]:
# view() reinterprets the same 16 elements under a different shape.
x = torch.rand((4, 4))
y = x.view(16)        # flattened to one dimension
z = x.view((-1, 8))   # -1 asks torch to infer that dimension: 16 / 8 = 2 rows
z

tensor([[0.0885, 0.5177, 0.5828, 0.6383, 0.7980, 0.8906, 0.1653, 0.4193],
        [0.8483, 0.6918, 0.0247, 0.5177, 0.3395, 0.3442, 0.4719, 0.1178]])

如果只有一个元素的tensor 可以使用item()转化成python元素

In [24]:
# A one-element tensor can be converted to a plain Python number with item().
x = torch.rand((1,))
x.item()

0.09365296363830566

**CUDA Tensors**

使用.to方法 tensor可以被移动到别的device上

In [26]:
# Tensors are moved between devices with .to(); the body only runs when a GPU
# is available.
if torch.cuda.is_available():
  device = torch.device("cuda")          # the device type string must be "cuda"
  y = torch.ones_like(x, device=device)  # create a tensor directly on the GPU
  x = x.to(device)                       # copy x onto the GPU
  z = x + y                              # both operands live on the same device
  # .to() returns a tensor instead of moving z in place, and an expression
  # inside an `if` body is not echoed by the notebook, so print the result.
  print(z.to("cpu"))

In [None]:
# NOTE(review): no earlier cell defines `model`, so on a fresh kernel this line
# raises NameError. It presumably illustrates moving an nn.Module to the GPU
# with .cuda() — confirm, and run it only after a model-defining cell.
model = model.cuda()

# **用numpy实现两层神经网络**

In [None]:
一个全连接Relu神经网络，一个隐藏层，没有bias，用x预测y，使用L2 loss

h = X · W1
a = max(0, h) #relu
y_pred = a · W2

In [27]:
import numpy as np

In [None]:
# Problem dimensions: batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets drawn from a standard normal distribution.
x, y = np.random.randn(N, D_in), np.random.randn(N, D_out)

# Randomly initialised weight matrices for the two bias-free layers.
w1, w2 = np.random.randn(D_in, H), np.random.randn(H, D_out)

LR = 1e-6

for it in range(500):
  # Forward pass.
  h = x @ w1                 # [N, D_in] @ [D_in, H] -> [N, H]
  h_relu = np.maximum(h, 0)  # ReLU
  y_pred = h_relu @ w2       # [N, H] @ [H, D_out] -> [N, D_out]

  # Loss: sum of squared errors over every output element.
  residual = y_pred - y
  loss = np.square(residual).sum()
  print(it, loss)

  # Backward pass: apply the chain rule by hand.
  grad_y_pred = 2.0 * residual           # d(loss)/d(y_pred)
  grad_w2 = h_relu.T @ grad_y_pred       # d(loss)/d(w2)

  grad_h_relu = grad_y_pred @ w2.T       # d(loss)/d(h_relu)
  # ReLU passes no gradient where its input was negative.
  grad_h = np.where(h < 0, 0, grad_h_relu)
  grad_w1 = x.T @ grad_h                 # d(loss)/d(w1)

  # Gradient-descent step.
  w1 -= LR * grad_w1
  w2 -= LR * grad_w2


# **使用pytorch进行训练**

In [45]:
import torch

In [None]:
# Problem dimensions: batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets.
x, y = torch.randn(N, D_in), torch.randn(N, D_out)

# Weights are leaf tensors tracked by autograd.
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

LR = 1e-6
for it in range(500):
  # Forward pass: linear -> ReLU (clamp at 0) -> linear.
  hidden = torch.clamp(torch.mm(x, w1), min=0)
  y_pred = torch.mm(hidden, w2)

  # Loss: sum of squared errors.
  loss = torch.sum((y_pred - y) ** 2)
  print(it, loss.item())

  # Backward pass: autograd fills w1.grad and w2.grad.
  loss.backward()

  # Update the weights outside of autograd tracking, then clear each gradient
  # because backward() accumulates into .grad.
  with torch.no_grad():
    w1.sub_(LR * w1.grad)
    w1.grad.zero_()
    w2.sub_(LR * w2.grad)
    w2.grad.zero_()


**简单的autograd**

In [34]:
# Three scalar leaf tensors tracked by autograd: x = 1, w = 2, b = 3.
x, w, b = (torch.tensor(v, requires_grad=True) for v in (1., 2., 3.))

# y = w * x + b
y = torch.add(torch.mul(w, x), b)

# Populate the .grad attribute of every leaf.
y.backward()

# dy/dw = x = 1, dy/dx = w = 2, dy/db = 1
print(w.grad, x.grad, b.grad, sep="\n")

tensor(1.)
tensor(2.)
tensor(1.)


**pytorch:nn**

In [None]:
import torch.nn as nn

# Problem dimensions: batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets.
x, y = torch.randn(N, D_in), torch.randn(N, D_out)

# Linear (bias disabled) -> ReLU -> Linear (with bias).
model = nn.Sequential(
    nn.Linear(D_in, H, bias=False),
    nn.ReLU(),
    nn.Linear(H, D_out),
)

# To train on a GPU, move the model first: model = model.to("cuda")
# Re-initialise both weight matrices from a standard normal distribution.
for layer in (model[0], model[2]):
  nn.init.normal_(layer.weight)

# reduction='sum' adds up the squared errors instead of averaging them.
loss_fn = nn.MSELoss(reduction='sum')

LR = 1e-6

for it in range(500):
  # Forward pass.
  y_pred = model(x)

  # Loss: sum of squared errors.
  loss = loss_fn(y_pred, y)
  print(it, loss.item())

  # Backward pass: autograd fills .grad on every parameter.
  loss.backward()

  # Manual gradient-descent step, kept out of autograd tracking.
  with torch.no_grad():
    for param in model.parameters():
      param.sub_(LR * param.grad)

  # Clear the gradients so the next backward() does not accumulate into them.
  model.zero_grad()



引入optimizer来更新权值

In [None]:
import torch.nn as nn

# Problem dimensions: batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear (bias disabled) -> ReLU -> Linear (with bias).
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H, bias=False),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# To train on a GPU, move the model first: model = model.to("cuda")
# The default nn.Linear initialisation is kept here; the normal_
# re-initialisation used with the manual update is not applied.

# reduction='sum' adds up the squared errors instead of averaging them.
loss_fn = nn.MSELoss(reduction='sum')

# Adam normalises each step by a running gradient magnitude, so every weight
# moves by roughly LR per iteration. The 1e-6 used for the hand-written SGD
# update leaves the loss almost unchanged after 500 steps; 1e-4 matches the
# class-based version of this example.
LR = 1e-4

# The optimizer owns the update rule; other algorithms (SGD, ...) can be
# swapped in here.
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

for it in range(500):
  # Forward pass.
  y_pred = model(x)

  # Loss: sum of squared errors.
  loss = loss_fn(y_pred, y)
  print(it, loss.item())

  # Clear gradients left over from the previous iteration before backward().
  optimizer.zero_grad()
  # Backward pass: autograd fills .grad on every parameter.
  loss.backward()

  # Apply one Adam update to all parameters.
  optimizer.step()

使用class来定义网络结构（实际项目中常用的写法）

In [None]:
import torch.nn as nn

# Problem dimensions: batch size, input width, hidden width, output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random training inputs and targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

class TwoLayerNet(torch.nn.Module):
  """Fully connected network: Linear (no bias) -> ReLU -> Linear.

  Args:
    D_in: size of each input sample.
    D_out: size of each output sample.
    H: hidden-layer width. It is an explicit argument so the class no longer
      reads a notebook-level global; the default of 100 matches the value
      used in this notebook, so existing two-argument calls keep working.
  """

  def __init__(self, D_in, D_out, H=100):
    super().__init__()
    # Layers registered as attributes are picked up by model.parameters().
    self.Linear1 = torch.nn.Linear(D_in, H, bias=False)
    self.Linear2 = torch.nn.Linear(H, D_out)

  def forward(self, x):
    """Return the prediction for a batch x whose last dimension is D_in."""
    # clamp(min=0) is the ReLU non-linearity between the two linear layers.
    y_pred = self.Linear2(self.Linear1(x).clamp(min=0))
    return y_pred

model = TwoLayerNet(D_in, D_out, H=H)

# reduction='sum' adds up the squared errors instead of averaging them.
loss_fn = nn.MSELoss(reduction='sum')

LR = 1e-4

# The optimizer owns the update rule; other algorithms (SGD, ...) can be
# swapped in here.
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

for it in range(500):
  # Forward pass.
  y_pred = model(x)

  # Loss: sum of squared errors.
  loss = loss_fn(y_pred, y)
  print(it, loss.item())

  # Clear gradients left over from the previous iteration before backward().
  optimizer.zero_grad()
  # Backward pass: autograd fills .grad on every parameter.
  loss.backward()

  # Apply one Adam update to all parameters.
  optimizer.step()