In [17]:
import torch
import numpy as np

In [2]:
torch.cuda.is_available()

False

In [3]:
from torch.autograd import Variable

In [4]:
x = Variable(torch.ones(2,2), requires_grad=True)
y = x + 2
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [7]:
tensor = torch.ones(4,4)

In [8]:
tensor[:,1] = 0
tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [9]:
# 张量拼接
t1 = torch.cat([tensor, tensor, tensor], dim=1)
t1

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])

In [10]:
# 张量乘积
tensor.mul(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [11]:
# 张量乘积
tensor * tensor

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

In [12]:
# 张量矩阵乘积
tensor.matmul(tensor.T)

tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])

In [13]:
# 张量矩阵乘积
tensor @ tensor.T

tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])

In [14]:
# 自动赋值
tensor.add_(5)

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])

In [15]:
tensor

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])

In [16]:
# tensor to numpy
tensor.numpy()

array([[6., 5., 6., 6.],
       [6., 5., 6., 6.],
       [6., 5., 6., 6.],
       [6., 5., 6., 6.]], dtype=float32)

In [18]:
# numpy to tensor
n = np.ones(5)
t = torch.from_numpy(n)
# 修改ndarray ,tensor 也随之改变 
np.add(n, 1, out=n)
t

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

# torch.autograd

In [19]:
import torch, torchvision

In [21]:
model = torchvision.models.resnet18(pretrained=True)
data = torch.rand(1,3,64,64)
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/jiabinwang/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100.0%


In [22]:
prediction = model(data)

In [23]:
loss = (prediction - labels).sum()
loss.backward()   # backward pass

In [24]:
# 定义优化器，学习率0.01，动量0.9
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [25]:
# 启动梯度下降
optim.step()  # gradient descent

## autograd 的微分

In [28]:
import torch
a = torch.tensor([2.,3.], requires_grad=True)
b = torch.tensor([6.,4.], requires_grad=True)
a

tensor([2., 3.], requires_grad=True)

In [29]:
Q = 3*a**3 - b**2
Q

tensor([-12.,  65.], grad_fn=<SubBackward0>)

##### torch.autograd跟踪所有将其requires_grad标志设置为True的张量的操作。 对于不需要梯度的张量，将此属性设置为False会将其从梯度计算 DAG 中排除。

##### 即使只有一个输入张量具有requires_grad=True，操作的输出张量也将需要梯度。


##### 在 NN 中，不计算梯度的参数通常称为冻结参数。 如果事先知道您不需要这些参数的梯度，则“冻结”模型的一部分很有用（通过减少自动梯度计算，这会带来一些性能优势）。


In [33]:
from torch import nn, optim
model = torchvision.models.resnet18(pretrained=True)

In [34]:
# # Freeze all the parameters in the network
for p in model.parameters():
    p.requires_grad = False

##### 假设我们要在具有 10 个标签的新数据集中微调模型。 在 resnet 中，分类器是最后一个线性层model.fc。 我们可以简单地将其替换为充当我们的分类器的新线性层（默认情况下未冻结）。

In [None]:
model.fc = nn.Linear(512,10)

##### 现在，除了model.fc的参数外，模型中的所有参数都将冻结。 计算梯度的唯一参数是model.fc的权重和偏差。

In [38]:
# Optimize only the classifier
optimizer = optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9)

##### torch.no_grad()中的上下文管理器可以使用相同的排除功能。

In [45]:
import torch

x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w)+b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [46]:
print(z.grad_fn)

<AddBackward0 object at 0x11e8a6c70>


In [47]:
loss.grad_fn

<BinaryCrossEntropyWithLogitsBackward0 at 0x16a0958b0>

In [48]:
loss.backward()

In [49]:
w.grad

tensor([[0.1054, 0.3304, 0.3227],
        [0.1054, 0.3304, 0.3227],
        [0.1054, 0.3304, 0.3227],
        [0.1054, 0.3304, 0.3227],
        [0.1054, 0.3304, 0.3227]])

In [50]:
b.grad

tensor([0.1054, 0.3304, 0.3227])

In [41]:
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False


##### 使用numpy创建一个y=10*x+4+noise(0,1)的数据，其中x是0到100的范围，以0.01进行等差数列
##### 使用pytorch定义w和b，并使用随机梯度下降，完成回归拟合。


In [51]:
import numpy as np
import torch.nn as nn
import torch
import torch.optim as optim

In [71]:
class Mlp(nn.Module):
    def __init__(self, w, b):
        super(Mlp, self).__init__()
        self.w = nn.Parameter(w)  # 初始化参数
        self.b = nn.Parameter(b)
        
    def forward(self, x):
        out = torch.matmul(x, self.w) + self.b
        return out

In [68]:
w = torch.tensor([[1.]])
b = torch.tensor([1.])
x = np.arange(0, 100, 0.01)
noise = np.random.normal(0, 1, len(x))  # (0,1)的高斯噪声
y = 10 * x + 4 + noise

In [69]:
x = torch.from_numpy(x).float()
y = torch.from_numpy(y).float()
x = torch.unsqueeze(x, dim=1) # 转换[1,10000]为[10000,1]
y = torch.unsqueeze(y, dim=1)  # 转换[1,10000]为[10000,1]

In [72]:
model = Mlp(w, b)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-4)
for epoch in range(10000):
    y_pre = model(x)
    loss = criterion(y_pre, y)
    if epoch % 1000 == 0:
        print(f'epoch:{epoch}, loss is {loss}')
    
    optimizer.zero_grad() # 梯度清零
    loss.backward()  # 反向传播计算梯度
    optimizer.step() # 更新参数


epoch:0, loss is 272658.625
epoch:1000, loss is 2.878840923309326
epoch:2000, loss is 2.6988868713378906
epoch:3000, loss is 2.536067008972168
epoch:4000, loss is 2.388726234436035
epoch:5000, loss is 2.2554266452789307
epoch:6000, loss is 2.1348049640655518
epoch:7000, loss is 2.0256617069244385
epoch:8000, loss is 1.9269031286239624
epoch:9000, loss is 1.8375498056411743


In [73]:
print('w的值为', model.w.item())
print('b的值为', model.b.item())

w的值为 10.025729179382324
b的值为 2.273160219192505


##### 在pytorch中使用矩阵乘法实现全连接层
##### 在pytorch中使用nn.Linear层

In [98]:
import torch
import torch.nn as nn

class Myliner(nn.Module):
    def __init__(self, in_features, out_features):
        super(Myliner, self).__init__()
        self.w = nn.Parameter(torch.Tensor(out_features, in_features))
        self.b = nn.Parameter(torch.Tensor(out_features))
        
    def forward(self, x):
        out = x @ self.w.t() + self.b
        return out

In [100]:
model = Myliner(784, 10)
x = torch.rand(100, 784)
out = model(x)

for n, p in model.named_parameters():
    print(f'{n}:{p.shape}')
# out

w:torch.Size([10, 784])
b:torch.Size([10])


In [102]:
import torch 
import torch.nn as nn

class linear(nn.Module):
    def __init__(self, in_features, out_features):
        super(linear, self).__init__()
        self.fc = nn.Linear(in_features, out_features)
        
    def forward(self, x):
        out = self.fc(x)
        return out


In [103]:
model = linear(784, 10)
x = torch.rand(100, 784)
out = model(x)

for n, p in model.named_parameters():
    print(f'{n}:{p.shape}')

fc.weight:torch.Size([10, 784])
fc.bias:torch.Size([10])


## 激活函数

In [105]:
def LeakyRelu(x, negative_slope=0.01, inplace=False):
    return max(0, x) + negative_slope * min(0, x)

def Relu(x, inplace=False):
    return max(0, x)

def Sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def LogSigmoid(x):
    return np.log(1.0 / (1.0 + np.exp(-x)))

def Tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))


## 卷积层

In [106]:
# 计算下如下卷积层的参数量
nn.Conv2d(            
        in_channels=1,            
        out_channels=32,            
        kernel_size=5,            
        stride=1,            
        padding=2
)

Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))

In [108]:
import torch.nn as nn
import torch

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(            
        in_channels=1,            
        out_channels=32,            
        kernel_size=5,            
        stride=1,            
        padding=2
        )
        
    def forward(self, x):
        return self.conv1(x)
    
model = Net()
# p = sum(map(lambda p:p.numel(), model.parameters()))
p = sum(p.numel() for p in list(model.parameters()))
p

832

# PyTorch常见的损失函数和优化器使用

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

class linear(nn.Module):
    def __init__(self, in_features, out_features):
        super(linear, self).__init__()
        self.fc = nn.Linear(in_features, out_features)
        
    def forward(self, x):
        out = self.fc(x)
        return out
    

In [5]:
x = torch.randn(4, 3)
w = torch.randint(5, 10, size=(3, 1), dtype=torch.float)
b = torch.tensor(5.)
noise = torch.randn(4, 1)
y = x @ w + b + noise

In [8]:
res = {}
for lr in [0.5, 0.1, 0.01]:
    best_loss = float('inf')
    best_epoch = 0
    model = linear(3, 1)
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    for epoch in range(10000):
        y_pre = model(x)
        loss = criterion(y_pre, y)
        if epoch % 1000 == 0:
            print(f'Epoch:{epoch},loss is:{loss}')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if loss < best_loss:
            best_loss = loss
            best_epoch = epoch
    res[lr] = {'loss':best_loss.item(), 'epoch':best_epoch}
    
print(res)

Epoch:0,loss is:11.14564037322998
Epoch:1000,loss is:4.220623850414995e-12
Epoch:2000,loss is:4.220623850414995e-12
Epoch:3000,loss is:4.220623850414995e-12
Epoch:4000,loss is:4.220623850414995e-12
Epoch:5000,loss is:4.220623850414995e-12
Epoch:6000,loss is:4.220623850414995e-12
Epoch:7000,loss is:4.220623850414995e-12
Epoch:8000,loss is:4.220623850414995e-12
Epoch:9000,loss is:4.220623850414995e-12
Epoch:0,loss is:12.907681465148926
Epoch:1000,loss is:4.901413558400236e-05
Epoch:2000,loss is:2.15831619243545e-09
Epoch:3000,loss is:1.6082424281194108e-10
Epoch:4000,loss is:1.6082424281194108e-10
Epoch:5000,loss is:1.6082424281194108e-10
Epoch:6000,loss is:1.6082424281194108e-10
Epoch:7000,loss is:1.6082424281194108e-10
Epoch:8000,loss is:1.6082424281194108e-10
Epoch:9000,loss is:1.6082424281194108e-10
Epoch:0,loss is:11.111713409423828
Epoch:1000,loss is:0.6043795347213745
Epoch:2000,loss is:0.2198248952627182
Epoch:3000,loss is:0.0800929069519043
Epoch:4000,loss is:0.02918194048106670

# PyTorch池化层和归一化层

In [9]:
import torch
import torch.nn as nn

x = torch.randn(10, 3, 32, 32)
avg = nn.AvgPool2d(3, 3)
avg(x).shape

torch.Size([10, 3, 10, 10])

In [10]:
maxp = nn.MaxPool2d(7, 3)
maxp(x).shape

torch.Size([10, 3, 9, 9])