卷积

In [None]:
import torch
import torch.nn as nn

# Hand-written 4x4 feature map, lifted to the (batch, channel, height, width)
# layout that nn.Conv2d expects -> shape (1, 1, 4, 4).
feature_rows = [
    [4, 1, 7, 5],
    [4, 4, 2, 5],
    [7, 7, 2, 4],
    [1, 0, 2, 4],
]
input_feat = torch.tensor(feature_rows, dtype=torch.float32)[None, None]
print(input_feat)
print(input_feat.shape)

# Single-channel 2x2 convolution without a bias term; padding='same' keeps the
# output at the input's 4x4 spatial size.
conv2d = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(2, 2),
    stride=1,
    padding='same',
    bias=False,
)
print(conv2d.weight)  # randomly initialised kernel
print(conv2d.bias)    # None, because bias=False

# Swap in a fixed, non-trainable kernel so the result can be checked by hand.
kernels = torch.tensor([[1, 0], [2, 1]], dtype=torch.float32).reshape(1, 1, 2, 2)
conv2d.weight = nn.Parameter(kernels, requires_grad=False)
print(conv2d.weight)
output = conv2d(input_feat)
print(output)

深度可分离卷积

In [None]:
import torch
import torch.nn as nn

# Random 3-channel 5x5 feature map with a leading batch axis -> shape (1, 3, 5, 5).
x = torch.rand(3, 5, 5)[None]
print(x, x.shape)

# Depthwise stage: groups == in_channels, so every input channel is filtered
# by its own 3x3 kernel; padding="same" keeps the 5x5 spatial size.
in_channels_dw = out_channels_dw = x.shape[1]
kernel_size = 3
dw = nn.Conv2d(
    in_channels_dw,
    out_channels_dw,
    kernel_size,
    stride=1,
    padding="same",
    groups=in_channels_dw,
)

# Pointwise stage: a 1x1 convolution that mixes the channels (3 -> 4).
in_channels_pw, out_channels_pw, kernel_size_pw = out_channels_dw, 4, 1
pw = nn.Conv2d(in_channels_pw, out_channels_pw, kernel_size_pw, stride=1)

# Show the intermediate depthwise shape, then feed that result to the pointwise stage.
out = dw(x)
print(out.shape)
out = pw(out)
print(out, out.shape)

空洞卷积
- 常用于图像分割：无需通过下采样缩小特征图即可扩大感受野（若要保持输出尺寸不变，需配合相应的 padding；下面的示例未加 padding）

In [None]:
# One random 3-channel 5x5 feature map with a batch axis -> shape (1, 3, 5, 5).
x = torch.rand((3, 5, 5)).unsqueeze(0)

# Derive the layer configuration from this cell's own input rather than from
# variables left behind by an earlier cell, so the cell also runs on its own.
dilated_channels = x.shape[1]
dilated_kernel_size = 3

# dilation=2 spreads the 3x3 taps over a 5x5 window. No padding is applied
# here, so a 5x5 input yields a 1x1 output per channel.
conv = nn.Conv2d(dilated_channels, dilated_channels, dilated_kernel_size, 1, dilation=2)
out = conv(x)
print(out.shape)

模型训练

In [None]:
import torch
from torch import nn
import numpy as np
import random
from matplotlib import pyplot as plt

class LinearModel(nn.Module):
    """One-dimensional linear model computing ``input * weight + bias``.

    ``weight`` and ``bias`` are learnable tensors of shape ``(1,)``, each
    initialised with ``torch.randn``.
    """

    def __init__(self):
        super().__init__()
        # Registered in this order, so named_parameters() yields weight, then bias.
        self.weight = nn.Parameter(torch.randn(1))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, input):
        """Scale ``input`` by ``weight``, shift it by ``bias`` and return the result."""
        scaled = self.weight * input
        return scaled + self.bias

# Ground-truth line y = w*x + b that training should approximately recover.
w = 2
b = 3
model = LinearModel()

# 30 integer inputs in [-10, 10); every target gets integer noise from {0, 1, 2}.
x_train = np.random.randint(low=-10, high=10, size=30)
y_train = [w*x+b+random.randint(0,2) for x in x_train]
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)
y_train = torch.tensor(y_train, dtype=torch.float32)

# Build the loop-invariant objects once instead of on every iteration, and use
# a descriptive name so the builtin `input` is not shadowed in the notebook.
x_train_tensor = torch.from_numpy(x_train)
loss_fn = nn.MSELoss()
for _ in range(1000):
    output = model(x_train_tensor)
    loss = loss_fn(output, y_train)
    # Clear the gradients of exactly the parameters the optimizer updates.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Each item is a (name, parameter) tuple.
for param in model.named_parameters():
    print(param)

# Plot the fitted line over x in [-10, 10] together with the noisy training points.
x = np.asarray(range(-10,11))
pre_y = model(torch.from_numpy(x)).detach().numpy()
plt.plot(x, pre_y)
plt.plot(x_train, y_train, 'o')
plt.grid()
plt.show()

# Persist only the learned parameters (state_dict), not the pickled module.
torch.save(model.state_dict(), './linear_model.pth')

In [None]:
import torch
from torch import nn

class LinearModel(nn.Module):
    """Affine model ``input * weight + bias`` with two learnable ``(1,)`` tensors.

    The attribute names ``weight`` and ``bias`` are also the keys of this
    module's ``state_dict``.
    """

    def __init__(self):
        super().__init__()
        # Random starting values drawn with torch.randn.
        self.weight = nn.Parameter(torch.randn(1))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, input):
        """Return ``input`` scaled by ``weight`` and shifted by ``bias``."""
        return self.bias + self.weight * input

# Rebuild the architecture, then overwrite its random initial values with the
# parameters stored in the checkpoint file.
model = LinearModel()
saved_state = torch.load('./linear_model.pth')
model.load_state_dict(saved_state)
model.eval()  # switch the module to evaluation mode

# named_parameters() yields (name, parameter) pairs; print each pair as a tuple.
for name, value in model.named_parameters():
    print((name, value))

torchvision模型微调

In [None]:
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# AlexNet initialised with the ImageNet-1k pretrained weights.
alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
# Evaluation mode turns off the Dropout layers in AlexNet's classifier, so the
# same image always produces the same prediction.
alexnet.eval()

# Deterministic inference preprocessing: resize, centre-crop to 224x224, convert
# to a tensor and normalise with the ImageNet mean/std. A random crop is a
# training augmentation and would make the predicted class vary between runs.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229, 0.224, 0.225])])

# convert('RGB') guarantees three channels even for grayscale or RGBA files.
im = Image.open('dog.jpeg').convert('RGB')
input_tensor = transform(im).unsqueeze(0)  # add the batch axis

# No gradients are needed for a single forward pass.
with torch.no_grad():
    print(alexnet(input_tensor).argmax())
# https://gist.github.com/maraoz/388eddec39d60c6d52d4
print(alexnet)

# Replace the last classifier layer with a fresh 10-way linear head that keeps
# the original input width; the second print shows the modified architecture.
fc_in_features = alexnet.classifier[6].in_features
alexnet.classifier[6] = torch.nn.Linear(fc_in_features, 10)
print(alexnet)

In [None]:
import torchvision
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# CIFAR-10 test split (train=False), converted to tensors; downloaded into
# ./data when it is not already there.
cifar10_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)
tensor_loader = DataLoader(dataset=cifar10_dataset, batch_size=32)

# Only the images of the first batch are needed for the preview.
first_batch = next(iter(tensor_loader))
data = first_batch[0]

# Tile the 32 images into rows of 16 with 2 pixels of padding, then render the
# grid as a PIL image in the notebook.
grid_tensor = torchvision.utils.make_grid(data, nrow=16, padding=2)
to_pil = transforms.ToPILImage()
display(to_pil(grid_tensor))

In [None]:
import torchvision
import torchvision.models as models
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch

# Train on an accelerator when one is available: CUDA first, then Apple MPS,
# otherwise fall back to the CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    # Apple-silicon Mac
    device = torch.device("mps")
print(device)

# Random crop resized to 224x224, converted to a tensor and normalised with the
# ImageNet mean/std.
transform = transforms.Compose([
    transforms.RandomResizedCrop((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229, 0.224, 0.225])])
# NOTE(review): this fine-tunes on the split selected by train=False --
# presumably to keep the demo short; confirm this is intended.
cifar10_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform,
        target_transform=None, download=True)
print(len(cifar10_dataset))
# tensor_loader = DataLoader(dataset=cifar10_dataset, batch_size=32, pin_memory=True, pin_memory_device="mps")
# shuffle=True so every epoch visits the samples in a new order.
tensor_loader = DataLoader(dataset=cifar10_dataset, batch_size=100, shuffle=True)

# Pretrained AlexNet whose last classifier layer is replaced by a 10-way head.
alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
alexnet.classifier[6] = torch.nn.Linear(alexnet.classifier[6].in_features, 10)
alexnet = alexnet.to(device)
optimizer = torch.optim.SGD(alexnet.parameters(), lr=1e-4, weight_decay=1e-2, momentum=0.9)
loss_f = torch.nn.CrossEntropyLoss()
alexnet.train()
for epoch in range(3):
    for i, item in enumerate(tensor_loader):
        x, y = item[0].to(device), item[1].to(device)
        output = alexnet(x)
        loss = loss_f(output, y)
        if i % 10 == 0:
            # .item() logs the plain number instead of the tensor repr
            # (which would include grad_fn and device information).
            print('Epoch {}, Loss {}'.format(epoch+1, loss.item()))
        # Clear the gradients of exactly the parameters the optimizer updates.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# 实践图像分类

In [None]:
import torchvision
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch

class MyCNN(torch.nn.Module):
    """Minimal image classifier: one unpadded 3x3 convolution plus one linear layer.

    Args:
        input_size: Spatial size of the images the network will receive, given
            as ``(height, width)`` or as a single int for square images.
            Defaults to ``(224, 224)``.
        num_classes: Number of output logits. Defaults to 10.

    Raises:
        ValueError: If ``input_size`` is smaller than the 3x3 kernel in either
            dimension.
    """

    def __init__(self, input_size=(224, 224), num_classes=10):
        super().__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size
        # An unpadded 3x3 convolution with stride 1 removes two pixels per
        # spatial dimension (224 -> 222 for the default size).
        conv_height, conv_width = height - 2, width - 2
        if conv_height < 1 or conv_width < 1:
            raise ValueError(
                "input_size must be at least 3x3, got {}x{}".format(height, width)
            )
        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3)
        self.fc = torch.nn.Linear(16 * conv_height * conv_width, num_classes)

    def forward(self, input):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of 3-channel images."""
        x: torch.Tensor = self.conv1(input)
        # Keep the batch axis and flatten the rest; unlike view(), flatten also
        # accepts activations that are not contiguous in memory.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Train on an accelerator when one is available: CUDA first, then Apple MPS,
# otherwise fall back to the CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name(0))
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    # Apple-silicon Mac
    device = torch.device("mps")
print(device)

# Random crop resized to 224x224 (the input size MyCNN is built for by
# default), converted to a tensor and normalised with the ImageNet mean/std.
transform = transforms.Compose([
    transforms.RandomResizedCrop((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229, 0.224, 0.225])])
cifar10_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform,
        target_transform=None, download=True)
print(len(cifar10_dataset))
# shuffle=True so every epoch visits the training samples in a new order.
tensor_loader = DataLoader(dataset=cifar10_dataset, batch_size=100, shuffle=True, num_workers=5)
net = MyCNN().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=5e-4, weight_decay=1e-2, momentum=0.8)
loss_f = torch.nn.CrossEntropyLoss()
net.train()
for epoch in range(3):
    for i, item in enumerate(tensor_loader):
        x, y = item[0].to(device), item[1].to(device)
        output = net(x)
        loss = loss_f(output, y)
        if i % 100 == 0:
            # .item() logs the plain number instead of the tensor repr
            # (which would include grad_fn and device information).
            print('Epoch {}, Loss {}'.format(epoch+1, loss.item()))
        # Clear the gradients of exactly the parameters the optimizer updates.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Saves the whole pickled module (not just a state_dict): loading it back
# requires the MyCNN class to be defined in the loading session.
torch.save(net, "mycnn.pth")

In [None]:
import torchvision
from torch.utils.data import DataLoader
import torch

# Evaluate on whichever backend this machine offers instead of assuming "mps".
eval_device = torch.device("cpu")
if torch.cuda.is_available():
    eval_device = torch.device("cuda")
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    eval_device = torch.device("mps")

# NOTE: the checkpoint is a fully pickled module. Unpickling executes code, so
# only load files you created yourself, and the MyCNN class must already be
# defined in this session. PyTorch releases whose torch.load defaults to
# weights_only=True (2.6 and later) reject such files unless
# weights_only=False is passed explicitly.
# Loading onto the CPU first works regardless of the device used for saving.
net:torch.nn.Module = torch.load("mycnn.pth", map_location="cpu")
net = net.to(eval_device)

# Deterministic evaluation preprocessing defined in this cell: a plain resize
# to 224x224 rather than a random training crop, so the error count is
# reproducible and the cell does not rely on a variable from another cell.
eval_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229, 0.224, 0.225])])
cifar10_test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=eval_transform,
        target_transform=None, download=True)
print(len(cifar10_test_dataset))
test_tensor_loader = DataLoader(dataset=cifar10_test_dataset, batch_size=1000)

net.eval()
count = 0  # number of misclassified test images
# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_tensor_loader:
        # Predicted class = index of the largest logit; compare on the CPU
        # against the labels for the whole batch at once.
        predictions = net(images.to(eval_device)).argmax(dim=1).cpu()
        count += (predictions != labels).sum().item()
print(count)