In [16]:
import torch
torch.cuda.is_available()


True

In [17]:
import torch

print(torch.__version__)
print(torch.cuda.is_available())
print(torch.cuda.get_device_name())

2.2.2+cu121
True
NVIDIA GeForce RTX 2080 Ti


## Ret50

## Ret50 分类FashionMNIST数据集

In [18]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Bottleneck(nn.Module):
    def __init__(self, in_channels, channels, stride=1, use_1x1conv=False):
        super(Bottleneck,self).__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv3 = nn.Conv2d(channels, channels*4, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels*4)

        if use_1x1conv:
            self.conv4 = nn.Conv2d(
                in_channels, channels*4, kernel_size=1, stride=stride
            )
        else:
            self.conv4 = None

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if  self.conv4 == None:
            identity = x
        else:
            identity = self.conv4(x)

        out += identity
        return F.relu(out)

def bottleneck_block(in_channels, channels, num_bottlenecks, stride=1):
    blk = []
    for i in range(num_bottlenecks):
        if i == 0:
            blk.append(Bottleneck(in_channels, channels, stride=stride, use_1x1conv=True))
        else:
            blk.append(Bottleneck(channels*4, channels))
    return blk

def ResNet(block, layers, num_classes=10):
    def init_weights(self, m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.kaiming_normal_(m.weight)
    b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                       nn.BatchNorm2d(64), nn.ReLU(),
                       nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    b2 = nn.Sequential(*bottleneck_block(64, 64, layers[0], stride=1))
    b3 = nn.Sequential(*bottleneck_block(64*4, 128, layers[1], stride=2))
    b4 = nn.Sequential(*bottleneck_block(128*4, 256, layers[2], stride=2))
    b5 = nn.Sequential(*bottleneck_block(256*4, 512, layers[3], stride=2))

    resnet = nn.Sequential(
        b1, b2, b3, b4, b5,
        nn.AdaptiveAvgPool2d((1,1)),
        nn.Flatten(),
        nn.Linear(512*4, num_classes)
    )
    return resnet

def resnet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

def resnet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])



def train_loss(net, train_iter, test_iter, epochs, loss, device, lr):
    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer

    trainer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-50_Dict_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\ResNet-50_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer
    
    
# 加载FashionMNIST
trans24 = transforms.Compose(
    [transforms.ToTensor()])#28*28
train_dataset = datasets.FashionMNIST(
    root=r'./', train=True, transform=trans24, download=True)
test_dataset = datasets.FashionMNIST(
    root=r'./', train=False, transform=trans24, download=True)

# 配置数据加载器
batch_size = 128
train_loader64 = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader64 = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=True)
def resnet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

def resnet110():
    return ResNet(Bottleneck, [18, 18, 18, 18])
# 开始训练
epochs, lr = 20, 0.001#epochs,lr = 20, 0.001
device = torch.device("cuda:0")
loss = nn.CrossEntropyLoss()
resnet50 = resnet50()
train_loss(resnet50, test_loader64, test_loader64, epochs, loss, device, lr)
# tensorboard 运行
# (mytorch) PS F:\DeepLearn\resnet\runs> tensorboard --logdir=Apr10_17-45-21_NikeLee
# 其中 'Apr10_17-45-21_NikeLee'是文件夹名称

Epoch: 1, Batch: 1, Train Loss: 2.6636, Train Acc: 0.0781
Epoch: 1, Batch: 2, Train Loss: 3.4807, Train Acc: 0.1016
Epoch: 1, Batch: 3, Train Loss: 4.2097, Train Acc: 0.0911
Epoch: 1, Batch: 4, Train Loss: 4.0664, Train Acc: 0.1191
Epoch: 1, Batch: 5, Train Loss: 3.9043, Train Acc: 0.1484
Epoch: 1, Batch: 6, Train Loss: 3.7906, Train Acc: 0.1771
Epoch: 1, Batch: 7, Train Loss: 3.6019, Train Acc: 0.1875
Epoch: 1, Batch: 8, Train Loss: 3.3676, Train Acc: 0.2168
Epoch: 1, Batch: 9, Train Loss: 3.2054, Train Acc: 0.2370
Epoch: 1, Batch: 10, Train Loss: 3.0426, Train Acc: 0.2594
Epoch: 1, Batch: 11, Train Loss: 2.9297, Train Acc: 0.2812
Epoch: 1, Batch: 12, Train Loss: 2.7861, Train Acc: 0.3047
Epoch: 1, Batch: 13, Train Loss: 2.6925, Train Acc: 0.3233
Epoch: 1, Batch: 14, Train Loss: 2.5843, Train Acc: 0.3449
Epoch: 1, Batch: 15, Train Loss: 2.4970, Train Acc: 0.3635
Epoch: 1, Batch: 16, Train Loss: 2.4036, Train Acc: 0.3804
Epoch: 1, Batch: 17, Train Loss: 2.3289, Train Acc: 0.3874
Epoch:

# CIFAR-10

In [37]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Bottleneck(nn.Module):
    def __init__(self, in_channels, channels, stride=1, use_1x1conv=False):
        super(Bottleneck,self).__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv3 = nn.Conv2d(channels, channels*4, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels*4)

        if use_1x1conv:
            self.conv4 = nn.Conv2d(
                in_channels, channels*4, kernel_size=1, stride=stride
            )
        else:
            self.conv4 = None

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if  self.conv4 == None:
            identity = x
        else:
            identity = self.conv4(x)

        out += identity
        return F.relu(out)

def bottleneck_block(in_channels, channels, num_bottlenecks, stride=1):
    blk = []
    for i in range(num_bottlenecks):
        if i == 0:
            blk.append(Bottleneck(in_channels, channels, stride=stride, use_1x1conv=True))
        else:
            blk.append(Bottleneck(channels*4, channels))
    return blk


    b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                       nn.BatchNorm2d(64), nn.ReLU(),
                       nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    b2 = nn.Sequential(*bottleneck_block(64, 64, layers[0], stride=1))
    b3 = nn.Sequential(*bottleneck_block(64*4, 128, layers[1], stride=2))
    b4 = nn.Sequential(*bottleneck_block(128*4, 256, layers[2], stride=2))
    b5 = nn.Sequential(*bottleneck_block(256*4, 512, layers[3], stride=2))

    resnet = nn.Sequential(
        b1, b2, b3, b4, b5,
        nn.AdaptiveAvgPool2d((1,1)),
        nn.Flatten(),
        nn.Linear(512*4, num_classes)
    )
    return resnet

def resnet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

def resnet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])



def train_loss(net, train_iter, test_iter, epochs, loss, device, lr):
    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer

    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            # nn.init.xavier_uniform(m.weight)
            nn.init.kaiming_normal_(m.weight)
    net.apply(init_weights)
            
    trainer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-50_Dict_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\ResNet-50_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer
    
    
# 加载MNIST
trans24 = transforms.Compose(
    [transforms.ToTensor()])#28*28



train_dataset = datasets.MNIST(
    root=r'./', train=True, transform=trans24, download=True)
test_dataset = datasets.MNIST(
    root=r'./', train=False, transform=trans24, download=True)

# 配置数据加载器
batch_size = 128
train_loader64 = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader64 = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=True)
def resnet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

def resnet110():
    return ResNet(Bottleneck, [18, 18, 18, 18])
# 开始训练
#epochs,lr = 20, 0.001
epochs, lr = 20, 0.001
device = torch.device("cuda:0")
loss = nn.CrossEntropyLoss()
resnet50 = resnet50()
train_loss(resnet50, test_loader64, test_loader64, epochs, loss, device, lr)
# tensorboard 运行
# (mytorch) PS F:\DeepLearn\resnet\runs> tensorboard --logdir=Apr10_17-45-21_NikeLee
# 其中 'Apr10_17-45-21_NikeLee'是文件夹名称

Epoch: 1, Batch: 1, Train Loss: 6.4479, Train Acc: 0.1094
Epoch: 1, Batch: 2, Train Loss: 9.8244, Train Acc: 0.1992
Epoch: 1, Batch: 3, Train Loss: 11.8972, Train Acc: 0.2292
Epoch: 1, Batch: 4, Train Loss: 12.8932, Train Acc: 0.2402
Epoch: 1, Batch: 5, Train Loss: 12.2660, Train Acc: 0.2516
Epoch: 1, Batch: 6, Train Loss: 10.9620, Train Acc: 0.2943
Epoch: 1, Batch: 7, Train Loss: 9.6736, Train Acc: 0.3393
Epoch: 1, Batch: 8, Train Loss: 8.7180, Train Acc: 0.3779
Epoch: 1, Batch: 9, Train Loss: 7.9866, Train Acc: 0.4071
Epoch: 1, Batch: 10, Train Loss: 7.3705, Train Acc: 0.4359
Epoch: 1, Batch: 11, Train Loss: 6.8977, Train Acc: 0.4581
Epoch: 1, Batch: 12, Train Loss: 6.4702, Train Acc: 0.4779
Epoch: 1, Batch: 13, Train Loss: 6.1345, Train Acc: 0.4928
Epoch: 1, Batch: 14, Train Loss: 5.7658, Train Acc: 0.5162
Epoch: 1, Batch: 15, Train Loss: 5.4420, Train Acc: 0.5333
Epoch: 1, Batch: 16, Train Loss: 5.1831, Train Acc: 0.5479
Epoch: 1, Batch: 17, Train Loss: 4.9849, Train Acc: 0.5579
Ep

# transform不一样

In [39]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Bottleneck(nn.Module):
    def __init__(self, in_channels, channels, stride=1, use_1x1conv=False):
        super(Bottleneck,self).__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv3 = nn.Conv2d(channels, channels*4, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels*4)

        if use_1x1conv:
            self.conv4 = nn.Conv2d(
                in_channels, channels*4, kernel_size=1, stride=stride
            )
        else:
            self.conv4 = None

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if  self.conv4 == None:
            identity = x
        else:
            identity = self.conv4(x)

        out += identity
        return F.relu(out)

def bottleneck_block(in_channels, channels, num_bottlenecks, stride=1):
    blk = []
    for i in range(num_bottlenecks):
        if i == 0:
            blk.append(Bottleneck(in_channels, channels, stride=stride, use_1x1conv=True))
        else:
            blk.append(Bottleneck(channels*4, channels))
    return blk

def ResNet(block, layers, num_classes=10):
   
    b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                       nn.BatchNorm2d(64), nn.ReLU(),
                       nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    b2 = nn.Sequential(*bottleneck_block(64, 64, layers[0], stride=1))
    b3 = nn.Sequential(*bottleneck_block(64*4, 128, layers[1], stride=2))
    b4 = nn.Sequential(*bottleneck_block(128*4, 256, layers[2], stride=2))
    b5 = nn.Sequential(*bottleneck_block(256*4, 512, layers[3], stride=2))

    resnet = nn.Sequential(
        b1, b2, b3, b4, b5,
        nn.AdaptiveAvgPool2d((1,1)),
        nn.Flatten(),
        nn.Linear(512*4, num_classes)
    )
    return resnet

def resnet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])

def resnet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])



def train_loss(net, train_iter, test_iter, epochs, loss, device, lr):
    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            # nn.init.xavier_uniform(m.weight)
            nn.init.kaiming_normal_(m.weight)
    net.apply(init_weights)
    trainer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-50_Dict_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\ResNet-50_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer
    
    
# 加载MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])




train_dataset = datasets.MNIST(
    root=r'./', train=True, transform=trans24, download=True)
test_dataset = datasets.MNIST(
    root=r'./', train=False, transform=trans24, download=True)

# 配置数据加载器
batch_size = 4096
train_loader64 = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader64 = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=True)

# 开始训练
#epochs,lr = 20, 0.001
epochs, lr = 20, 0.001
device = torch.device("cuda:0")
loss = nn.CrossEntropyLoss()
resnet101 = resnet101()
train_loss(resnet101, test_loader64, test_loader64, epochs, loss, device, lr)
# tensorboard 运行
# (mytorch) PS F:\DeepLearn\resnet\runs> tensorboard --logdir=Apr10_17-45-21_NikeLee
# 其中 'Apr10_17-45-21_NikeLee'是文件夹名称

Epoch: 1, Batch: 1, Train Loss: 8.8769, Train Acc: 0.0935
Epoch: 1, Batch: 2, Train Loss: 23.2924, Train Acc: 0.1036
Epoch: 1, Batch: 3, Train Loss: 27.4031, Train Acc: 0.1396
Epoch: 2, Batch: 1, Train Loss: 46.1325, Train Acc: 0.3682
Epoch: 2, Batch: 2, Train Loss: 38.9593, Train Acc: 0.3694
Epoch: 2, Batch: 3, Train Loss: 34.4975, Train Acc: 0.3877
Epoch: 3, Batch: 1, Train Loss: 6.7529, Train Acc: 0.4521
Epoch: 3, Batch: 2, Train Loss: 4.3963, Train Acc: 0.5599
Epoch: 3, Batch: 3, Train Loss: 3.9440, Train Acc: 0.5803
Epoch: 4, Batch: 1, Train Loss: 0.8972, Train Acc: 0.7622
Epoch: 4, Batch: 2, Train Loss: 0.8061, Train Acc: 0.7819
Epoch: 4, Batch: 3, Train Loss: 0.7590, Train Acc: 0.7928
Epoch: 5, Batch: 1, Train Loss: 0.4295, Train Acc: 0.8740
Epoch: 5, Batch: 2, Train Loss: 0.3979, Train Acc: 0.8820
Epoch: 5, Batch: 3, Train Loss: 0.3834, Train Acc: 0.8853
Epoch: 6, Batch: 1, Train Loss: 0.2877, Train Acc: 0.9092
Epoch: 6, Batch: 2, Train Loss: 0.2658, Train Acc: 0.9143
Epoch: 6,

In [22]:
def count_layers(model):
    total_layers = sum(1 for _ in model.named_parameters())
    return total_layers
resnet50 = resnet101()
# 假设已经生成了 ResNet-50 模型，保存在变量 model 中
num_layers = count_layers(resnet101)
print("模型层数:", num_layers)

模型层数: 311


In [25]:
def count_layers(model):
    total_layers = sum(1 for _ in model.named_parameters())
    return total_layers
resnet50 = resnet50()
# 假设已经生成了 ResNet-50 模型，保存在变量 model 中
num_layers = count_layers(resnet50)
print("模型层数:", num_layers)

模型层数: 158


In [36]:
def count_residual_blocks(model):
    residual_blocks = []
    for name, module in model.named_modules():
        if isinstance(module, Bottleneck):
            residual_blocks.append(module)
    
    num_blocks = [sum(1 for _ in rb.children()) for rb in residual_blocks]
    return num_blocks
print(count_residual_blocks(resnet50))
# 假设已经生成了 ResNet-50 模型，保存在变量 model 中
len(count_residual_blocks(resnet50)),len(count_residual_blocks(resnet101))

[7, 6, 6, 7, 6, 6, 6, 7, 6, 6, 6, 6, 6, 7, 6, 6]


(16, 33)

# CIFAR-10

In [5]:
from torch import optim


# 导入必要的库
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from d2l import torch as d2l
from torchvision import  transforms



# 1x1 conv -> 3x3 conv -> 1x1 conv
class Bottleneck(nn.Module):
    def __init__(self, in_channels, channels, stride=1, use_1x1conv=False):
        super(Bottleneck,self).__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv3 = nn.Conv2d(channels, channels*4, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels*4)

        if use_1x1conv:
            self.conv4 = nn.Conv2d(
                in_channels, channels*4, kernel_size=1, stride=stride
            )
        else:
            self.conv4 = None

    def forward(self, x):
        # 1x1 conv 通道数：in_channels -> channels
        out = F.relu(self.bn1(self.conv1(x)))
        # 3x3 conv 通道数：channels -> channels
        out = F.relu(self.bn2(self.conv2(out)))
        # 1x1 conv 通道数: channels -> 4*channels
        out = self.bn3(self.conv3(out))

        # 恒等映射 or 1x1 conv
        if  self.conv4 == None:
            identity = x
        else:
            identity = self.conv4(x)


        out += identity
        return F.relu(out)
def bottleneck_block(in_channels, channels, num_bottlenecks, not_FirstBlock = True):
    # 第一个neck使用1x1conv，剩余的neck不使用1x1conv
    # 第一个block的stride=1，后面的block的stride=2
    blk = []
    for i in range(num_bottlenecks):
        if i == 0:
            blk.append(
                Bottleneck(in_channels, channels, stride=not_FirstBlock+1, use_1x1conv=True)
            )
        else:
            blk.append(
                Bottleneck(channels*4, channels)
            )
    return blk
b1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),# 第一个参数是图片通道数 3是有色图片
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b2 = nn.Sequential(*bottleneck_block(64, 64, 3, not_FirstBlock=False))
b3 = nn.Sequential(*bottleneck_block(64*4, 128, 3))
b4 = nn.Sequential(*bottleneck_block(128*4, 256, 3))
b5 = nn.Sequential(*bottleneck_block(256*4, 512, 3))
resnet18 = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(),
    nn.Linear(2048, 10)
)

# 定义训练
from torch.utils.tensorboard import SummaryWriter
import tensorboard
def train_loss(net, train_iter, test_iter, epochs, loss, device, lr):
    # 参数初始化kaiming_uniform_
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            # nn.init.xavier_uniform(m.weight)
            nn.init.kaiming_normal_(m.weight)
    net.apply(init_weights)
    

    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer

    trainer = torch.optim.Adam(net.parameters(), lr=lr)
    # trainer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-50_Dict_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\ResNet-50_FashionMNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer
    
# 加载FashionMNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 加载CIFAR-10训练数据集
train_dataset = datasets.CIFAR10(
    root='./data', 
    train=True,
    download=True,
    transform=transform
)

# 加载CIFAR-10测试数据集
test_dataset = datasets.CIFAR10(
    root='./data',
    train=False, 
    download=True,
    transform=transform
)

            # nn.init.kaiming_normal_(m.weight)
# batch_size = 4096:train loss 0.011, train acc 0.999, test acc 0.992 Adma优化器 lr =0.001
# batch_size = 4096 train loss 2.303, train acc 0.100, test acc 0.100 Adma优化器 lr =0.1
# batch_size = 4096 train loss 1.286, train acc 0.556, test acc 0.503 sgd优化器 lr =0.1
# batch_size = 4096 train loss 0.315, train acc 0.960, test acc 0.860 sgd优化器 lr =0.001

            # nn.init.xavier_uniform(m.weight)
#  batch_size = 4096 ,train loss 1.131, train acc 0.669, test acc 0.607  sgd优化器 lr =0.001  nn.init.xavier_uniform(m.weight)
# 配置数据加载器
batch_size = 4096 #
# batch_size = 128 # 
train_loader64 = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader64 = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=False)

# 开始训练
epochs, lr = 20, 0.001#epochs,lr = 20, 0.001
device = torch.device("cuda:0")
loss = nn.CrossEntropyLoss()
# train_loss(net, train_iter, test_iter, num_epochs, loss, trainer, device, lr

train_loss(resnet18, test_loader64, test_loader64, epochs, loss, device, lr)

# tensorboard 运行
# (mytorch) PS F:\DeepLearn\resnet\runs> tensorboard --logdir=Apr10_17-45-21_NikeLee
# 其中 'Apr10_17-45-21_NikeLee'是文件夹名称

Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Batch: 1, Train Loss: 5.2135, Train Acc: 0.1067
Epoch: 1, Batch: 2, Train Loss: 12.8641, Train Acc: 0.1455
Epoch: 1, Batch: 3, Train Loss: 13.5099, Train Acc: 0.1407
Epoch: 2, Batch: 1, Train Loss: 10.2949, Train Acc: 0.1848
Epoch: 2, Batch: 2, Train Loss: 8.3082, Train Acc: 0.1991
Epoch: 2, Batch: 3, Train Loss: 7.5315, Train Acc: 0.1989
Epoch: 3, Batch: 1, Train Loss: 3.1154, Train Acc: 0.2427
Epoch: 3, Batch: 2, Train Loss: 2.9448, Train Acc: 0.2670
Epoch: 3, Batch: 3, Train Loss: 2.8826, Train Acc: 0.2682
Epoch: 4, Batch: 1, Train Loss: 2.1189, Train Acc: 0.3157
Epoch: 4, Batch: 2, Train Loss: 2.0579, Train Acc: 0.3307
Epoch: 4, Batch: 3, Train Loss: 2.0515, Train Acc: 0.3309
Epoch: 5, Batch: 1, Train Loss: 1.7847, Train Acc: 0.4084
Epoch: 5, Batch: 2, Train Loss: 1.7403, Train Acc: 0.4119
Epoch: 5, Batch: 3, Train Loss: 1.7188, Train Acc: 0.4147
Epoch: 6, Batch: 1, Train Loss: 1.4733, Train Acc: 

## Ret50 分类 MNIST数据集

In [7]:
# 导入必要的库
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from d2l import torch as d2l
from torchvision import  transforms


# data = unpickle('./CIFAR10/cifar-10-batches-py/test_batch')
#
# data[b'data'][0] # array([158, 159, 165, ..., 124, 129, 110], dtype=uint8)

# 定义 Ret50
# 1x1 conv -> 3x3 conv -> 1x1 conv
class Bottleneck(nn.Module):
    def __init__(self, in_channels, channels, stride=1, use_1x1conv=False):
        super(Bottleneck,self).__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv3 = nn.Conv2d(channels, channels*4, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels*4)

        if use_1x1conv:
            self.conv4 = nn.Conv2d(
                in_channels, channels*4, kernel_size=1, stride=stride
            )
        else:
            self.conv4 = None

    def forward(self, x):
        # 1x1 conv 通道数：in_channels -> channels
        out = F.relu(self.bn1(self.conv1(x)))
        # 3x3 conv 通道数：channels -> channels
        out = F.relu(self.bn2(self.conv2(out)))
        # 1x1 conv 通道数: channels -> 4*channels
        out = self.bn3(self.conv3(out))

        # 恒等映射 or 1x1 conv
        if  self.conv4 == None:
            identity = x
        else:
            identity = self.conv4(x)


        out += identity
        return F.relu(out)
def bottleneck_block(in_channels, channels, num_bottlenecks, not_FirstBlock = True):
    # 第一个neck使用1x1conv，剩余的neck不使用1x1conv
    # 第一个block的stride=1，后面的block的stride=2
    blk = []
    for i in range(num_bottlenecks):
        if i == 0:
            blk.append(
                Bottleneck(in_channels, channels, stride=not_FirstBlock+1, use_1x1conv=True)
            )
        else:
            blk.append(
                Bottleneck(channels*4, channels)
            )
    return blk
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b2 = nn.Sequential(*bottleneck_block(64, 64, 3, not_FirstBlock=False))
b3 = nn.Sequential(*bottleneck_block(64*4, 128, 3))
b4 = nn.Sequential(*bottleneck_block(128*4, 256, 3))
b5 = nn.Sequential(*bottleneck_block(256*4, 512, 3))
ret50 = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(),
    nn.Linear(2048, 10)
)

# 定义训练
from torch.utils.tensorboard import SummaryWriter
import tensorboard
def train_loss(net, train_iter, test_iter, epochs, loss, device, lr):
    # 参数初始化
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.kaiming_uniform_(m.weight)
    net.apply(init_weights)

    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer

    trainer = torch.optim.Adam(net.parameters(), lr=lr)

    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-50_Dict_MNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\ResNet-50_MNIST-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer
    
# 加载FashionMNIST
trans24 = transforms.Compose(
    [transforms.ToTensor()])#28*28
train_dataset = datasets.MNIST(
    root=r'./', train=True, transform=trans24, download=True)
test_dataset = datasets.MNIST(
    root=r'./', train=False, transform=trans24, download=True)

# 配置数据加载器
batch_size = 4096
train_loader64 = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader64 = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=True)

# 开始训练
epochs, lr = 20, 0.001#epochs,lr = 20, 0.001
device = torch.device("cuda:0")
loss = nn.CrossEntropyLoss()
# train_loss(net, train_iter, test_iter, num_epochs, loss, trainer, device, lr
train_loss_LAMB(ret50, test_loader64, test_loader64, epochs, loss, device, lr)

# tensorboard 运行
# (mytorch) PS F:\DeepLearn\resnet\runs> tensorboard --logdir=Apr10_17-45-21_NikeLee


RuntimeError: apex.optimizers.FusedLAMB requires cuda extensions

## 可视化展示



In [None]:
from torch import nn
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from d2l import torch as d2l
from torchvision import  transforms

text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
               'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
loss_func = nn.CrossEntropyLoss()
model = torch.load("F:\\DeepLearn\\resnet\model\\ResNet-50_FashionMNIST-10_Epoch20_Accuracy100.00%.pth")

# 优化器
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

costs = []

# 测试网络
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader64:
            inputs, labels = data
            if torch.cuda.is_available():
                inputs = inputs.cuda()
                labels = labels.cuda()
            output = model(inputs)
            _, predicted = torch.max(output, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
    print(f'测试集上的准确率为:{correct / total * 100:.3f}%')


for epoch in range(epochs):
    test()
    # 绘制图片:此时的costs里面的数据是tensor类型,如果是gpu上跑的就是cuda,需要将他转换成array
    print(costs)
if torch.cuda.is_available():
    costs = [cost.cpu().detach().numpy() for cost in costs]
    print(costs)
    plt.plot(costs)
    plt.xlabel('number of iteration')
    plt.ylabel('loss')
    plt.title('Inception Net')
    plt.show()
else:
    costs = [cost.numpy() for cost in costs]
    print(costs)
    plt.plot(costs)
    plt.xlabel('number of iteration')
    plt.ylabel('loss')
    plt.title('Inception Net')
    plt.show()

测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
测试集上的准确率为:11.780%
[]
[]


In [4]:
# 清理内存
import torch
 

 
# ... 模型训练代码 ...
 
# 清理模型和优化器占用的内存
del ret50

 
# 清理PyTorch缓存
torch.cuda.empty_cache()

In [ ]:
# 重启Jupyter内核
%reset -f

## 定义 ResNet-18 网络结构

In [4]:
class Residual(nn.Module):
    # 残差块
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)


# ResNet-18
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk


b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))

net = nn.Sequential(b1, b2, b3, b4, b5,                     #Squential是个有序的容器，网络层将按照传入该容器的顺序依次加入，用[]来访问任意一层
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(), nn.Linear(512, 10))


In [4]:

# 下载并配置数据集
trans64 = transforms.Compose(
    [transforms.Resize((224,224)), transforms.ToTensor()])#和d2l不同 重塑为224
train_dataset64 = datasets.MNIST(
    root=r'./', train=True, transform=trans64, download=True)
test_dataset64 = datasets.MNIST(
    root=r'./', train=False, transform=trans64, download=True)

# 配置数据加载器
batch_size = 4096
train_loader64 = DataLoader(dataset=train_dataset64,
                          batch_size=batch_size, shuffle=True)
test_loader64 = DataLoader(dataset=test_dataset64,
                         batch_size=batch_size, shuffle=True)

## 使用28*28像素 进行训练分类

In [7]:
trans24 = transforms.Compose(
    [transforms.ToTensor()])#28*28
train_dataset = datasets.FashionMNIST(
    root=r'./', train=True, transform=trans24, download=True)
test_dataset = datasets.MNIST(
    root=r'./', train=False, transform=trans24, download=True)

# 配置数据加载器
batch_size = 4096
train_loader4096 = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader4096 = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=True)

## 训练完成后会保存模型，可以修改模型的保存路径。

# 保存和加载整个模型
torch.save(net, 'net.pth')
model = torch.load('net.pth')

# 将my_resnet模型储存为my_resnet.pth
torch.save(net.state_dict(), "net_parameter.pth")
# 加载resnet，模型存放在my_resnet.pth
net.load_state_dict(torch.load("net_parameter.pth"))



In [7]:
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./
    Split: Test
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )

In [5]:
from torch.utils.tensorboard import SummaryWriter
import tensorboard
def train_loss(net, train_iter, test_iter, epochs, loss, device, lr):
    # 参数初始化
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.kaiming_uniform_(m.weight)
    net.apply(init_weights)

    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer

    trainer = torch.optim.Adam(net.parameters(), lr=lr)

    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-18_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\net_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer

# 使用LAMB 优化器:

In [5]:
from torch.utils.tensorboard import SummaryWriter
import tensorboard
from apex.optimizers import FusedLAMB

def train_loss_LAMB(net, train_iter, test_iter, epochs, loss, device, lr):
    # 参数初始化
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.kaiming_uniform_(m.weight)
    net.apply(init_weights)

    net.to(device)
    writer = SummaryWriter()  # 创建 TensorBoard writer

    trainer = FusedLAMB(net.parameters(), lr=lr)  # 使用 LAMB 优化器
    # trainer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        timer, num_batches = d2l.Timer(), len(train_iter)
        for i, (X, y) in enumerate(train_iter):
            trainer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]

            # 将训练损失和准确率写入 TensorBoard
            writer.add_scalar('Train/Loss', train_loss, epoch * num_batches + i)
            writer.add_scalar('Train/Accuracy', train_acc, epoch * num_batches + i)

            # 打印训练过程
            print(f'Epoch: {epoch+1}, Batch: {i+1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        writer.add_scalar('Test/Accuracy', test_acc, epoch)  # 将测试准确率写入 TensorBoard

    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    torch.save(net.state_dict(), f".\\model\\ResNet-18_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    torch.save(net, f".\\model\\net_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
    writer.close()  # 关闭 TensorBoard writer

In [6]:
# # def train(net, train_iter, test_iter, epochs,scheduler=None, device):
# lr = 0.0001
# 
# # device = d2l.try_gpu()
# 
# def train_loss(net, train_iter, test_iter, epochs, loss,  device, lr):
# # 参数初始化1
#     def init_weights(m):
#         if type(m) == nn.Linear or type(m) == nn.Conv2d:
#             # nn.init.xavier_uniform_(m.weight )#80.87%
#             nn.init.kaiming_uniform_(m.weight)
#     net.apply(init_weights)
# 
#     # net.load_state_dict(torch.load("./resnet18-f37072fd.pth"),strict=False)
# 
# 
#     net.to(device)#.to(device) 可以指定CPU 或者GPU
#     animator = d2l.Animator(xlabel='epoch', xlim=[0, epochs],#画出loos新添加画图工具
#                             legend=['train loss', 'train acc', 'test acc'])
# 
#     trainer = torch.optim.Adam(net.parameters(), lr=lr)
# 
# 
#     for epoch in range(epochs):
#         # 训练损失之和，训练准确率之和，样本数
#         metric = d2l.Accumulator(3)
#         net.train()#在使用 pytorch 构建神经网络的时候，训练过程中会在程序上方添加一句model.train()，作用是 启用 batch normalization 和 dropout 。
#         timer, num_batches = d2l.Timer(), len(train_iter)
#         for i, (X, y) in enumerate(train_iter):
# 
#             trainer.zero_grad()
#             X, y = X.to(device), y.to(device)
#             y_hat = net(X)
#             l = loss(y_hat, y)
#             l.backward()
#             trainer.step()
#             with torch.no_grad():
#                 metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
# 
#             train_loss = metric[0] / metric[2]
#             train_acc = metric[1] / metric[2]
#             # 画出loos新添加
#             if (i + 1) % 50 == 0:
#                 animator.add(epoch + i / len(train_iter),
#                              (train_loss, train_acc, None))
# 
#             if (i + 1) % (num_batches // 30) == 0 or i == num_batches - 1:
#                 print(f'Epoch: {epoch+1}, Step: {i+1}, Loss: {train_loss:.4f}')
# 
#         test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
#         animator.add(epoch+1, (None, None, test_acc))#画出loos新添加
# 
#         #新添加 学习率调度器 多因子调度器
# 
# 
#     print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, '
#           f'test acc {test_acc:.3f}')
#     torch.save(net.state_dict(),
#                f".\\model\\ResNet-18_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
#     torch.save(net,f".\\model\\net_CIFAR-10_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")


In [8]:

def train(net, train_iter, test_iter, epochs, lr, device):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            # nn.init.xavier_uniform_(m.weight )#80.87%d2l使用的
            nn.init.kaiming_uniform_(m.weight)
    net.apply(init_weights)

    print(f'Training on:[{device}]')
    net.to(device)#.to(device) 可以指定CPU 或者GPU
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    timer, num_batches = d2l.Timer(), len(train_iter)


    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()#在使用 pytorch 构建神经网络的时候，训练过程中会在程序上方添加一句model.train()，作用是 启用 batch normalization 和 dropout 。
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
          
           

            if (i + 1) % (num_batches // 30) == 0 or i == num_batches - 1:
                print(f'Epoch: {epoch+1}, Step: {i+1}, Loss: {train_l:.4f}')
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        print(            f'Train Accuracy: {train_acc*100:.2f}%, Test Accuracy: {test_acc*100:.2f}%')
    print(f'{metric[2] * epochs / timer.sum():.1f} examples/sec '
          f'on: [{str(device)}]')
 


训练模型（或加载模型）
如果环境正确配置了 CUDA，则会由 GPU 进行训练。
加载模型需要根据自身情况修改路径

## 在Pytorch中构建好一个模型后，一般需要进行预训练权重中加载。torch.load_state_dict()函数就是用于将预训练的参数权重加载到新的模型之中，操作方式如下所示：
# sd_net = torchvision.models.resnte50(pretrained=False)
# sd_net.load_state_dict(torch.load('*.pth'), strict=True)
在本博文中重点关注的是 属性 strict; 当strict=True,要求预训练权重层数的键值与新构建的模型中的权重层数名称完全吻合；如果新构建的模型在层数上进行了部分微调，则上述代码就会报错：说key对应不上。

此时，如果我们采用strict=False 就能够完美的解决这个问题。也即，与训练权重中与新构建网络中匹配层的键值就进行使用，没有的就默认初始化。


### train_loader64是MNISTtrain_loader4096 是FashionMnist数据集


ret18  

In [27]:
epochs, lr = 10, 0.001#epochs,lr = 20, 0.001
device = torch.device("cuda:0")
train(net, train_loader64, test_loader64, epochs, lr, device)

Training on:[cuda:0]
Epoch: 1, Step: 31, Loss: 1.0478
Epoch: 1, Step: 62, Loss: 0.6605
Epoch: 1, Step: 93, Loss: 0.5033
Epoch: 1, Step: 124, Loss: 0.4132
Epoch: 1, Step: 155, Loss: 0.3553
Epoch: 1, Step: 186, Loss: 0.3153
Epoch: 1, Step: 217, Loss: 0.2841
Epoch: 1, Step: 248, Loss: 0.2612
Epoch: 1, Step: 279, Loss: 0.2413
Epoch: 1, Step: 310, Loss: 0.2263
Epoch: 1, Step: 341, Loss: 0.2143
Epoch: 1, Step: 372, Loss: 0.2038
Epoch: 1, Step: 403, Loss: 0.1931
Epoch: 1, Step: 434, Loss: 0.1848
Epoch: 1, Step: 465, Loss: 0.1769
Epoch: 1, Step: 496, Loss: 0.1715
Epoch: 1, Step: 527, Loss: 0.1670
Epoch: 1, Step: 558, Loss: 0.1614
Epoch: 1, Step: 589, Loss: 0.1565
Epoch: 1, Step: 620, Loss: 0.1510
Epoch: 1, Step: 651, Loss: 0.1451
Epoch: 1, Step: 682, Loss: 0.1414
Epoch: 1, Step: 713, Loss: 0.1381
Epoch: 1, Step: 744, Loss: 0.1348
Epoch: 1, Step: 775, Loss: 0.1312
Epoch: 1, Step: 806, Loss: 0.1283
Epoch: 1, Step: 837, Loss: 0.1254
Epoch: 1, Step: 868, Loss: 0.1231
Epoch: 1, Step: 899, Loss: 0.1

KeyboardInterrupt: 

ret50

In [9]:
epochs, lr = 10, 0.001#epochs,lr = 20, 0.001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train(ret50, train_loader, test_loader, epochs, lr, device)

Training on:[cuda]
Epoch: 1, Step: 31, Loss: 4.4236
Epoch: 1, Step: 62, Loss: 2.3907
Epoch: 1, Step: 93, Loss: 1.6924
Epoch: 1, Step: 124, Loss: 1.3206
Epoch: 1, Step: 155, Loss: 1.0931
Epoch: 1, Step: 186, Loss: 0.9404
Epoch: 1, Step: 217, Loss: 0.8225
Epoch: 1, Step: 248, Loss: 0.7367
Epoch: 1, Step: 279, Loss: 0.6706
Epoch: 1, Step: 310, Loss: 0.6135
Epoch: 1, Step: 341, Loss: 0.5677
Epoch: 1, Step: 372, Loss: 0.5307
Epoch: 1, Step: 403, Loss: 0.4970
Epoch: 1, Step: 434, Loss: 0.4678
Epoch: 1, Step: 465, Loss: 0.4433
Epoch: 1, Step: 496, Loss: 0.4200
Epoch: 1, Step: 527, Loss: 0.3990
Epoch: 1, Step: 558, Loss: 0.3808
Epoch: 1, Step: 589, Loss: 0.3657
Epoch: 1, Step: 620, Loss: 0.3508
Epoch: 1, Step: 651, Loss: 0.3369
Epoch: 1, Step: 682, Loss: 0.3246
Epoch: 1, Step: 713, Loss: 0.3134
Epoch: 1, Step: 744, Loss: 0.3034
Epoch: 1, Step: 775, Loss: 0.2956
Epoch: 1, Step: 806, Loss: 0.2877
Epoch: 1, Step: 837, Loss: 0.2797
Epoch: 1, Step: 868, Loss: 0.2721
Epoch: 1, Step: 899, Loss: 0.264

KeyboardInterrupt: 

# ret50_plot_loss


In [6]:
epochs, lr = 20, 0.001#epochs,lr = 20, 0.001
device = torch.device("cuda:0")
loss = nn.CrossEntropyLoss()
# train_loss(net, train_iter, test_iter, num_epochs, loss, trainer, device, lr
train_loss(ret50, test_loader64, test_loader64, epochs, loss, device, lr)

OutOfMemoryError: CUDA out of memory. Tried to allocate 12.25 GiB. GPU 0 has a total capacity of 22.00 GiB of which 0 bytes is free. Of the allocated memory 25.34 GiB is allocated by PyTorch, and 15.70 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [7]:

import torch
 

 
# ... 模型训练代码 ...
 
# 清理模型和优化器占用的内存
del ret50
del trainer
 
# 清理PyTorch缓存
torch.cuda.empty_cache()

## 可视化展示

In [None]:

text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']

In [None]:
loss_func = nn.CrossEntropyLoss()
model = net
# 优化器
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

costs = []

# 训练网络
def train(epoch):
    batch_loss = 0
    for step, (data) in enumerate(train_loader):
        inputs, labels = data
        if torch.cuda.is_available():
            inputs = inputs.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_func(output, labels)
        loss.backward()
        optimizer.step()
        costs.append(loss)
        batch_loss += loss.item()

        if step % 300 == 299:
            print(f'epoch:{epoch},step:{step + 1},mini_loss:{batch_loss / 300:.3f}')
            batch_loss = 0


# 测试网络
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader64:
            inputs, labels = data
            if torch.cuda.is_available():
                inputs = inputs.cuda()
                labels = labels.cuda()
            output = model(inputs)
            _, predicted = torch.max(output, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum()
    print(f'测试集上的准确率为:{correct / total * 100:.3f}%')


if __name__ == '__main__':
    for epoch in range(3):
        train(epoch)
        test()
    # 绘制图片:此时的costs里面的数据是tensor类型,如果是gpu上跑的就是cuda,需要将他转换成array
    print(costs)
    if torch.cuda.is_available():
        costs = [cost.cpu().detach().numpy() for cost in costs]
    else:
        costs = [cost.numpy() for cost in costs]
    print(costs)
    plt.plot(costs)
    plt.xlabel('number of iteration')
    plt.ylabel('loss')
    plt.title('Inception Net')
    plt.show()
