<a href="https://colab.research.google.com/github/rzens/myOriginalRzens/blob/master/HowToUseCuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 连接到 google 云盘
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/Dataset/
!pwd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Dataset
/content/drive/MyDrive/Dataset


In [None]:
import time

import torch
import torchvision
from torch import nn
from torch.nn import CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.optim import SGD
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# from TrainModel import Model

# CIFAR-10 train / test splits with images converted to tensors.
# `download=True` fetches the archive into `root` when it is not there yet.
train_data = torchvision.datasets.CIFAR10(root="drive/MyDrive/Dataset", train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="drive/MyDrive/Dataset", train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

train_data_size = len(train_data)
print("训练数据集的长度为：{}".format(train_data_size))

# Reshuffle the training samples at every epoch so the mini-batches differ from
# one epoch to the next; evaluation order is irrelevant, so the test loader
# keeps the default sequential order.
train_dataloader = DataLoader(train_data, batch_size=256, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256)


# 搭建神经网络
class Model(nn.Module):
    """Small convolutional classifier: three conv + max-pool stages, then two linear layers.

    Each 5x5 convolution uses padding 2, so only the poolings shrink the feature
    map. The first linear layer expects 1024 flattened features, which is what
    64 channels of 4x4 give after three 2x poolings of a 32x32 input. The final
    layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        feature_stages = [
            Conv2d(3, 32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(32, 32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(32, 64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
        ]
        classifier_head = [
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        # The attribute name and layer order determine the state_dict keys
        # ("model1.<index>.weight" ...), so both are kept as they were.
        self.model1 = Sequential(*feature_stages, *classifier_head)

    def forward(self, x):
        """Pass `x` through the stacked layers and return the raw class scores."""
        return self.model1(x)


# Select the training device: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("use cuda")
# Alternatives for forcing a device:
# device = torch.device("cpu")
# device = torch.device("cuda")
# device = torch.device("cuda:0")
model = Model()
model = model.to(device)
loss_tm = CrossEntropyLoss()
loss_tm = loss_tm.to(device)
learning_rate = 0.001
optim_tm = SGD(model.parameters(), lr=learning_rate)

total_train_steps = 0  # optimizer steps taken so far; x-axis of every logged scalar
total_test_steps = 0  # number of finished evaluation passes
epoch = 100  # number of full passes over the training set

test_data_size = len(test_data)  # number of test samples, needed for the accuracy
# The context manager closes the writer even when training raises or is
# interrupted part-way through, so already-logged scalars are not left dangling.
with SummaryWriter("Train") as writer:
    for i in range(epoch):
        print()
        start_time = time.time()
        print("第 {} 轮训练开始".format(i))
        model.train()  # training mode (matters for layers such as dropout / batch norm)
        for imgs, targets in train_dataloader:
            imgs = imgs.to(device)
            targets = targets.to(device)
            ys = model(imgs)
            loss = loss_tm(ys, targets)

            # zero_grad() returns nothing, so its result is not kept.
            optim_tm.zero_grad()
            loss.backward()
            optim_tm.step()

            writer.add_scalar("train_loss", loss.item(), total_train_steps)
            total_train_steps += 1

        # After each training pass, measure the model on the test split.
        model.eval()  # evaluation mode (dropout / batch norm switch to inference behaviour)
        total_test_loss = 0  # sum of the per-batch losses
        total_accurate = 0  # correctly classified test samples in this epoch
        with torch.no_grad():
            for imgs, targets in test_dataloader:
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = model(imgs)
                loss = loss_tm(outputs, targets)
                total_test_loss += loss.item()
                # argmax(1) is the predicted class index per sample; count the matches.
                batch_accurate = (outputs.argmax(1) == targets).sum()
                total_accurate += batch_accurate.item()
        print("验证集损失：{}。".format(total_test_loss))
        print("本次 epoch 正确率：{}。".format(total_accurate / test_data_size))
        # Evaluation scalars are logged against the training-step counter.
        writer.add_scalar("test_loss", total_test_loss, total_train_steps)
        writer.add_scalar("accuracy", total_accurate / test_data_size, total_train_steps)
        total_test_steps += 1
        end_time = time.time()
        print("本 epoch 运行时间：", end_time - start_time)  # seconds
torch.save(model.state_dict(), "model.pth")  # persist the learned parameters only


In [None]:
import time

import torch
import torchvision
from torch import nn
from torch.nn import CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# from TrainModel import Model

# CIFAR-10 train / test splits with images converted to tensors.
# `download=True` fetches the archive into `root` when it is not there yet.
train_data = torchvision.datasets.CIFAR10(root="drive/MyDrive/Dataset", train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="drive/MyDrive/Dataset", train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

train_data_size = len(train_data)
print("训练数据集的长度为：{}".format(train_data_size))

# Reshuffle the training samples at every epoch so the mini-batches differ from
# one epoch to the next; evaluation order is irrelevant, so the test loader
# keeps the default sequential order.
train_dataloader = DataLoader(train_data, batch_size=256, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256)


# 搭建神经网络
class Model(nn.Module):
    """Small convolutional classifier: three conv + max-pool stages, then two linear layers.

    Each 5x5 convolution uses padding 2, so only the poolings shrink the feature
    map. The first linear layer expects 1024 flattened features, which is what
    64 channels of 4x4 give after three 2x poolings of a 32x32 input. The final
    layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        feature_stages = [
            Conv2d(3, 32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(32, 32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(32, 64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
        ]
        classifier_head = [
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        # The attribute name and layer order determine the state_dict keys
        # ("model1.<index>.weight" ...), so both are kept as they were.
        self.model1 = Sequential(*feature_stages, *classifier_head)

    def forward(self, x):
        """Pass `x` through the stacked layers and return the raw class scores."""
        return self.model1(x)


# Select the training device: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("use cuda")

# ##### directml ##### #
# import torch_directml
# print("torch_directml")
# device = torch_directml.device()
# ##### directml ##### #

# device = torch.device("cuda")
# device = torch.device("cuda:0")
model = Model()
model = model.to(device)
loss_tm = CrossEntropyLoss()
loss_tm = loss_tm.to(device)
# Same value as Adam's default step size; it is passed explicitly so that this
# variable really is the learning rate the optimizer runs with.
learning_rate = 0.001
optim_tm = Adam(model.parameters(), lr=learning_rate)

# ##### intel ##### #
# import intel_extension_for_pytorch as ipex
# model, optim_tm = ipex.optimize(model, optimizer=optim_tm)
#


total_train_steps = 0  # optimizer steps taken so far; x-axis of every logged scalar
total_test_steps = 0  # number of finished evaluation passes
epoch = 70  # number of full passes over the training set

test_data_size = len(test_data)  # number of test samples, needed for the accuracy
# The context manager closes the writer even when training raises or is
# interrupted part-way through, so already-logged scalars are not left dangling.
with SummaryWriter("Train") as writer:
    for i in range(epoch):
        print()
        start_time = time.time()
        print("第 {} 轮训练开始".format(i))
        model.train()  # training mode (matters for layers such as dropout / batch norm)
        for imgs, targets in train_dataloader:
            imgs = imgs.to(device)
            targets = targets.to(device)
            ys = model(imgs)
            loss = loss_tm(ys, targets)

            # zero_grad() returns nothing, so its result is not kept.
            optim_tm.zero_grad()
            loss.backward()
            optim_tm.step()

            writer.add_scalar("train_loss", loss.item(), total_train_steps)
            total_train_steps += 1

        # After each training pass, measure the model on the test split.
        model.eval()  # evaluation mode (dropout / batch norm switch to inference behaviour)
        total_test_loss = 0  # sum of the per-batch losses
        total_accurate = 0  # correctly classified test samples in this epoch
        with torch.no_grad():
            for imgs, targets in test_dataloader:
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = model(imgs)
                loss = loss_tm(outputs, targets)
                total_test_loss += loss.item()
                # argmax(1) is the predicted class index per sample; count the matches.
                batch_accurate = (outputs.argmax(1) == targets).sum()
                total_accurate += batch_accurate.item()
        print("验证集损失：{}。".format(total_test_loss))
        print("本次 epoch 正确率：{}。".format(total_accurate / test_data_size))
        # Evaluation scalars are logged against the training-step counter.
        writer.add_scalar("test_loss", total_test_loss, total_train_steps)
        writer.add_scalar("accuracy", total_accurate / test_data_size, total_train_steps)
        total_test_steps += 1
        end_time = time.time()
        print("本 epoch 运行时间：", end_time - start_time)  # seconds
torch.save(model.state_dict(), "model.pth")  # persist the learned parameters only

In [None]:
# 通用的训练流程
# 以及看各个类的准确度
import time

import torch
import torchvision
from torch.nn import Linear, CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader

# 获取数据
# CIFAR-10 train / test splits with images converted to tensors.
# `download=True` fetches the archive into `root` when it is not there yet.
train_data = torchvision.datasets.CIFAR10(root="drive/MyDrive/Dataset", train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="drive/MyDrive/Dataset", train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

# Reshuffle the training samples at every epoch so the mini-batches differ from
# one epoch to the next; the test loader keeps the default sequential order.
train_dataloader = DataLoader(train_data, batch_size=256, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256)
# Build the model
vgg16 = torchvision.models.vgg16(pretrained=False)  # pretrained=False: randomly initialised, untrained weights

# To start from pre-trained weights instead, the parameters are downloaded automatically:
# vgg16_true = torchvision.models.vgg16(pretrained=True)  # pretrained=True: weights come from prior training


# Swap the last classifier layer for one that emits 10 class scores.
vgg16.classifier[6] = Linear(4096, 10)
print(vgg16)  # show the resulting architecture

# Select the training device: CPU unless CUDA is available.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("use cuda")

vgg16.to(device)
ce_loss = CrossEntropyLoss()
ce_loss.to(device)
adam_optim = Adam(vgg16.parameters())


# ##### intel ##### #
# import intel_extension_for_pytorch as ipex

# vgg16, adam_optim = ipex.optimize(vgg16, optimizer=adam_optim)


#


def train_model_a_epoch(model: torch.nn.Module, loss, optim: torch.optim.Optimizer, dataloader):
    """Train `model` for one pass over `dataloader` and print the elapsed time.

    Args:
        model: network to optimise; it is switched to training mode.
        loss: callable invoked as ``loss(outputs, targets)``; its result is
            backpropagated.
        optim: optimizer; ``zero_grad()`` and ``step()`` are called once per batch.
        dataloader: iterable yielding ``(inputs, targets)`` batches.

    Returns:
        None. The wall-clock duration of the pass is printed.
    """
    # Run on the device the model already lives on instead of depending on a
    # notebook-level `device` variable having been defined by an earlier cell.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    start_time = time.time()
    model.train()
    for imgs, targets in dataloader:
        imgs = imgs.to(run_device)
        targets = targets.to(run_device)
        train_output = model(imgs)
        train_loss = loss(train_output, targets)

        optim.zero_grad()
        train_loss.backward()
        optim.step()
    end_time = time.time()
    print("耗费时间 {}s".format(end_time - start_time))


def test_model_a_epoch(model: torch.nn.Module, loss, dataloader):
    start_time = time.time()
    test_data_size = len(dataloader)
    total_test_loss = 0
    total_test_accuracy = 0
    total_test_classes_accuracy = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    total_test_classes_sum = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            imgs, targets = batch
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = model(imgs)
            test_loss = loss(outputs, targets)
            total_test_loss += test_loss.item()

            """
            output 是一个至少 shape 是 A = [ B1 = [], B2 = [], ...]
            .argmax(1) 将得到 B1 中最大值的那个 index
            """
            batch_accuracy = (outputs.argmax(1) == targets).sum()
            # 查看各类的准确度
            for i in range(len(outputs.argmax(1))):
                total_test_classes_sum[targets[i]] += 1
                if outputs.argmax(1)[i] == targets[i]:
                    total_test_classes_accuracy[targets[i]] += 1

            total_test_accuracy += batch_accuracy.item()
    print("验证集损失：{}。".format(total_test_loss))
    print("本次 epoch 正确率：{}。".format(total_test_accuracy / test_data_size))
    for i in range(len(total_test_classes_accuracy)):
        if total_test_classes_sum[i] == 0:
            continue
        print("本次 epoch 中第" + str(i) + "个类别的正确率：" +
              str(total_test_classes_accuracy[i] / total_test_classes_sum[i]) + "。")
        print("total_test_classes_accuracy",total_test_classes_accuracy[i],"total_test_classes_sum[i]",total_test_classes_sum[i])
    end_time = time.time()
    print("耗费时间 {}s".format(end_time - start_time))


# 进行模型的训练
if __name__ == "__main__":
    total_epochs = 30
    # The loop variable is the epoch number shown in the progress messages, so
    # the printed counter advances with every pass.
    for now_epoch in range(total_epochs):
        print("第 {} 次训练开始".format(now_epoch))
        train_model_a_epoch(vgg16, ce_loss, adam_optim, train_dataloader)
        print("第 {} 次测试开始".format(now_epoch))
        test_model_a_epoch(vgg16, ce_loss, test_dataloader)


Files already downloaded and verified
Files already downloaded and verified




VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

KeyboardInterrupt: ignored