说明：   
1   程序在Google colab上使用GPU运行  
2   GPU 与 CPU 代码的区别：模型和每个 batch 的数据（图像与标签）都必须先通过 cuda() 或 to(device) 转移到 GPU 上，才能在 GPU 上参与运算  

详细参考：  
torch.cuda包：https://pytorch.org/docs/stable/cuda.html    
cuda函数：Moves all model parameters and buffers to the GPU.  

In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.datasets import mnist, cifar
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms

# Training-time preprocessing: random augmentation followed by tensor
# conversion and per-channel normalisation.
train_data_tf = transforms.Compose([
    # Enlarge to 40 px first so the 32x32 random crop below has room to shift.
    transforms.Resize(40),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
    # Subtract 0.5 and divide by 0.5 on each of the three channels.
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Evaluation-time preprocessing: no augmentation, only tensor conversion and
# normalisation. The statistics are written out per channel so they visibly
# match the 3-channel statistics used by the training pipeline instead of
# relying on a single value being broadcast over all channels.
test_data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])


class SelfMadeNN(nn.Module):
    """Small CNN: four Conv-BN-ReLU stages followed by a dense classifier.

    The classifier head contains three ``Linear`` layers
    (5*5*128 -> n_hidden_1 -> n_hidden_2 -> out_dim) with dropout in front of
    the first two. The hard-coded ``5 * 5 * 128`` input size corresponds to a
    3-channel 32x32 input, whose spatial size evolves as
    32 -> 30 -> 28 -> 14 (pool) -> 12 -> 10 -> 5 (pool).

    Args:
        n_hidden_1: Width of the first fully connected layer.
        n_hidden_2: Width of the second fully connected layer.
        out_dim: Number of output logits.
    """

    def __init__(self, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()

        # Convolutional feature extractor (all 3x3 kernels, no padding).
        self.layer1 = self._conv_stage(3, 32)                # 32 -> 30
        self.layer2 = self._conv_stage(32, 64, pool=True)    # 30 -> 28 -> 14
        self.layer3 = self._conv_stage(64, 64)               # 14 -> 12
        self.layer4 = self._conv_stage(64, 128, pool=True)   # 12 -> 10 -> 5

        # Dense classifier operating on the flattened 5x5x128 feature map.
        flat_features = 5 * 5 * 128
        head = [
            nn.Dropout(p=0.5),
            nn.Linear(flat_features, n_hidden_1),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.ReLU(inplace=True),
            nn.Linear(n_hidden_2, out_dim),
        ]
        self.layer5 = nn.Sequential(*head)

    @staticmethod
    def _conv_stage(in_ch, out_ch, pool=False):
        """Build Conv3x3 -> BatchNorm -> ReLU, optionally followed by 2x2 max-pool."""
        modules = [
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch,
                      kernel_size=3, stride=1, padding=0),
            # Batch normalisation sits between the convolution and the activation.
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*modules)

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        # Collapse every non-batch dimension into one vector per sample
        # before handing the features to the dense layers.
        batch = x.size(0)
        flat = x.view(batch, -1)
        return self.layer5(flat)


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in evaluation mode and
    gradient tracking is disabled. Both returned values are averaged over
    samples, so a shorter final batch is not over-weighted.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    # Gradients are only needed while training; skipping them during
    # evaluation avoids building the autograd graph.
    with torch.set_grad_enabled(training):
        for im, label in loader:
            # Move the batch to wherever the model lives (GPU or CPU).
            im = im.to(device)
            label = label.to(device)

            # Forward pass.
            out = model(im)
            loss = criterion(out, label)

            # Backward pass and parameter update.
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # The criterion returns the batch mean, so scale it back to a sum.
            n = label.size(0)
            loss_sum += loss.item() * n
            correct += (out.argmax(dim=1) == label).sum().item()
            seen += n

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def train_model(*, batch_size=64, learning_rate=0.001, momentum=0.9, num_epoch=100):
    """Train ``SelfMadeNN`` on CIFAR-10 and print train/test metrics per epoch.

    Runs on the first CUDA device when one is available and falls back to the
    CPU otherwise. The dataset is downloaded to ``./data`` if necessary.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        learning_rate: SGD learning rate.
        momentum: SGD momentum.
        num_epoch: Number of passes over the training set.
    """
    print(torch.cuda.is_available())

    # 1. Model hyper-parameters.
    hidden1_nodes = 100
    hidden2_nodes = 200
    output_nodes = 10

    # 2. Data.
    # 2.1 Download the datasets.
    train_set = cifar.CIFAR10('./data', train=True, transform=train_data_tf, download=True)
    test_set = cifar.CIFAR10('./data', train=False, transform=test_data_tf, download=True)

    # 2.2 Split into mini-batches.
    train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_data = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    # 3. Model, placed on the GPU when available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = SelfMadeNN(hidden1_nodes, hidden2_nodes, output_nodes)
    model.to(device)

    # 3.1 Loss function and optimizer (SGD with momentum).
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    # 3.2 Train, evaluating on the test set after every epoch.
    for i in range(num_epoch):
        train_loss, train_acc = _run_epoch(model, train_data, criterion, device, optimizer)
        eval_loss, eval_acc = _run_epoch(model, test_data, criterion, device)

        print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
              .format(i, train_loss, train_acc, eval_loss, eval_acc))

    print("train and test-------------------Done!")



![jupyter](./demo32.png)