In [19]:
# Build a single linear layer (10 inputs, 1 output) and print its structure.
# NOTE(review): this cell's execution count (19) is higher than the cells below it,
# and `torch` is only imported in a later cell of this view — confirm the notebook
# still works under Restart Kernel -> Run All.
net = torch.nn.Linear(10,1)
print(net)
print('---------------------')
# Wrap the layer in DataParallel and print again; the original layer becomes the
# wrapper's `.module` attribute.
# NOTE(review): device_ids=[0,3] hard-codes GPU indices 0 and 3 — presumably this
# needs a machine with at least four visible GPUs; confirm before reuse.
net = torch.nn.DataParallel(net, device_ids=[0,3])
print(net)

Linear(in_features=10, out_features=1, bias=True)
---------------------
DataParallel(
  (module): Linear(in_features=10, out_features=1, bias=True)
)


In [3]:
# Save the weights of the wrapped layer (net.module) instead of the DataParallel
# wrapper, so that a plain, unwrapped Linear can load them directly.
# NOTE(review): presumably the ./networks directory must already exist — confirm.
torch.save(net.module.state_dict(), './networks/multiGPU.pt')

In [4]:
# Rebuild an unwrapped layer with the same shape and load the saved weights into it.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
new_net = torch.nn.Linear(10,1)
new_net.load_state_dict(torch.load("./networks/multiGPU.pt"))


<All keys matched successfully>

In [5]:
import time
import torch
from torch import nn, optim
import torch.nn.functional as F

# Use CUDA when it is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Inception(nn.Module):
    """Inception block: four parallel paths whose outputs are joined on the channel axis.

    Args:
        in_c: Number of input channels.
        c1: Output channels of path 1 (a single 1 x 1 convolution).
        c2: Pair ``(reduce, out)`` for path 2 (1 x 1 convolution, then 3 x 3 convolution).
        c3: Pair ``(reduce, out)`` for path 3 (1 x 1 convolution, then 5 x 5 convolution).
        c4: Output channels of path 4 (3 x 3 max pooling, then 1 x 1 convolution).

    The output has ``c1 + c2[1] + c3[1] + c4`` channels; every convolution and the
    pooling layer use padding/stride that keep the height and width unchanged.
    """

    def __init__(self, in_c, c1, c2, c3, c4):
        super().__init__()
        path2_mid, path2_out = c2[0], c2[1]
        path3_mid, path3_out = c3[0], c3[1]

        # Path 1: a single 1 x 1 convolution.
        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1)

        # Path 2: 1 x 1 convolution followed by a 3 x 3 convolution.
        self.p2_1 = nn.Conv2d(in_c, path2_mid, kernel_size=1)
        self.p2_2 = nn.Conv2d(path2_mid, path2_out, kernel_size=3, padding=1)

        # Path 3: 1 x 1 convolution followed by a 5 x 5 convolution.
        self.p3_1 = nn.Conv2d(in_c, path3_mid, kernel_size=1)
        self.p3_2 = nn.Conv2d(path3_mid, path3_out, kernel_size=5, padding=2)

        # Path 4: 3 x 3 max pooling followed by a 1 x 1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1)

    def forward(self, x):
        """Run all four paths on ``x`` and concatenate their outputs along dim 1."""
        out1 = F.relu(self.p1_1(x))

        hidden2 = F.relu(self.p2_1(x))
        out2 = F.relu(self.p2_2(hidden2))

        hidden3 = F.relu(self.p3_1(x))
        out3 = F.relu(self.p3_2(hidden3))

        pooled = self.p4_1(x)
        out4 = F.relu(self.p4_2(pooled))

        # Join the path outputs on the channel dimension.
        return torch.cat([out1, out2, out3, out4], dim=1)


In [6]:
# Stage 1: 7 x 7 convolution (stride 2) on a 1-channel input, ReLU, then 3 x 3 max pooling (stride 2).
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


# Stage 2: 1 x 1 convolution, 3 x 3 convolution (64 -> 192 channels), then 3 x 3 max pooling (stride 2).
# NOTE(review): unlike b1, there is no activation between or after these two
# convolutions, so they compose into a single linear map — confirm this is intended.
b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


# Stage 3: two Inception blocks (192 -> 256 -> 480 channels), then 3 x 3 max pooling (stride 2).
b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


# Stage 4: five Inception blocks (480 -> 512 -> 512 -> 512 -> 528 -> 832 channels),
# then 3 x 3 max pooling (stride 2).
b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


In [11]:
import torch.nn.functional as F
import torch
class GlobalAvgPool2d(torch.nn.Module):
    """Global average pooling.

    Implemented as an ordinary average pooling whose window shape is set to the
    input's height and width.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Average-pool ``x`` with a window equal to all dimensions after the first two."""
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

# Stage 5: two Inception blocks (832 -> 832 -> 1024 channels) followed by global average pooling.
b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   GlobalAvgPool2d())

class FlattenLayer(nn.Module):
    """Reshape a batch so that every dimension after the first is merged into one."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return a ``(batch, -1)`` view of ``x``, where ``x`` has shape ``(batch, *, *, ...)``."""
        batch = x.size(0)
        return x.view(batch, -1)


# Assemble the full network: the five stages, a flatten step, and a linear
# classifier mapping the 1024 channels produced by b5 to 10 outputs.
net = nn.Sequential(b1, b2, b3, b4, b5, 
                    FlattenLayer(), nn.Linear(1024, 10))

# Wrap the network in DataParallel for multi-GPU execution.
# NOTE(review): device_ids=[0, 3] hard-codes GPU indices 0 and 3 — presumably this
# needs a machine with at least four visible GPUs; confirm before reuse.
net = torch.nn.DataParallel(net, device_ids=[0, 3])

In [12]:
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST', num_workers=4):
    """Download the Fashion-MNIST dataset (if needed) and return train/test data loaders.

    Args:
        batch_size: Number of samples per batch for both loaders.
        resize: Optional size passed to ``torchvision.transforms.Resize``; when
            falsy, images are not resized.
        root: Directory where the dataset is stored / downloaded to.
        num_workers: Number of worker processes used by each ``DataLoader``
            (defaults to 4, the value that was previously hard-coded).

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. The training
        loader shuffles its samples; the test loader does not.
    """
    # Imported locally so the function definition does not require torchvision.
    import torchvision

    # Build the preprocessing pipeline: optional resize, then conversion to a tensor.
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)

    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)

    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

def evaluate_accuracy(data_iter, net, device=None):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(x, y)`` batches.
        net: Either an ``nn.Module`` or a plain Python function (a hand-written
            model). A function that has a variable named ``is_training`` is
            called with ``is_training=False``.
        device: Device the batches are moved to before an ``nn.Module`` is called.
            When ``None`` and ``net`` is an ``nn.Module``, the device of the
            module's first parameter is used (CPU for a parameter-free module).
            Batches are not moved when ``net`` is a plain function.

    Returns:
        The fraction of samples whose arg-max prediction (over dim 1) equals the label.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.

    An ``nn.Module`` is switched to evaluation mode once for the whole pass and is
    returned to the mode it was in on entry, even if evaluation raises.
    """
    is_module = isinstance(net, nn.Module)
    if is_module and device is None:
        # No device given: use the one the module's parameters live on.
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    acc_sum, n = 0.0, 0
    was_training = net.training if is_module else False
    if is_module:
        net.eval()
    try:
        with torch.no_grad():
            for x, y in data_iter:
                if is_module:
                    x, y = x.to(device), y.to(device)
                    acc_sum += (net(x).argmax(dim=1) == y).float().sum().item()
                elif 'is_training' in net.__code__.co_varnames:
                    # Hand-written model with an is_training switch: turn it off.
                    acc_sum += (net(x, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(x).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            # Restore the caller's mode instead of forcing training mode.
            net.train(was_training)
    return acc_sum / n
                
    
def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, epochs):
    """Train ``net`` with cross-entropy loss and print per-epoch statistics.

    Args:
        net: Model to train; it is moved to ``device`` first.
        train_iter: Iterable yielding ``(x, y)`` training batches.
        test_iter: Iterable yielding ``(x, y)`` batches, passed to
            ``evaluate_accuracy`` after every epoch.
        batch_size: Not used by this function; kept for caller compatibility.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called for every batch.
        device: Device the model and every training batch are moved to.
        epochs: Number of passes over ``train_iter``.

    After each epoch one line is printed with the mean per-batch loss, the
    training accuracy, the test accuracy and the elapsed time (which includes
    the test evaluation).

    Raises:
        ZeroDivisionError: If ``train_iter`` yields no batches.
    """
    net = net.to(device)
    loss = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        # Put the model in training mode explicitly rather than relying on
        # evaluate_accuracy to leave it that way after the previous epoch.
        net.train()
        train_ls_sum, train_acc_sum = 0, 0
        n = 0
        batch_count = 0
        start = time.time()
        for x, y in train_iter:
            x = x.to(device)
            y = y.to(device)
            y_hat = net(x)
            ls = loss(y_hat, y)
            optimizer.zero_grad()
            ls.backward()
            optimizer.step()
            train_ls_sum += ls.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_ls_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))




In [16]:
batch_size = 512
# If an "out of memory" error occurs, reduce batch_size or resize.
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=96)

# Train every parameter of the network with Adam for num_epochs epochs.
lr, num_epochs = 0.001, 30
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

epoch 1, loss 0.6004, train acc 0.775, test acc 0.781, time 21.2 sec
epoch 2, loss 0.4678, train acc 0.828, test acc 0.826, time 21.1 sec
epoch 3, loss 0.4106, train acc 0.847, test acc 0.840, time 21.2 sec
epoch 4, loss 0.3586, train acc 0.865, test acc 0.862, time 21.2 sec
epoch 5, loss 0.3303, train acc 0.876, test acc 0.865, time 21.3 sec
epoch 6, loss 0.3069, train acc 0.884, test acc 0.869, time 21.3 sec
epoch 7, loss 0.2847, train acc 0.894, test acc 0.878, time 21.3 sec
epoch 8, loss 0.2734, train acc 0.898, test acc 0.874, time 21.4 sec
epoch 9, loss 0.2615, train acc 0.900, test acc 0.881, time 21.5 sec
epoch 10, loss 0.2512, train acc 0.906, test acc 0.894, time 21.3 sec
epoch 11, loss 0.2358, train acc 0.912, test acc 0.890, time 21.3 sec
epoch 12, loss 0.2290, train acc 0.914, test acc 0.886, time 21.2 sec
epoch 13, loss 0.2207, train acc 0.917, test acc 0.903, time 21.3 sec
epoch 14, loss 0.2108, train acc 0.921, test acc 0.894, time 21.5 sec
epoch 15, loss 0.2045, train 