In [None]:
%config InlineBackend.figure_format = 'svg'
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import numpy
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
class residual(nn.Module):
    """Pre-activation residual unit with a matching skip connection.

    The main branch applies BatchNorm -> ReLU -> 3x3 convolution twice. The
    skip branch (``self.process``) is added to the result.

    Args:
        input_c: number of channels of the incoming feature map.
        output_c: number of channels produced by the unit.
        downsample: when True, the first 3x3 convolution and the skip branch
            both use stride 2.

    The skip branch is a 1x1 convolution whenever the main branch changes the
    tensor shape (``downsample`` is True or ``input_c != output_c``); otherwise
    it is an empty ``nn.Sequential`` and acts as the identity.
    """
    def __init__(self,input_c,output_c,downsample=False):
        super().__init__()
        stride = 2 if downsample else 1
        # An identity skip is only valid when the main branch keeps both the
        # channel count and the spatial size. Otherwise ``y + x`` in forward()
        # would fail on mismatched channels, or silently broadcast a
        # single-channel input.
        if downsample or input_c != output_c:
            self.process = nn.Sequential(nn.Conv2d(input_c, output_c, kernel_size=1, stride=stride))
        else:
            self.process = nn.Sequential()
        self.bn1 = nn.BatchNorm2d(input_c)
        self.bn2 =  nn.BatchNorm2d(output_c)
        self.conv1 = nn.Conv2d(input_c, output_c, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(output_c, output_c, kernel_size=3, padding=1)

    def forward(self,x):
        """Return ``main_branch(x) + skip_branch(x)``."""
        y = self.conv1(torch.relu(self.bn1(x)))
        y = self.conv2(torch.relu(self.bn2(y)))
        x = self.process(x)
        return y + x
    

In [None]:
# Stem of the network: a 7x7 stride-2 convolution on single-channel input,
# batch normalisation, ReLU, then a 3x3 stride-2 max-pool.
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
def block(input_c, output_c, num, use_1x1= False):
    """Build the residual units that make up one stage of the network.

    Args:
        input_c: channels entering the stage. Only the downsampling first unit
            uses it, i.e. it is ignored when ``use_1x1`` is True.
        output_c: channels produced by every unit of the stage.
        num: number of residual units to create.
        use_1x1: when True, no unit downsamples and every unit (including the
            first) is built as ``residual(output_c, output_c)``.

    Returns:
        A list of ``num`` ``residual`` modules, in order.
    """
    def make_unit(position):
        # Only the first unit of a downsampling stage changes shape.
        if position == 0 and not use_1x1:
            return residual(input_c, output_c, downsample=True)
        return residual(output_c, output_c)

    return [make_unit(position) for position in range(num)]

# Four residual stages of two units each. The first stage keeps 64 channels
# and does not downsample; each later stage doubles the channel count and
# starts with a downsampling unit.
b2 = nn.Sequential(*block(64, 64, 2, True))
b3, b4, b5 = (
    nn.Sequential(*block(c_in, c_out, 2))
    for c_in, c_out in ((64, 128), (128, 256), (256, 512))
)

# Full classifier: stem, residual stages, global average pooling, and a
# 10-way linear head.
net = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 10),
)


In [None]:
# Shape sanity check: push one random 96x96 single-channel image through the
# network and print the output shape after every top-level stage.
# Run it in eval mode without autograd so that the random probe neither updates
# the BatchNorm running statistics nor builds a computation graph; the previous
# training/eval mode is restored afterwards.
was_training = net.training
net.eval()
with torch.no_grad():
    X = torch.rand(size=(1, 1, 96, 96))
    for layer in net:
        X = layer(X)
        print(layer.__class__.__name__, 'output shape:\t', X.shape)
net.train(was_training)

In [None]:
class mnist(Dataset):
    """Dataset backed by a CSV file of flattened 28x28 images.

    The first CSV column is used as the label; the remaining columns are
    reshaped to ``(-1, 28, 28)``.

    Args:
        path: path of the CSV file, passed to ``pandas.read_csv``.
        resize: optional size handed to ``transforms.Resize``; when falsy the
            images are only converted with ``transforms.ToTensor``.
    """
    def __init__(self,path,resize=None):
        frame = pd.read_csv(path)
        self.y = frame.iloc[:,0].to_numpy()
        self.x = frame.iloc[:,1:].to_numpy().reshape(-1,28,28)
        self.resize = resize
        # The pipeline always ends with ToTensor; Resize is prepended on request.
        steps = [transforms.Resize(resize)] if resize else []
        steps.append(transforms.ToTensor())
        self.transform = transforms.Compose(steps)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for index ``idx``, with the transform applied if set."""
        image = Image.fromarray(self.x[idx].astype('uint8'))
        label = self.y[idx]
        if not self.transform:
            return image, label
        return self.transform(image), label

# Load both CSV splits, resized to 96, and wrap each in a shuffling
# DataLoader with batches of 256.
# NOTE: the name `train` is rebound later in the notebook by `def train(...)`;
# `train_iter` keeps its own reference to the dataset.
test, train = (
    mnist(csv_path, 96)
    for csv_path in ('../input/mnist-in-csv/mnist_test.csv',
                     '../input/mnist-in-csv/mnist_train.csv')
)
test_iter, train_iter = (
    DataLoader(split, batch_size=256, shuffle=True)
    for split in (test, train)
)

In [None]:
import time as t
def try_gpu(i=0):  #@save
    """Return the ``cuda:i`` device if it exists, otherwise the CPU device."""
    has_device = torch.cuda.device_count() >= i + 1
    return torch.device(f'cuda:{i}' if has_device else 'cpu')

def evaluate_accuracy_gpu(net, data_iter, device=None):  #@save
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        net: the model. If it is an ``nn.Module`` it is switched to evaluation
            mode (and left in that mode).
        data_iter: iterable yielding ``(X, y)`` batches; ``X`` may be a tensor
            or a list of tensors.
        device: device the batches are moved to. When falsy and ``net`` is an
            ``nn.Module``, the device of the model's first parameter is used.

    Returns:
        A zero-dimensional tensor: correct predictions divided by the number
        of labels seen.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no batches.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # switch to evaluation mode
        if not device:
            device = next(iter(net.parameters())).device
    # Number of correct predictions, total number of predictions.
    correct, total = 0, 0
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                # Required for BERT fine-tuning (introduced later).
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            correct += torch.sum(net(X).argmax(dim=1) == y)
            total += len(y)
    return correct / total

def train(train_iter, test_iter, net, lr, epochs, device, net_init=True):
    """Train ``net`` with plain SGD and cross-entropy loss.

    Args:
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy_gpu`` after every epoch.
        net: the model to train; it is moved to ``device``.
        lr: learning rate of the SGD optimizer.
        epochs: number of passes over ``train_iter``.
        device: device used for the model and the batches.
        net_init: when True, the weights of every ``nn.Linear`` and
            ``nn.Conv2d`` layer are re-initialised with Xavier-uniform first.

    Returns:
        A dict with keys ``'train_acc'``, ``'test_acc'`` and ``'train_loss'``,
        each a list with one Python float per epoch. The training metrics are
        sample-weighted means over the whole epoch, measured on the
        predictions made before each optimizer step.
    """
    def init_weights(m):
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
    if net_init:
        net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()

    history = {'train_acc':[],'test_acc':[],'train_loss':[]}

    for epoch in range(epochs):
        net.train()
        start = t.time()
        # Accumulate on the device so the loop only synchronises once per epoch.
        loss_sum = torch.zeros((), device=device)
        correct = torch.zeros((), device=device)
        seen = 0
        for X, y in train_iter:
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                batch = len(y)
                # The loss is a batch mean; weight it by the batch size so a
                # short final batch does not skew the epoch average.
                loss_sum += l * batch
                correct += (y_hat.argmax(dim=1) == y).sum()
                seen += batch

        if seen:
            acc = (correct / seen).item()
            train_loss = (loss_sum / seen).item()
        else:
            # No training batches were produced this epoch.
            acc = train_loss = float('nan')

        # Store plain floats: nothing in the history holds on to autograd
        # state or device memory, and the lists can be plotted directly.
        test_acc = float(evaluate_accuracy_gpu(net, test_iter, device))
        history['train_acc'].append(acc)
        history['train_loss'].append(train_loss)
        history['test_acc'].append(test_acc)
        print('epoch {} use{:.1f}s: train_acc {:.3f}\t test_acc: {:.4f}\t train_loss: {:.4f} '.format(epoch+1,
                                                                                                t.time()-start,
                                                                                               acc,
                                                                                               test_acc,
                                                                                               train_loss))
    return history


# Staged training with a manually decayed learning rate.
# Each entry is (learning rate, epochs, re-initialise weights?). Only the first
# stage initialises the weights; later stages continue from the current ones.
schedule = [
    (0.001, 8, True),
    (0.0005, 15, False),
    (0.0001, 20, False),
    (0.00005, 25, False),
    (0.00001, 25, False),
]
device = try_gpu()
# Keep every stage's history so the full training curve can be inspected;
# `history` still refers to the most recent stage.
stage_histories = []
for lr, epochs, net_init in schedule:
    history = train(train_iter, test_iter, net, lr, epochs, device, net_init)
    stage_histories.append(history)

In [None]:
# Persist the trained parameters (the state dict only, not the module object).
torch.save(
    net.state_dict(),
    'net_weight.pt',
)