In [21]:
import torch
import torch.utils.data as Data 
from torch.nn import init
import torch.optim as optim 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from collections import OrderedDict
import numpy as np
import time 
import sys
from IPython import display
from matplotlib import pyplot as plt

In [2]:
def show_fashion_mnist(images_list, labels_list):
    display.set_matplotlib_formats('svg')
    _, figs_list = plt.subplots(1, len(images_list), figsize = (12, 12))
    for fig, image, label in zip(figs_list, images_list, labels_list):
        fig.imshow(image.view(28, 28).numpy())
        fig.set_title(label)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
    plt.show()

In [3]:
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

In [3]:
mnist_train = torchvision.datasets.FashionMNIST(root = './Dataset/FashionMNIST', train = True, 
                                                    download = True, transform = transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root = './Dataset/FashionMNIST', train = False, 
                                                   download = True, transform = transforms.ToTensor())
images_list, labels_list = [], []
for i in range(10):
    images_list.append(mnist_train[i][0])
    labels_list.append(mnist_train[i][1])
show_fashion_mnist(images_list, get_fashion_mnist_labels(labels_list))    

NameError: name 'get_fashion_mnist_labels' is not defined

In [18]:
#数据切割
def load_data_fashion_mnist(batch_size):
    mnist_train = torchvision.datasets.FashionMNIST(root = './Dataset/FashionMNIST', train = True, 
                                                    download = True, transform = transforms.ToTensor())
    mnist_test = torchvision.datasets.FashionMNIST(root = './Dataset/FashionMNIST', train = False, 
                                                   download = True, transform = transforms.ToTensor())
    if sys.platform.startswith('win'):
        num_workers = 4
    else:
        num_workers = 0
    train_iter = Data.DataLoader(mnist_train, batch_size = batch_size, shuffle = True, num_workers = num_workers)
    test_iter = Data.DataLoader(mnist_test, batch_size = batch_size, shuffle = True, num_workers = num_workers)
    return train_iter, test_iter

train_iter, test_iter = load_data_fashion_mnist(batch_size = 256)

In [63]:
#参数初始化
num_inputs = 784
num_outputs = 10
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype = torch.float, requires_grad = True)
b = torch.zeros(num_outputs, dtype = torch.float, requires_grad = True)

In [64]:
#softmax 函数
def softmax_fn(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim = 1, keepdim = True)
    return X_exp/partition

In [70]:
#loss 函数
def cross_entropy(y_hat, y):
    return (-torch.log(y_hat.gather(1, y.view(-1,1)))).mean()

In [66]:
#evaluation 函数
def accuracy(y_hat, y):
    return (y_hat.argmax(dim = 1, keepdim = True) == y.view(-1,1)).float().mean().item()

In [67]:
#model 函数
def network(x, w, b):
    return softmax_fn(torch.mm(x.view(-1, w.shape[0]), w) + b)

In [68]:
#正确率评估
def evaluate_accuracy(data_iter, model, params = None):
    acc_sum, n = 0.0, 0
    if params is not None:
        for X, y in data_iter:
            acc_sum += (model(X, *params).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    else:   
        for X, y in data_iter:
            acc_sum += (model(X).argmax(dim=1) ==y).float().sum().item()
            n += y.shape[0]
    return acc_sum/n

In [74]:
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr*param.grad 

In [75]:
def train_ch3(n_epoch, batch_size, train_iter, test_iter, model, loss_fn, evaluate_accuracy, params = None, lr = None, optimizer = None):
    for epoch in range(n_epoch):
        train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            if params is not None:
                y_hat = model(X, *params)
            else:
                y_hat = model(X)
            
            loss = loss_fn(y_hat, y)
            
            #梯度清理
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            
            #backpropagation
            loss.backward()
            
            #参数更新
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()
                
            #train_loss, train_acc
            train_loss_sum += loss.item()
            train_acc_sum += (y_hat.argmax(dim = 1, keepdim = True) == y.view(-1,1)).float().sum().item() 
            n += y.shape[0]
            
        if epoch%5 ==0:
            if params is not None:
                test_acc = evaluate_accuracy(test_iter, model, params)
            else:
                test_acc = evaluate_accuracy(test_iter, model)
            train_acc = train_acc_sum/n
            train_loss = train_loss_sum/n
            print('epoch: %d, train_loss: %.4f, train_acc: %.2f, test_acc: %.2f'%(epoch, train_loss, train_acc, test_acc))

In [76]:
train_ch3(n_epoch = 30, batch_size = 256, train_iter =train_iter, test_iter = test_iter, 
          model = network, loss_fn = cross_entropy, evaluate_accuracy = evaluate_accuracy, 
          params = [w, b], lr = 1e-2, optimizer = None)

epoch: 0, train_loss: 0.0045, train_acc: 0.68, test_acc: 0.69
epoch: 5, train_loss: 0.0026, train_acc: 0.79, test_acc: 0.78
epoch: 10, train_loss: 0.0023, train_acc: 0.81, test_acc: 0.80
epoch: 15, train_loss: 0.0022, train_acc: 0.82, test_acc: 0.81
epoch: 20, train_loss: 0.0021, train_acc: 0.83, test_acc: 0.81
epoch: 25, train_loss: 0.0020, train_acc: 0.83, test_acc: 0.82


In [41]:
num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y

class FlattenLayer(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self, x):
        return x.view(x.shape[0],-1)
        

In [43]:
network = LinearNet(num_inputs, num_outputs)
#network = nn.Sequential(OrderedDict([('flatten', FlattenLayer()), ('linear', nn.Linear(num_inputs, num_outputs))])))

In [59]:
init.normal_(network.linear.weight, mean = 0, std = 0.01)
init.constant_(network.linear.bias, val = 0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [60]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(network.parameters(), lr = 1e-2)

In [61]:
train_ch3(n_epoch = 20, batch_size = 256, 
          train_iter = train_iter, test_iter = test_iter, 
          model = network, loss_fn = loss_fn, evaluate_accuracy = evaluate_accuracy, 
          params = None, lr = None, optimizer = optimizer)

epoch: 0, train_loss: 0.0054, train_acc: 0.64, test_loss: 0.68
epoch: 5, train_loss: 0.0026, train_acc: 0.79, test_loss: 0.78
epoch: 10, train_loss: 0.0023, train_acc: 0.81, test_loss: 0.80
epoch: 15, train_loss: 0.0022, train_acc: 0.82, test_loss: 0.81


In [119]:
def model(t_u, w, b):
    t_p = w*t_u + b
    return t_p

In [120]:
def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()

In [123]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    for epoch in range(1, n_epochs+1):
        if params.grad is not None:
            params.grad.zero_()
        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()
        params = (params - learning_rate*params.grad).detach().requires_grad_()  
        if epoch%50 == 0:
            print('Epoch %d, Loss %f'% (epoch, loss.float()))
    return params