In [None]:
# 查看当前挂载的数据集目录
!ls /home/kesci/input/

In [None]:
# 查看个人持久化工作区文件
!ls /home/kesci/work/

In [None]:
# 查看当前kernerl下的package
!pip list --format=columns

In [None]:
# 显示cell运行时长
%load_ext klab-autotime

In [12]:
#import
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l
import torch
import torch.nn as nn
import torch.optim as optim
import time

In [13]:
class Flatten(torch.nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)

class Reshape(torch.nn.Module):
    def forward(self, x):
        return x.view(-1,1,28,28)
    
LeNet = torch.nn.Sequential(                                                
    Reshape(),
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.Sigmoid(),                                                       
    nn.AvgPool2d(kernel_size=2, stride=2),                              
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),           
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),                              
    Flatten(),                                                          
    nn.Linear(in_features=16*5*5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10)
)
X = torch.randn(size=(1,1,28,28), dtype = torch.float32)
for layer in LeNet:
    X = layer(X)
    print(layer.__class__.__name__,'output shape: \t',X.shape)

Reshape output shape: 	 torch.Size([1, 1, 28, 28])
Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [14]:
# net中参数有默认初始化，这里自定义初始化参数
def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)
LeNet.apply(init_weights)

Sequential(
  (0): Reshape()
  (1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (2): Sigmoid()
  (3): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (4): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (5): Sigmoid()
  (6): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (7): Flatten()
  (8): Linear(in_features=400, out_features=120, bias=True)
  (9): Sigmoid()
  (10): Linear(in_features=120, out_features=84, bias=True)
  (11): Sigmoid()
  (12): Linear(in_features=84, out_features=10, bias=True)
)

In [15]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(
    batch_size=batch_size, root='/home/kesci/input/FashionMNIST2065')

In [16]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [17]:
num_epochs = 5
LeNet.to(device)

loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(LeNet.parameters(), lr=0.5)

for epoch in range(num_epochs):
    train_l_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
    train_acc_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
    n = 0
    for X, y in train_iter:
        # train model
        LeNet.train()
        optimizer.zero_grad()
        X, y = X.to(device), y.to(device)
        y_hat = LeNet(X)
        l = loss(y_hat, y)
        l.backward()
        optimizer.step()
        
        with torch.no_grad():
            train_l_sum += l
            train_acc_sum += torch.sum(torch.argmax(y_hat, dim=1) == y)
            n += y.shape[0]
    acc_sum, n = torch.tensor([0], dtype=torch.float32, device=device), 0
    for X, y in test_iter:
        # eval model
        LeNet.eval()
        X, y = X.to(device), y.to(device)
        with torch.no_grad():
            y = y.long()
            acc_sum += torch.sum((torch.argmax(LeNet(X), dim=1) == y))
            n += y.shape[0]
    test_acc = acc_sum.item() / n
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc))

epoch 1, loss 0.0544, train acc 0.603, test acc 0.100
epoch 2, loss 0.0519, train acc 0.957, test acc 0.387
epoch 3, loss 0.0266, train acc 3.338, test acc 0.642
epoch 4, loss 0.0204, train acc 3.951, test acc 0.624
epoch 5, loss 0.0180, train acc 4.212, test acc 0.699


In [30]:
#test
acc_sum = 0
l_sum = 0
n = 0
for data, label in test_iter:
    data, label = data.to(device), label.to(device)
    # eval model
    LeNet.eval()
    y_pre = LeNet(data)
    acc_sum += torch.sum((torch.argmax(y_pre, dim=1) == label))
    l_sum += loss(y_pre, label)
    n += label.shape[0]
print('test loss: ', l_sum.item() / n)
print('test acc: ', acc_sum.item() / n)

test loss:  0.0029852605819702146
test acc:  0.6994
