In [66]:
import time

import torch
from torch import nn

In [67]:
def corr2d(x,k):
    h,w=k.shape
    y=torch.zeros(x.shape[0]-h+1,x.shape[1]-w+1)
    for i in range(y.shape[0]):
        for j in range(y.shape[1]):
            y[i,j]=(x[i:i+h,j:j+w]*k).sum()
    return y

In [68]:
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [69]:
class conv2d(nn.Module):
    def __init__(self,kernel_size):
        super().__init__()
        self.weight=nn.Parameter(torch.rand(kernel_size))
        self.bias=nn.Parameter(torch.zeros(1))
    def forward(self,x):
        return corr2d(x,self.weight)+self.bias

In [70]:
x=torch.ones(6,8)
x[:,2:6]=0
k=torch.tensor([[1.0,-1.0]])
y=corr2d(x,k)
y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [71]:
conv=nn.Conv2d(1,1,kernel_size=(1,2),bias=False)
x = x.reshape((1, 1, 6, 8))
y = y.reshape((1, 1, 6, 7))
lr = 3e-2  # 学习率

for i in range(10):
    y_hat=conv(x)
    l=(y_hat-y)**2
    conv.zero_grad()
    l.sum().backward()
    conv.weight.data[:]-=lr*conv.weight.grad
    if (i + 1) % 2 == 0:
        print(f'epoch {i+1}, loss {l.sum():.3f}')

conv.weight.data

epoch 2, loss 9.107
epoch 4, loss 2.405
epoch 6, loss 0.763
epoch 8, loss 0.275
epoch 10, loss 0.106


tensor([[[[ 1.0208, -0.9548]]]])

In [72]:
def comp_conv2d(conv2d,x):
    print(x.shape)
    x=x.reshape((1,1)+x.shape)
    print(x.shape)
    y=conv2d(x)
    return y.reshape(y.shape[2:])

conv2d=nn.Conv2d(1,1,kernel_size=3,padding=1)
x=torch.rand((8,8))
comp_conv2d(conv2d,x).shape
conv2d=nn.Conv2d(1,1,kernel_size=(5,3),padding=(2,1))
comp_conv2d(conv2d,x).shape
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])
torch.Size([1, 1, 8, 8])
torch.Size([8, 8])
torch.Size([1, 1, 8, 8])
torch.Size([3, 3])
torch.Size([1, 1, 3, 3])


torch.Size([2, 2])

In [73]:
def corr2d_multi_in(X,K):
    return sum(corr2d(x,k) for x,k in zip(X,K))

In [74]:
X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
               [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

In [75]:
def corr2d_multi_out(X,K):
    return torch.stack([corr2d_multi_in(X,k1) for k1 in K],0)

In [76]:
K = torch.stack((K, K + 1, K + 2), 0)
K.shape

torch.Size([3, 2, 2, 2])

In [77]:
corr2d_multi_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

In [97]:
def corr2d_multi_in_out_1x1(X,K):
    c_i,h,w=X.shape
    c_o=K.shape[0]
    X=X.reshape((c_i,h*w))
    K=K.reshape((c_o,c_i))
    Y = torch.matmul(K, X)
    return Y.reshape((c_o,h,w))

In [98]:
X = torch.normal(0, 1, (3, 3, 3))
K = torch.normal(0, 1, (2, 3, 1, 1))
K.shape
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_out(X, K)
# assert float(torch.abs(Y1 - Y2).sum()) < 1e-6

torch.Size([2, 3])


In [105]:
def pool2d(X,pool_size,model='max'):
    p_h,p_w=pool_size
    Y=torch.zeros([X.shape[0]-p_h+1,X.shape[1]-p_w+1])
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if model=='max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max()
            else:
                Y[i, j] = X[i:i+p_h,j:j+p_w].mean()
    return Y

In [106]:
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
pool2d(X,[2,2],'avg')

tensor([[2., 3.],
        [5., 6.]])

In [116]:
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
pool2d = nn.MaxPool2d(3,padding=1,stride=2)
pool2d(X)
pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [120]:
X=torch.cat((X,X+1),1)
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]],

         [[ 6.,  8.],
          [14., 16.]],

         [[ 7.,  9.],
          [15., 17.]]]])

In [122]:
net=nn.Sequential(
    nn.Conv2d(1,6,kernel_size=5,padding=2),nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2,stride=2),
    nn.Conv2d(6,16,kernel_size=5),nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2,stride=2),
    nn.Flatten(),
    nn.Linear(16*5*5,120),nn.Sigmoid(),
    nn.Linear(120,84),nn.Sigmoid(),
    nn.Linear(84,10)
)

In [123]:
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
for layer in net:
    X=layer(X)
    print(layer.__class__.__name__,'output shape: \t',X.shape)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [131]:
batch_size = 256
import torchvision
import torchvision.transforms as transforms
from torch.utils import data
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', train=False, download=True, transform=transforms.ToTensor())
mnist_test,mnist_train
train_iter=data.DataLoader(mnist_train,batch_size,shuffle=True,num_workers=4)
test_iter=data.DataLoader(mnist_test,batch_size,shuffle=True,num_workers=4)

In [138]:
import time
def train(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs):
    net=net.to(device)
    print("training on: ",device)
    loss=nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n_train,n_test,batch_count,start,test_acc_sum=0.0,0.0,0,0,0,time.time(),0
        for X,y in train_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n_train+=y.shape[0]
            batch_count+=1
        for X,y in test_iter:
            net.eval()
            test_acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
            net.train()
            n_test+=y.shape[0]
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n_train, test_acc_sum/n_test, time.time() - start))



In [139]:
lr, num_epochs = 0.001, 5
device="cuda"
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on:  cuda
epoch 1, loss 1.7892, train acc 0.353, test acc 0.591, time 16.3 sec
epoch 2, loss 0.9025, train acc 0.652, test acc 0.682, time 6.0 sec
epoch 3, loss 0.7344, train acc 0.723, test acc 0.738, time 6.2 sec
epoch 4, loss 0.6518, train acc 0.747, test acc 0.747, time 5.7 sec
epoch 5, loss 0.6029, train acc 0.766, test acc 0.767, time 5.4 sec
