In [185]:
import random
from PIL import Image, ImageDraw, ImageFont
import torch
from torch import nn
from torch.utils.data import DataLoader
import numpy as np

In [51]:
def number_to_image(number, size=(28, 14), font_path='arial.ttf', font_size=28):
    """Render a single decimal digit as a grayscale PIL image.

    The glyph is drawn in black (0) on a white (255) background, anchored at
    the image origin ``(0, 0)``.

    Args:
        number: Digit to render; must be one of 0..9.
        size: Image size as ``(height, width)``. It is reversed before being
            handed to PIL, which expects ``(width, height)``.
        font_path: TrueType font file passed to ``ImageFont.truetype``.
            Defaults to the previously hard-coded ``'arial.ttf'``.
        font_size: Font size passed to ``ImageFont.truetype``. Defaults to
            the previously hard-coded 28.

    Returns:
        A mode ``'L'`` PIL image containing the rendered digit.

    Raises:
        KeyError: If ``number`` is not one of the digits 0..9.
        OSError: If PIL cannot load the requested font file.
    """
    # Digit -> glyph lookup; anything that is not a digit raises KeyError.
    number_to_char = {digit: str(digit) for digit in range(10)}
    char = number_to_char[number]

    # Blank white canvas. PIL sizes are (width, height), hence the reversal.
    img = Image.new('L', size[::-1], color=255)

    # Draw the digit in black starting at the top-left corner.
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype(font_path, font_size)
    draw.text((0, 0), char, font=font, fill=0)

    return img


In [74]:
class NumberDataset:
    """Map-style dataset that yields randomly chosen pre-rendered digit images.

    Each of the ten digits is rendered once up front with ``number_to_image``.
    Every lookup returns one of those ten arrays picked at random, so the
    requested index does not select the sample.
    """

    def __init__(self, size=(28, 14), iters=1000):
        """Pre-render the ten digit images.

        Args:
            size: Image size forwarded to ``number_to_image``.
            iters: Number of samples the dataset reports through ``len()``.
        """
        self.iters = iters
        self.images = [np.array(number_to_image(number, size)) for number in range(10)]

    def __getitem__(self, index):
        """Return a random ``(image, label)`` pair.

        Args:
            index: Position of the requested sample. It is only used for
                bounds checking; the digit itself is drawn at random.

        Returns:
            Tuple of the digit image with a leading axis of length 1 added,
            and the digit (int in 0..9) that the image depicts.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        # Without this check plain iteration (``for x in dataset``,
        # ``list(dataset)``) never stops: the sequence iteration protocol
        # keeps calling __getitem__ until it sees IndexError.
        if not -self.iters <= index < self.iters:
            raise IndexError(
                f'index {index} is out of range for dataset of length {self.iters}'
            )
        num = random.randint(0, 9)
        return self.images[num][None, ...], num

    def __len__(self):
        """Return the configured number of samples per pass."""
        return self.iters

In [109]:
class Rec(nn.Module):
    """Digit classifier that looks only at the ``top_k`` largest weighted pixels.

    Every pixel is scaled by a learned row weight, a learned column weight
    (each softmax-normalised) and a learned per-pixel weight. The ``top_k``
    largest scaled values of each sample are fed to a linear layer that
    produces 10 class scores.
    """

    def __init__(self, size=(28, 14), top_k=3):
        """Create the learnable weights.

        Args:
            size: ``(rows, cols)`` of the input images.
            top_k: How many of the largest weighted pixel values are passed
                to the classifier. Defaults to 3.
        """
        super().__init__()
        self.top_k = top_k
        self.row = nn.Parameter(torch.ones(1, 1, size[0]))
        self.col = nn.Parameter(torch.ones(1, 1, size[1]))
        self.weights = nn.Parameter(torch.ones(1, 1, size[0], size[1]))
        self.classifier = nn.Linear(top_k, 10)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Batch of images that broadcasts against a
                ``(1, 1, size[0], size[1])`` weight map.

        Returns:
            Tensor of shape ``(batch, 10)`` with one score per digit class.
        """
        # Normalise the row and column parameters independently.
        row_softmax = torch.softmax(self.row, dim=-1)
        col_softmax = torch.softmax(self.col, dim=-1)

        # Outer product of the two distributions: one weight per pixel position.
        position_weight = row_softmax.unsqueeze(-1) * col_softmax.unsqueeze(-2)

        # Scale the pixels and flatten everything but the batch axis.
        weighted_x = (x * position_weight * self.weights).flatten(1)

        # Take the k largest values per sample, in descending order. The
        # previous hand-rolled loop masked each picked entry by multiplying it
        # with zero, so whenever the remaining values were negative the zeroed
        # slot won the next max and the feature became 0.
        top_values, _ = torch.topk(weighted_x, self.top_k, dim=1)

        return self.classifier(top_values)


In [106]:
def train(batch_size=32, iter=1000, size=(28, 14)):
    """Fit a fresh ``Rec`` model on randomly sampled digit images.

    Args:
        batch_size: Number of samples per optimizer step.
        iter: Length of the ``NumberDataset`` to draw from, i.e. the total
            number of samples seen (not the number of optimizer steps).
        size: Image size passed to both the dataset and the model.

    Returns:
        The trained ``Rec`` model, living on CUDA when available, else CPU.

    The loss of every step is printed as it is computed.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    samples = NumberDataset(size, iter)
    batches = DataLoader(samples, batch_size=batch_size)
    model = Rec(size).to(device)
    adam = torch.optim.Adam(model.parameters(), lr=1e-1)

    i = 0
    for pixels, targets in batches:
        pixels, targets = pixels.to(device), targets.to(device)

        # One plain optimisation step on the cross-entropy loss.
        adam.zero_grad()
        loss = nn.functional.cross_entropy(model(pixels), targets)
        loss.backward()
        adam.step()

        print(f'Iter {i}, Loss: {loss.item()}')
        i += 1

    return model

In [107]:
# Stage 1: fit Rec on 32,000 random samples, i.e. 1000 steps at the default batch size of 32.
model = train(iter=1000 * 32)

Iter 0, Loss: 2.4886677265167236
Iter 1, Loss: 2.2672061920166016
Iter 2, Loss: 2.5261788368225098
Iter 3, Loss: 2.4924709796905518
Iter 4, Loss: 2.403188467025757
Iter 5, Loss: 2.509498119354248
Iter 6, Loss: 2.3198537826538086
Iter 7, Loss: 2.283595561981201
Iter 8, Loss: 2.4199378490448
Iter 9, Loss: 2.275085210800171
Iter 10, Loss: 2.458631753921509
Iter 11, Loss: 2.294389486312866
Iter 12, Loss: 2.469709873199463
Iter 13, Loss: 2.4438679218292236
Iter 14, Loss: 2.2742748260498047
Iter 15, Loss: 2.350750207901001
Iter 16, Loss: 2.2735185623168945
Iter 17, Loss: 2.3753228187561035
Iter 18, Loss: 2.312957286834717
Iter 19, Loss: 2.289559841156006
Iter 20, Loss: 2.4434070587158203
Iter 21, Loss: 2.311736583709717
Iter 22, Loss: 2.2251698970794678
Iter 23, Loss: 2.215135335922241
Iter 24, Loss: 2.2347946166992188
Iter 25, Loss: 2.2686538696289062
Iter 26, Loss: 2.290188789367676
Iter 27, Loss: 2.3577957153320312
Iter 28, Loss: 2.4085276126861572
Iter 29, Loss: 2.2971243858337402
Iter 3

In [175]:
class Recog(nn.Module):
    """Linear classifier over three fixed pixel positions taken from a trained model.

    The positions are derived once, at construction time, from the ``row`` and
    ``col`` parameters of the given model: the three highest-valued rows are
    each paired with the single highest-valued column.
    """

    def __init__(self, model):
        """Select the pixel positions and create the classifier.

        Args:
            model: Object exposing ``row`` and ``col`` parameters (such as a
                trained ``Rec``); only their ``.data`` is read.
        """
        super().__init__()

        # Rank rows (keep the best three) and columns (keep the best one).
        top_rows = model.row.data.topk(3, dim=-1).indices.reshape(-1)
        top_col = model.col.data.topk(1, dim=-1).indices.reshape(-1)

        # Pair every selected row with the selected column as plain ints.
        self.indices = [(r.item(), top_col.item()) for r in top_rows]
        print(self.indices)

        # Three pixel values in, ten class scores out.
        self.classifier = nn.Sequential(nn.Linear(3, 10))

    def forward(self, x):
        """Classify a batch from the values at the selected positions.

        Args:
            x: 4-D batch indexed as ``x[:, :, row, col]``. The classifier
                takes 3 features, so a single channel is presumably expected.

        Returns:
            The classifier output, one row of 10 scores per sample.
        """
        # Each lookup gives one column per channel; join them side by side.
        picked = [x[:, :, r, c] for r, c in self.indices]
        return self.classifier(torch.cat(picked, dim=1))


In [162]:
def refine(model, batch_size=32, iters=1000, size=(28, 14)):
    """Train a ``Recog`` classifier built from an already trained model.

    Args:
        model: Trained model handed to ``Recog`` to pick the pixel positions.
        batch_size: Number of samples per optimizer step.
        iters: Length of the ``NumberDataset`` to draw from (total samples).
        size: Image size passed to the dataset.

    Returns:
        The trained ``Recog`` model, on CUDA when available, else CPU.

    The loss of every step is printed as it is computed.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    loader = DataLoader(NumberDataset(size, iters=iters), batch_size=batch_size)

    # From here on ``model`` refers to the new Recog wrapper, not the argument.
    model = Recog(model).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-1)

    for i, batch in enumerate(loader):
        # Images are cast to float after the device transfer; labels stay as collated.
        images = batch[0].to(device).float()
        labels = batch[1].to(device)

        optimizer.zero_grad()
        loss = nn.functional.cross_entropy(model(images), labels)
        loss.backward()
        optimizer.step()

        print(f'Iter {i}, Loss: {loss.item()}')

    return model
    

In [176]:
# Stage 2: train the 3-pixel Recog classifier for 500 steps at the default batch size of 32.
net = refine(model, iters=500 * 32)

[(21, 9), (23, 9), (13, 9)]
Iter 0, Loss: 114.80853271484375
Iter 1, Loss: 166.4661102294922
Iter 2, Loss: 133.0830841064453
Iter 3, Loss: 88.10472869873047
Iter 4, Loss: 124.2704849243164
Iter 5, Loss: 98.24358367919922
Iter 6, Loss: 61.2487907409668
Iter 7, Loss: 60.45774459838867
Iter 8, Loss: 40.07974624633789
Iter 9, Loss: 39.68037414550781
Iter 10, Loss: 44.85774230957031
Iter 11, Loss: 54.256103515625
Iter 12, Loss: 66.04409790039062
Iter 13, Loss: 29.10091781616211
Iter 14, Loss: 33.454246520996094
Iter 15, Loss: 21.744857788085938
Iter 16, Loss: 24.298070907592773
Iter 17, Loss: 27.944316864013672
Iter 18, Loss: 22.294322967529297
Iter 19, Loss: 23.44327735900879
Iter 20, Loss: 21.243778228759766
Iter 21, Loss: 17.17523193359375
Iter 22, Loss: 20.602893829345703
Iter 23, Loss: 6.984119892120361
Iter 24, Loss: 14.702832221984863
Iter 25, Loss: 16.568063735961914
Iter 26, Loss: 23.183303833007812
Iter 27, Loss: 12.050646781921387
Iter 28, Loss: 16.65751075744629
Iter 29, Loss: 1

In [177]:
# Print each learned parameter of the refined classifier next to its name.
for named_param in net.named_parameters():
    print(*named_param)

classifier.0.weight Parameter containing:
tensor([[ 0.2946, -0.1655,  0.6585],
        [-0.5076, -0.8356, -0.4289],
        [ 0.0706,  0.5776,  0.4847],
        [ 0.6253,  0.2506, -0.1518],
        [ 0.0292,  0.3580,  0.6305],
        [ 0.8279,  0.1487, -1.0714],
        [ 0.4989,  0.7101, -0.8247],
        [ 0.1666,  1.1126, -1.7727],
        [ 0.4917,  0.2399,  0.2070],
        [ 0.3406, -0.8976,  0.8716]], device='cuda:0', requires_grad=True)
classifier.0.bias Parameter containing:
tensor([-0.6053,  9.5437, -2.6593, -0.3391,  4.2295,  0.3279, -0.5293,  0.2820,
        -1.1937, -0.6728], device='cuda:0', requires_grad=True)


In [186]:
import numpy as np

def recog(image):
    """Classify a digit image with a hard-coded 3-pixel linear model.

    Args:
        image: Anything ``np.array`` can convert into a 2-D pixel grid, such
            as a PIL image or an ndarray, indexable at rows 13, 21, 23 and
            column 9.

    Returns:
        Index (0..9) of the class with the highest linear score.
    """
    # Per-class offsets of the linear model.
    bias = np.array([-0.6053,  9.5437, -2.6593, -0.3391,  4.2295,  0.3279, -0.5293,  0.2820,
                     -1.1937, -0.6728])

    # Per-class weights: one row per digit, one column per sampled pixel.
    weights = np.array([
        [ 0.2946, -0.1655,  0.6585],
        [-0.5076, -0.8356, -0.4289],
        [ 0.0706,  0.5776,  0.4847],
        [ 0.6253,  0.2506, -0.1518],
        [ 0.0292,  0.3580,  0.6305],
        [ 0.8279,  0.1487, -1.0714],
        [ 0.4989,  0.7101, -0.8247],
        [ 0.1666,  1.1126, -1.7727],
        [ 0.4917,  0.2399,  0.2070],
        [ 0.3406, -0.8976,  0.8716]
    ])

    # Sampled (row, col) positions, split into parallel index lists.
    rows, cols = zip(*((21, 9), (23, 9), (13, 9)))

    # Convert the image to an array (callers classifying many images may want
    # to pass an array directly) and read the three pixels in one lookup.
    pixels = np.array(image)
    features = pixels[list(rows), list(cols)]

    # Linear scores for the ten classes; the best-scoring class wins.
    scores = weights @ features + bias
    return scores.argmax()


In [179]:
def test(num):
    """Render digit ``num``, classify it with ``recog`` and print both values."""
    predicted = recog(number_to_image(num))
    print(num, predicted)

In [188]:
%time
for i in range(10):
    test(i)

CPU times: total: 0 ns
Wall time: 0 ns
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
