# 代码实践题目

## 题目1（奇偶数预测问题）

In [28]:
import torch
import torch.nn as nn
import math

In [128]:
class OddAndEven(nn.Module):
    """Compare three input encodings for classifying the parity of a number.

    Each encoding feeds its own, identically shaped MLP (10 -> 128 -> 2) so
    the three branches have the same number of parameters:

    * binary: the number written as a 10-bit binary vector (MSB first);
    * trigonometric: ``sin(x)`` / ``cos(x)`` interleaved to length 10;
    * direct: the raw number repeated 10 times.

    ``forward`` returns the raw logits of the three branches.
    """

    # Width shared by all three encodings (and the input size of every MLP).
    _WIDTH = 10

    def __init__(self) -> None:
        super().__init__()

        # Branch fed with the raw number repeated to length 10.
        self.net_direct = nn.Sequential(
            nn.Linear(10, 128),
            nn.ReLU(True),
            nn.Linear(128, 2),
        )

        # Branch fed with the binary encoding, length = 10.
        self.net_binary = nn.Sequential(
            nn.Linear(10, 128),
            nn.ReLU(True),
            nn.Linear(128, 2),
        )

        # Branch fed with the interleaved sin/cos encoding, length = 10.
        self.net_trigo = nn.Sequential(
            nn.Linear(10, 128),
            nn.ReLU(True),
            nn.Linear(128, 2),
        )

        # Not used by forward(); kept so existing code that accesses it works.
        self.sigmoid = nn.Sigmoid()

    def _binary_encode(self, x):
        """Encode each number in ``x`` as a 10-bit binary row, MSB first.

        Args:
            x: tensor holding one non-negative integer value per sample,
                shaped ``(batch, 1)`` or ``(batch,)``. Values are truncated
                to integers before encoding.

        Returns:
            Float tensor of shape ``(batch, 10)`` on ``x``'s device.

        Raises:
            ValueError: if any value is negative or needs more than 10 bits.
        """
        values = x.reshape(-1).long()
        limit = 2 ** self._WIDTH
        if bool(((values < 0) | (values >= limit)).any()):
            raise ValueError(
                f"_binary_encode expects values in [0, {limit - 1}]"
            )

        # Shift amounts 9, 8, ..., 0 so that column 0 holds the top bit.
        shifts = torch.arange(self._WIDTH - 1, -1, -1, device=values.device)
        bits = (values.unsqueeze(-1) >> shifts) & 1
        return bits.to(torch.float)

    def _trigo_encode(self, x):
        """Encode ``x`` as interleaved ``sin(x)`` / ``cos(x)`` of length 10.

        Even columns hold ``sin(x)`` and odd columns hold ``cos(x)``; the raw
        value is used as the angle. (An earlier variant scaled ``x`` by
        transformer-style frequencies ``exp(-2k * log(10000) / 10)``.)

        Args:
            x: tensor of shape ``(batch, 1)``.

        Returns:
            Tensor of shape ``(batch, 10)`` on ``x``'s device.
        """
        half = self._WIDTH // 2
        trigo_encode = torch.zeros(x.size(0), self._WIDTH, device=x.device)
        trigo_encode[..., 0::2] = torch.sin(x).repeat(1, half)
        trigo_encode[..., 1::2] = torch.cos(x).repeat(1, half)
        return trigo_encode

    def forward(self, x):
        """Run all three branches on a batch of numbers.

        Args:
            x: float tensor of shape ``(batch, 1)``.

        Returns:
            Tuple ``(binary_result, trigo_result, origin_result)`` of logits,
            each of shape ``(batch, 2)``.
        """
        binary_encode = self._binary_encode(x).to(x.device)
        trigo_encode = self._trigo_encode(x)
        origin_encode = x.repeat(1, self._WIDTH)

        binary_result = self.net_binary(binary_encode)
        trigo_result = self.net_trigo(trigo_encode)
        origin_result = self.net_direct(origin_encode)

        return binary_result, trigo_result, origin_result


In [137]:
def main():
    """Train the three parity classifiers on random integers and print a test.

    Each step draws a fresh batch of integers from [0, 1000) and labels it
    1 for even, 0 for odd. After training, ten integers from [0, 10) are
    classified and the predictions of each branch are printed with the labels.
    Runs on CUDA when available and falls back to CPU otherwise.
    """
    batch_size = 64
    epochs = 200
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    net = OddAndEven()
    net.to(device)
    net.train()
    loss_fn = nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    for epoch in range(epochs):
        data = torch.randint(0, 1000, size=(batch_size, 1), dtype=torch.float).to(device)
        label = (data % 2 == 0).long().squeeze(-1)
        b, t, o = net(data)
        l1 = loss_fn(b, label)
        l2 = loss_fn(t, label)
        l3 = loss_fn(o, label)

        # The three branches share no parameters, so one backward pass over
        # the summed loss yields the same gradients as three separate passes.
        optimizer.zero_grad()
        (l1 + l2 + l3).backward()
        optimizer.step()

        print(f"epoch:[{epoch:>3d}/{epochs:>3d}] binary_loss:{l1} triangle_loss:{l2} origin_loss:{l3}")

    print('----------------------------- Test -----------------------------' )
    net.eval()
    with torch.no_grad():  # inference only: no autograd graph needed
        data = torch.randint(0, 10, size=(10, 1), dtype=torch.float).to(device)
        label = (data % 2 == 0).long().squeeze(-1)
        b, t, o = net(data)
    print('binary_result:   ', torch.argmax(b, dim=-1))
    print('triangle_result: ', torch.argmax(t, dim=-1))
    print('origin_result:   ', torch.argmax(o, dim=-1))
    print('label:           ', label)

In [138]:
# Run the training loop and print the post-training test predictions.
main()

  l = num2binary(x//2)


epoch:[  0/200] binary_loss:0.7349872589111328 triangle_loss:0.6947663426399231 origin_loss:52.24180221557617
epoch:[  1/200] binary_loss:0.683316171169281 triangle_loss:0.6998624801635742 origin_loss:28.34395408630371
epoch:[  2/200] binary_loss:0.6706985235214233 triangle_loss:0.6971087455749512 origin_loss:9.654135704040527
epoch:[  3/200] binary_loss:0.6902305483818054 triangle_loss:0.7240961790084839 origin_loss:13.732337951660156
epoch:[  4/200] binary_loss:0.6652613878250122 triangle_loss:0.7089925408363342 origin_loss:22.04283905029297
epoch:[  5/200] binary_loss:0.6852871775627136 triangle_loss:0.6977459788322449 origin_loss:23.781091690063477
epoch:[  6/200] binary_loss:0.6469615697860718 triangle_loss:0.7086113691329956 origin_loss:18.387901306152344
epoch:[  7/200] binary_loss:0.6761390566825867 triangle_loss:0.6878255009651184 origin_loss:11.120370864868164
epoch:[  8/200] binary_loss:0.6498953700065613 triangle_loss:0.6898926496505737 origin_loss:3.8951528072357178
epoch:

## 总结

### 结果

![](demo/homework3_2_result.png)




### 设计

这里为了保证网络参数量公平，令三种编码的长度保持相同，三种方式如下：

- 二进制方法：对 [0,999] 范围的数转变成 10 位二进制数
- 三角函数法：对 sin、cos 进行交替编码，分别尝试了 position_encoding、直接对原来的数进行 cos、sin
- 直接数字法：将输入数字重复 10 次

### 分析
针对三种方案，其中 `二进制` 的方式是可行的

- `二进制`方法：因为二进制最后一位数与数的奇偶性具有强相关性，故经过迭代基本能够无误实现判断

- `三角函数`方法：按道理来说，三角函数具有周期性，只要能够学到周期为1，那应该是能够收敛的，但是我在测试的时候并没有收敛，可能是我设计的编码的周期不为1（代码中直接使用 sin(x)、cos(x)，其周期为 $2\pi$），导致出现了奇偶模糊，因为 position_encoding 中有 $PE_{t+\Delta t} = T_{\Delta t} PE_{t}$ 的变换关系

- `直接数字`方法：完全不收敛


## 题目2（应用实践）

![](demo/homework3_2.png)