# FiBiNet

下述流程，暂不考虑连续特征
- 假设所有连续特征都做了离散化处理

In [1]:
%cd /playground/sgd_deep_learning/sgd_rec_sys/
import sys
sys.path.append('./python')

/playground/sgd_deep_learning/sgd_rec_sys


In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torch.optim as optim

import pickle
import numpy as np

from sgd_rec_sys.cross import FiBiNet
from sgd_rec_sys.data import FakeCtrDataFactory, CtrDataset011, ctr_collate_fn_011

## CTR fake data生成
```
 multi-hot的最大采样数可以在FakeDataFactory中设置，默认为4
    def make_multi_hot_fea(self, n_samples, fea_list, max_len=4):
        ...
```

In [3]:
# input
n_samples = 1000 # 总样本数
n_dense_fea = 0 # 0 不生成连续数据
embedding_dim = K = 32 # fibinet所有特征维度固定，统一为设为K

one_hot_fea_list = [100, 34, 42, 10] # 4个fea, 字典大小分别为100，34，42，10
multi_hot_fea_list = [10, 20, 30] # 3个fea，字典大小分别为10，20，30

fields_num = F = len(one_hot_fea_list) + len(multi_hot_fea_list) # 总特征数
fields_dim = int(F*(1+F)/2 * K) #离散特征cross后,总编码维度

## 生成伪CTR数据
dump_file = './data/fake/tmp.pkl'
fake_data_factory = FakeCtrDataFactory(n_samples,
                                    n_dense_fea,
                                    one_hot_fea_list,
                                    multi_hot_fea_list,
                                    dtype=np.float32)
fake_data_factory.presist(dump_file)

one-hot feas success, shape: (1000, 4)
multi-hot feas success
label success, shape: (1000,)


## 参数设置

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device:', device)

# 训练参数
train_batch_size = 64
epochs = 5

with open(dump_file,'rb') as f:
    fake_data = pickle.load(f)
train_ds = CtrDataset011(fake_data)
train_dl = DataLoader(train_ds, batch_size=train_batch_size, shuffle=True, collate_fn=ctr_collate_fn_011(device))

# 网络结构
hidden_dims = [fields_dim*4, 1024, 256] # deep net 不包括分类层
reduction_ratio = 2 # senet中，类似autoencoder中间表示维度 = input_dim/reduction_ratio

# 定义模型
model = FiBiNet(senet_r=2,
                fix_emb_dim=K, # fibinet所有特征维度固定，统一为设为K
                deepnet_hidden_dims=hidden_dims,
                one_hot_fea_list=one_hot_fea_list,
                multi_hot_fea_list=multi_hot_fea_list,
                ).to(device)

criterion = nn.BCELoss()  # 二分类交叉熵损失函数
optimizer = optim.SGD(model.parameters(), lr=0.01)  # SGD 优化器
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-1, weight_decay=0.3)


device: cuda


  from .autonotebook import tqdm as notebook_tqdm


## training

In [5]:
def train(dataloader, model, epochs=1,):
    for epoch in range(epochs):
        for x in dataloader:
            y, one_hot_x, multi_hot_x = x
            # 前向传播
            outputs = model((one_hot_x, multi_hot_x))
            loss = criterion(outputs, y.reshape(-1,1))
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # log
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')
            
train(train_dl, model, epochs)

Epoch [1/5], Loss: 0.6925
Epoch [2/5], Loss: 0.6926
Epoch [3/5], Loss: 0.6937
Epoch [4/5], Loss: 0.6938
Epoch [5/5], Loss: 0.6921


## fibinet使用演示 END

---
## Tricks in bilinear interaction layer
    下述tricks与fibinet的实现细节相关，可以跳过

* tirck1: for self.w_type == 'field-all':

In [6]:
import torch
'''
演示
    A1 [K,F,1] @ A2 [K,1,F] 
        => out1 [K,F,F]
        => out2 [F,F,K]
'''
F, K = 3,2
A = torch.tensor([[1,10],
                  [2,20],
                  [3,30]])
print(A.shape)

# 将矩阵 A 和 B 分别增加一个维度，然后执行元素乘积
A1 = A.unsqueeze(1).permute((2,0,1))  # 在第二个维度上增加一个维度
A2 = A.unsqueeze(0).permute((2,0,1))  # 在第一个维度上增加一个维度
print(A1.shape, A2.shape)

out1 = A1 @ A2
print(out1.shape)
print(out1)

out2 = out1.permute((1,2, 0))
print(out2.shape)
print(out2)

torch.Size([3, 2])
torch.Size([2, 3, 1]) torch.Size([2, 1, 3])
torch.Size([2, 3, 3])
tensor([[[  1,   2,   3],
         [  2,   4,   6],
         [  3,   6,   9]],

        [[100, 200, 300],
         [200, 400, 600],
         [300, 600, 900]]])
torch.Size([3, 3, 2])
tensor([[[  1, 100],
         [  2, 200],
         [  3, 300]],

        [[  2, 200],
         [  4, 400],
         [  6, 600]],

        [[  3, 300],
         [  6, 600],
         [  9, 900]]])


In [7]:
import torch
'''
演示
    Vi [B,K,F,1] @ W [B,K,1,F] 
        => out1 [B,K,F,F]
        => out2 [B,F,F,K]
'''

# 示例矩阵 A 和 B
B,F,K =2,3,2
A = torch.tensor([[[1,10],[2,20],[3,30]],
                  [[4,41],[5, 51],[6,61]]])

print(A.shape)

# 将矩阵 A 和 B 分别增加一个维度，然后执行元素乘积
A1 = A.unsqueeze(2).permute((0,3,1,2))  # 在第二个维度上增加一个维度
A2 = A.unsqueeze(1).permute((0,3,1,2))  # 在第一个维度上增加一个维度
print(A1.shape, A2.shape)

out1 = A1@A2
print(out1.shape)
print(out1)

out2 = out1.permute((0,2,3,1))
print(out2.shape)
print(out2[0])
print(out2[1])

torch.Size([2, 3, 2])
torch.Size([2, 2, 3, 1]) torch.Size([2, 2, 1, 3])
torch.Size([2, 2, 3, 3])
tensor([[[[   1,    2,    3],
          [   2,    4,    6],
          [   3,    6,    9]],

         [[ 100,  200,  300],
          [ 200,  400,  600],
          [ 300,  600,  900]]],


        [[[  16,   20,   24],
          [  20,   25,   30],
          [  24,   30,   36]],

         [[1681, 2091, 2501],
          [2091, 2601, 3111],
          [2501, 3111, 3721]]]])
torch.Size([2, 3, 3, 2])
tensor([[[  1, 100],
         [  2, 200],
         [  3, 300]],

        [[  2, 200],
         [  4, 400],
         [  6, 600]],

        [[  3, 300],
         [  6, 600],
         [  9, 900]]])
tensor([[[  16, 1681],
         [  20, 2091],
         [  24, 2501]],

        [[  20, 2091],
         [  25, 2601],
         [  30, 3111]],

        [[  24, 2501],
         [  30, 3111],
         [  36, 3721]]])


In [8]:
import torch
'''
演示
    利用mask从[F,F]中提取下三角矩阵元素
'''
matrix = torch.tensor([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]])

# 生成下三角部分的逻辑掩码
mask = torch.tril(torch.ones(3,3, dtype=torch.bool))
print(mask)

# 使用逻辑掩码提取下三角部分并展平为一维张量
lower_triangle_1d = matrix[mask]

print("Lower triangle without zeros as 1D tensor:")
print(lower_triangle_1d)

tensor([[ True, False, False],
        [ True,  True, False],
        [ True,  True,  True]])
Lower triangle without zeros as 1D tensor:
tensor([1, 4, 5, 7, 8, 9])


trick2：self.w_type == 'field-each':

e:[B, F, K] => e[B,F,1,K]
e[B,F,1,K] @ W [F,K,K] => [B, F, 1, K]  
@支持上述操作
每个F相互对应, B维度可以自动broadcast
'''

In [9]:
import torch

'''
演示
    Vi:[B, F, 1, K] @ Wi: [F, K, K]
        => [B, F, 1, K]
'''
B, F, K = 2, 3, 4

A = torch.tensor([[i*(10**j) for j in range(K)] for i in range(1, F+1)], dtype=torch.float32) # FK
AA  = torch.stack([A, 5*A],dim=0).reshape(B,F,1,K)
print(AA.shape)

B = torch.ones(K,K)
BB = torch.stack([B,2*B, 3*B], dim=0)
print(BB.shape)

print(AA)
print(BB)

C = AA@BB
print(C.shape) # [B,F,1, K]
print(C)


torch.Size([2, 3, 1, 4])
torch.Size([3, 4, 4])
tensor([[[[1.0000e+00, 1.0000e+01, 1.0000e+02, 1.0000e+03]],

         [[2.0000e+00, 2.0000e+01, 2.0000e+02, 2.0000e+03]],

         [[3.0000e+00, 3.0000e+01, 3.0000e+02, 3.0000e+03]]],


        [[[5.0000e+00, 5.0000e+01, 5.0000e+02, 5.0000e+03]],

         [[1.0000e+01, 1.0000e+02, 1.0000e+03, 1.0000e+04]],

         [[1.5000e+01, 1.5000e+02, 1.5000e+03, 1.5000e+04]]]])
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]],

        [[3., 3., 3., 3.],
         [3., 3., 3., 3.],
         [3., 3., 3., 3.],
         [3., 3., 3., 3.]]])
torch.Size([2, 3, 1, 4])
tensor([[[[ 1111.,  1111.,  1111.,  1111.]],

         [[ 4444.,  4444.,  4444.,  4444.]],

         [[ 9999.,  9999.,  9999.,  9999.]]],


        [[[ 5555.,  5555.,  5555.,  5555.]],

         [[22220., 22220., 22220., 22

In [10]:
'''
演示
    自动broadcast
    [B, Fi, Fj, K] * [B, 1, Fj, K] => [B, Fi, Fj, K]
'''

B, Fi, Fj, K = 2,3,4,5

a = torch.randn(B,Fi,Fj,K)
b = torch.ones(B,1,Fj,K)

c= a*b 
print(c.shape)

torch.Size([2, 3, 4, 5])
