## 本文档实现了一个根据用户属性和商品属性预测单个商品是否会被购买的模型。模型参考了 DIN，但使用了比较简单的模型输入。

202108

## pre

In [None]:
!mkdir /content/drive/MyDrive/202108-bigdatacup2021

In [None]:
%cd /content/drive/MyDrive/202108-bigdatacup2021

/content/drive/MyDrive/202108-bigdatacup2021


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

In [None]:
class VanillaBaseModel(nn.Module):
    """Pointwise purchase predictor for one (user, candidate item) pair.

    Loosely inspired by DIN, but with a deliberately simple input: the click
    history is summarised as the plain average of the clicked items'
    embeddings (no attention, and no item attributes for the history items).
    That summary is concatenated with the user profile embeddings, the
    candidate item's discrete/continuous features and the candidate item's
    embedding, and fed to a small MLP that outputs one logit.

    Args:
        num_items: number of distinct item ids. Id 0 is reserved for
            "no record", so the embedding table has ``num_items + 1`` rows.
        dim_item_emb: width of the item-id embedding.
        dim_item_discrete_feature_emb: width of each item discrete-feature
            embedding.
        dim_user_discrete_feature_emb: width of each user discrete-feature
            embedding.
    """

    def __init__(self,
                 num_items,
                 dim_item_emb=64,
                 dim_item_discrete_feature_emb=16,
                 dim_user_discrete_feature_emb=16,
                 ):
        super().__init__()
        self.NUM_ITEM_DISCRETE_FEATURE = 3 + 1  # 3 from item_vec + 1 location
        self.NUM_ITEM_CONT_FEATURE = 2 + 1  # 2 from item_vec + 1 price
        self.NUM_USER_DISCRETE_FEATURE = 10
        self.dim_item_emb = dim_item_emb

        # Index 0 means "no record", hence num_items + 1 rows.
        self.item_emb = nn.Embedding(num_items + 1, dim_item_emb)

        # One embedding table per item discrete feature; the list holds the
        # number of distinct (re-mapped) values of each feature.
        self.item_discrete_feature_emb_list = nn.ModuleList()
        num_unique_value_list = [4, 10, 2, 3]
        for i in range(self.NUM_ITEM_DISCRETE_FEATURE):
            self.item_discrete_feature_emb_list.append(
                nn.Embedding(num_unique_value_list[i], dim_item_discrete_feature_emb)
            )

        # One embedding table per user discrete feature (same idea as above).
        self.user_discrete_feature_emb_list = nn.ModuleList()
        num_unique_value_list = [3, 1430, 20, 10, 198, 52, 3, 13, 2, 2347]
        for i in range(self.NUM_USER_DISCRETE_FEATURE):
            self.user_discrete_feature_emb_list.append(
                nn.Embedding(num_unique_value_list[i], dim_user_discrete_feature_emb)
            )

        # MLP over the concatenation built in forward(); the input width is
        # the sum of the widths of the five concatenated parts.
        self.backbone = nn.Sequential(
            nn.Linear(dim_item_emb +  # averaged click-history embedding
                      self.NUM_ITEM_DISCRETE_FEATURE * dim_item_discrete_feature_emb +
                      self.NUM_ITEM_CONT_FEATURE +
                      self.NUM_USER_DISCRETE_FEATURE * dim_user_discrete_feature_emb +
                      dim_item_emb,  # candidate item embedding
                      200),
            nn.Dropout(0.1),
            nn.PReLU(),
            nn.LayerNorm(200),
            nn.Linear(200, 80),
            nn.PReLU(),
            nn.Dropout(0.1),
            nn.LayerNorm(80),
            nn.Linear(80, 1)
        )

    def forward(self,
                user_click_history,
                num_user_click_history,
                user_discrete_feature,
                item_id,
                item_discrete_feature,
                item_cont_feature
                ):
        """Compute one purchase logit per sample.

        Args:
            user_click_history: [N, L] item ids of the click history, padded
                with 0 ("no record") after the valid prefix.
            num_user_click_history: [N, 1] (or [N]) number of valid history
                entries per sample; values are clamped to [0, L].
            user_discrete_feature: [N, 10] re-mapped user discrete attributes.
            item_id: [N, 1] (or [N]) candidate item id.
            item_discrete_feature: [N, 3 + 1] re-mapped item discrete
                attributes (3 from item_vec + location).
            item_cont_feature: [N, 2 + 1] normalised item continuous
                attributes (2 from item_vec + price).

        Returns:
            Tensor of shape [N, 1] with raw logits (no sigmoid applied).
        """
        ## User behaviours: only the item-id embedding is used here, not the
        ## attributes of the clicked items (unlike the DIN paper).
        history_emb = self.item_emb(user_click_history)  # [N, L, D]
        max_len = history_emb.size(1)
        lengths = num_user_click_history.reshape(-1, 1).clamp(min=0, max=max_len)  # [N, 1]
        positions = torch.arange(max_len, device=history_emb.device).unsqueeze(0)  # [1, L]
        # 1.0 on the valid prefix of each row, 0.0 on the padding.
        valid = (positions < lengths).unsqueeze(-1).to(history_emb.dtype)  # [N, L, 1]
        # Masked mean. Dividing by at least 1 makes an empty history yield a
        # zero vector instead of NaN. Everything stays on the input's device.
        denom = lengths.clamp(min=1).to(history_emb.dtype)  # [N, 1]
        user_click_history_emb = (history_emb * valid).sum(dim=1) / denom  # [N, D]

        ## User profile features: one embedding per discrete attribute.
        user_discrete_feature_emb = torch.cat(
            [emb(user_discrete_feature[:, i])
             for i, emb in enumerate(self.user_discrete_feature_emb_list)],
            dim=1,
        )

        ## Candidate item: one embedding per discrete attribute ...
        item_discrete_feature_emb = torch.cat(
            [emb(item_discrete_feature[:, i])
             for i, emb in enumerate(self.item_discrete_feature_emb_list)],
            dim=1,
        )
        # ... plus the item-id embedding. reshape (rather than a bare
        # squeeze) keeps the batch axis even when N == 1.
        item_emb = self.item_emb(item_id).reshape(-1, self.dim_item_emb)  # [N, D]

        all_emb = torch.cat([user_click_history_emb,
                             user_discrete_feature_emb,
                             item_discrete_feature_emb,
                             item_cont_feature,
                             item_emb,
                            ], dim=1)  # [N, total_width]

        out = self.backbone(all_emb)  # [N, 1]
        return out
                

In [None]:
# Throw-away instance used by the shape smoke test in the next cell.
m = VanillaBaseModel(
    381,
    dim_item_emb=64,
    dim_item_discrete_feature_emb=16,
    dim_user_discrete_feature_emb=16,
)

In [None]:
# Smoke test: push one dummy batch through the model to check the shapes.
# Inputs, in forward() order (N = batch size):
#   user_click_history:     [N, 300]   up to 300 clicked item ids, 0 = no record
#   num_user_click_history: [N, 1]     number of valid history entries
#   user_discrete_feature:  [N, 10]    user discrete attributes (re-mapped)
#   item_id:                [N, 1]     candidate item id
#   item_discrete_feature:  [N, 3 + 1] item discrete attributes (re-mapped)
#   item_cont_feature:      [N, 2 + 1] item continuous attributes (normalised)

B = 3
m(
    torch.ones([B, 300], dtype=torch.int32),
    torch.full([B, 1], 10, dtype=torch.int32),
    torch.ones([B, 10], dtype=torch.int32),
    torch.ones([B, 1], dtype=torch.int32),
    torch.ones([B, 4], dtype=torch.int32),
    torch.randn([B, 3]),
)

tensor([[-1.0110],
        [-1.3368],
        [-1.4544]], grad_fn=<AddmmBackward>)

## dataloader

In [None]:
import pandas as pd
from sklearn.utils import shuffle
# Folder holding the competition CSV files on the mounted Drive.
data_path = '/content/drive/MyDrive/202108-bigdatacup2021/data/'
# Item metadata table; columns are separated by a single space.
df_item_info = pd.read_csv(
    '{}/item_info.csv'.format(data_path),
    sep=' ',
)

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.utils import shuffle

In [None]:
## Build the user-portrait value mappings. Each of the 10 portrait fields holds
## raw integer codes; every distinct code is assigned a dense 0-based index (in
## order of first appearance) so it can be used to index an embedding table.
## The train set and both test sets are scanned so that every code seen in any
## of them has an index.
data_path='/content/drive/MyDrive/202108-bigdatacup2021/data/'

# portraitidx_to_idx_dict_list: list of 10 dicts, raw code (int) -> dense index (int)
portraitidx_to_idx_dict_list = [dict() for _ in range(10)]
# acculumated_idx[f]: number of distinct codes registered so far for field f
acculumated_idx = [0] * 10


def _register_portrait_values(df):
    """Assign a dense index to every not-yet-seen portrait code in ``df``.

    Reads the comma-separated 'user_protrait' column row by row and updates
    the module-level ``portraitidx_to_idx_dict_list`` / ``acculumated_idx``
    in place.
    """
    for raw_portrait in tqdm(df['user_protrait']):
        user_portrait = [int(s) for s in raw_portrait.split(',')]
        for idx, u in enumerate(user_portrait):
            mapping = portraitidx_to_idx_dict_list[idx]
            if u not in mapping:
                mapping[u] = acculumated_idx[idx]
                acculumated_idx[idx] += 1


df_train = pd.read_csv(f'{data_path}/trainset.csv', sep=' ')
_register_portrait_values(df_train)

df_test1 = pd.read_csv(f'{data_path}/track1_testset.csv', sep=' ')
_register_portrait_values(df_test1)

df_test2 = pd.read_csv(f'{data_path}/track2_testset.csv', sep=' ')
_register_portrait_values(df_test2)

# Displayed as the cell result: the number of distinct codes per field.
acculumated_idx

100%|██████████| 260087/260087 [00:03<00:00, 71872.36it/s]
100%|██████████| 206254/206254 [00:02<00:00, 72847.23it/s]
100%|██████████| 206096/206096 [00:02<00:00, 72320.06it/s]


[3, 1430, 20, 10, 198, 52, 3, 13, 2, 2347]

In [None]:
def load_data(data_path='/content/drive/MyDrive/202108-bigdatacup2021/data/',
              max_click_history=400):
    """Load item metadata and the training set, split into train/validation.

    Every row of ``trainset.csv`` is expanded into 9 pointwise samples, one
    per exposed item, appended consecutively. Rows are shuffled with a fixed
    seed; the first 95% go to the training split and the rest to validation.

    Relies on the module-level ``portraitidx_to_idx_dict_list`` to re-map the
    raw user-portrait codes; a code missing from it raises ``KeyError``.

    Args:
        data_path: folder containing ``item_info.csv`` and ``trainset.csv``.
        max_click_history: length of the zero-padded click-history array.
            Longer histories are cut to their first ``max_click_history``
            entries instead of failing on the array assignment.

    Returns:
        ``(item_info_dict, train_samples, val_samples)`` where
        ``item_info_dict`` maps item id -> {'cont': float64 array of 3,
        'discrete': int64 array of 4} and each sample is a dict with keys
        'user_click_list', 'num_user_click_history', 'user_portrait',
        'item_id' and 'label'.
    """
    # ---- item info ----
    df_item_info = pd.read_csv(f'{data_path}/item_info.csv', sep=' ')
    item_info_dict = {}
    for i in tqdm(range(df_item_info.shape[0])):
        item_id = df_item_info.at[i, 'item_id']
        item_vec = df_item_info.at[i, 'item_vec'].split(',')

        # First 3 entries of item_vec are discrete, last 2 are continuous.
        item_discrete = [int(it) for it in item_vec[:3]]
        item_discrete[0] = item_discrete[0] - 1  # 1~4 -> 0~3
        item_discrete[2] = item_discrete[2] - 1  # 1~2 -> 0~1
        item_discrete.append(int(df_item_info.at[i, 'location'] - 1))  # 0~2

        item_cont = [float(it) for it in item_vec[-2:]]
        item_cont.append(float(df_item_info.at[i, 'price'] / 3000))  # scaled price

        item_info_dict[int(item_id)] = {
            'cont': np.array(item_cont, dtype=np.float64),  # 2 + 1
            'discrete': np.array(item_discrete, dtype=np.int64),  # 3 + 1
        }

    # ---- trainset ----
    train_samples = []
    val_samples = []
    df_train = pd.read_csv(f'{data_path}/trainset.csv', sep=' ')

    # Deterministic shuffle; drop=True avoids keeping the old index as a column.
    df_train = shuffle(df_train, random_state=2333).reset_index(drop=True)
    total_num = int(df_train.shape[0])
    num_train = int(total_num * 0.95)

    for i in tqdm(range(total_num)):
        raw_history = df_train.at[i, 'user_click_history']
        if raw_history == '0:0':
            # Placeholder for "no history": a single entry with item id 0.
            clicked = [0]
        else:
            # Entries look like "<item_id>:<something>"; only the id is kept.
            clicked = [int(entry.split(':')[0]) for entry in raw_history.split(',')]
        clicked = clicked[:max_click_history]
        num_user_click_history = len(clicked)
        user_click_list = np.zeros(max_click_history, dtype=np.int64)
        user_click_list[:num_user_click_history] = clicked

        exposed_items = [int(s) for s in df_train.at[i, 'exposed_items'].split(',')]
        labels = [int(s) for s in df_train.at[i, 'labels'].split(',')]

        # portraitidx_to_idx_dict_list: list of 10 dicts, raw code -> dense index
        raw_portrait = [int(s) for s in df_train.at[i, 'user_protrait'].split(',')]
        user_portrait = np.array(
            [portraitidx_to_idx_dict_list[j][raw_portrait[j]] for j in range(10)],
            dtype=np.int64,
        )

        bucket = train_samples if i < num_train else val_samples
        # One sample per exposed item. The user-side arrays are shared by the
        # 9 samples of a row and must be treated as read-only.
        for k in range(9):
            bucket.append({
                'user_click_list': user_click_list,
                'num_user_click_history': num_user_click_history,
                'user_portrait': user_portrait,
                'item_id': exposed_items[k],
                'label': labels[k]
            })
    return item_info_dict, train_samples, val_samples


class BigDataCupDataset(torch.utils.data.Dataset):
    """Labelled pointwise samples as tensors.

    ``database`` is a sequence of sample dicts with the keys
    'user_click_list', 'num_user_click_history', 'user_portrait', 'item_id'
    and 'label'. ``item_info_dict`` maps an item id to a dict with the
    'discrete' and 'cont' feature arrays of that item.
    """

    def __init__(self, item_info_dict, database):
        super().__init__()
        self.item_info_dict = item_info_dict
        self.database = database

    def __len__(self):
        return len(self.database)

    def __getitem__(self, idx):
        """Return the 7-tuple (history, history length, user features,
        item id, item discrete features, item continuous features, label).

        All integer fields come back as int32 tensors and the continuous
        item features as a float32 tensor; scalar fields are wrapped into
        1-element tensors.
        """
        record = self.database[idx]
        history = record['user_click_list']
        history_len = record['num_user_click_history']
        portrait = record['user_portrait']
        raw_item_id = record['item_id']
        item_discrete = self.item_info_dict[raw_item_id]['discrete']
        item_cont = self.item_info_dict[raw_item_id]['cont']
        target = record['label']

        int32 = torch.int32
        return (
            torch.tensor(history, dtype=int32),
            torch.tensor([history_len], dtype=int32),
            torch.tensor(portrait, dtype=int32),
            torch.tensor([raw_item_id], dtype=int32),
            torch.tensor(item_discrete, dtype=int32),
            torch.tensor(item_cont, dtype=torch.float32),
            torch.tensor([target], dtype=int32),
        )

In [None]:
def load_test_data(data_path='/content/drive/MyDrive/202108-bigdatacup2021/data/', filename='track1_testset.csv',
                   max_click_history=400):
    """Load item metadata and one test set as unlabeled pointwise samples.

    Every row of the test file is expanded into 9 samples, one per exposed
    item, appended consecutively in file order (no shuffling). No label
    column is read, so the file does not need one.

    Relies on the module-level ``portraitidx_to_idx_dict_list`` to re-map the
    raw user-portrait codes; a code missing from it raises ``KeyError``.

    Args:
        data_path: folder containing ``item_info.csv`` and the test file.
        filename: name of the test CSV inside ``data_path``.
        max_click_history: length of the zero-padded click-history array.
            Longer histories are cut to their first ``max_click_history``
            entries instead of failing on the array assignment.

    Returns:
        ``(item_info_dict, test_samples)`` where ``item_info_dict`` maps
        item id -> {'cont': float64 array of 3, 'discrete': int64 array of 4}
        and each sample is a dict with keys 'user_click_list',
        'num_user_click_history', 'user_portrait' and 'item_id'.
    """
    # ---- item info ----
    df_item_info = pd.read_csv(f'{data_path}/item_info.csv', sep=' ')
    item_info_dict = {}
    for i in tqdm(range(df_item_info.shape[0])):
        item_id = df_item_info.at[i, 'item_id']
        item_vec = df_item_info.at[i, 'item_vec'].split(',')

        # First 3 entries of item_vec are discrete, last 2 are continuous.
        item_discrete = [int(it) for it in item_vec[:3]]
        item_discrete[0] = item_discrete[0] - 1  # 1~4 -> 0~3
        item_discrete[2] = item_discrete[2] - 1  # 1~2 -> 0~1
        item_discrete.append(int(df_item_info.at[i, 'location'] - 1))  # 0~2

        item_cont = [float(it) for it in item_vec[-2:]]
        item_cont.append(float(df_item_info.at[i, 'price'] / 3000))  # scaled price

        item_info_dict[int(item_id)] = {
            'cont': np.array(item_cont, dtype=np.float64),  # 2 + 1
            'discrete': np.array(item_discrete, dtype=np.int64),  # 3 + 1
        }

    # ---- testset ----
    test_samples = []
    df_test = pd.read_csv(f'{data_path}/{filename}', sep=' ')
    total_num = int(df_test.shape[0])

    for i in tqdm(range(total_num)):
        raw_history = df_test.at[i, 'user_click_history']
        if raw_history == '0:0':
            # Placeholder for "no history": a single entry with item id 0.
            clicked = [0]
        else:
            # Entries look like "<item_id>:<something>"; only the id is kept.
            clicked = [int(entry.split(':')[0]) for entry in raw_history.split(',')]
        clicked = clicked[:max_click_history]
        num_user_click_history = len(clicked)
        user_click_list = np.zeros(max_click_history, dtype=np.int64)
        user_click_list[:num_user_click_history] = clicked

        exposed_items = [int(s) for s in df_test.at[i, 'exposed_items'].split(',')]

        # portraitidx_to_idx_dict_list: list of 10 dicts, raw code -> dense index
        raw_portrait = [int(s) for s in df_test.at[i, 'user_protrait'].split(',')]
        user_portrait = np.array(
            [portraitidx_to_idx_dict_list[j][raw_portrait[j]] for j in range(10)],
            dtype=np.int64,
        )

        # One sample per exposed item. The user-side arrays are shared by the
        # 9 samples of a row and must be treated as read-only.
        for k in range(9):
            test_samples.append({
                'user_click_list': user_click_list,
                'num_user_click_history': num_user_click_history,
                'user_portrait': user_portrait,
                'item_id': exposed_items[k],
            })
    return item_info_dict, test_samples


class BigDataCupTestDataset(torch.utils.data.Dataset):
    """Unlabeled pointwise samples as tensors.

    ``database`` is a sequence of sample dicts with the keys
    'user_click_list', 'num_user_click_history', 'user_portrait' and
    'item_id'. ``item_info_dict`` maps an item id to a dict with the
    'discrete' and 'cont' feature arrays of that item.
    """

    def __init__(self, item_info_dict, database):
        super().__init__()
        self.item_info_dict = item_info_dict
        self.database = database

    def __len__(self):
        return len(self.database)

    def __getitem__(self, idx):
        """Return the 6-tuple (history, history length, user features,
        item id, item discrete features, item continuous features).

        All integer fields come back as int32 tensors and the continuous
        item features as a float32 tensor; scalar fields are wrapped into
        1-element tensors.
        """
        record = self.database[idx]
        history = record['user_click_list']
        history_len = record['num_user_click_history']
        portrait = record['user_portrait']
        raw_item_id = record['item_id']
        item_discrete = self.item_info_dict[raw_item_id]['discrete']
        item_cont = self.item_info_dict[raw_item_id]['cont']

        int32 = torch.int32
        return (
            torch.tensor(history, dtype=int32),
            torch.tensor([history_len], dtype=int32),
            torch.tensor(portrait, dtype=int32),
            torch.tensor([raw_item_id], dtype=int32),
            torch.tensor(item_discrete, dtype=int32),
            torch.tensor(item_cont, dtype=torch.float32),
        )

## training

In [None]:
# Parse the CSV files once; both splits share the same item lookup table.
item_info_dict, train_samples, val_samples = load_data()

# Training batches are reshuffled on every pass.
train_ds = BigDataCupDataset(item_info_dict=item_info_dict, database=train_samples)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)

# Validation keeps the sample order and uses batch_size=9: load_data() emits
# 9 consecutive samples per CSV row, so each batch holds the exposed items of
# exactly one row.
val_ds = BigDataCupDataset(item_info_dict=item_info_dict, database=val_samples)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=9, shuffle=False)

100%|██████████| 381/381 [00:00<00:00, 18495.93it/s]
100%|██████████| 260087/260087 [00:31<00:00, 8232.60it/s]


In [None]:
def binary_acc(y_pred, y_test):
    """Return the share of predictions that match the labels.

    ``y_pred`` holds raw logits; they go through a sigmoid and are rounded to
    0/1 before being compared element-wise with ``y_test``. The number of
    matches is divided by ``y_test.shape[0]``; the result is a float tensor.
    """
    predicted_labels = torch.sigmoid(y_pred).round()
    num_correct = torch.eq(predicted_labels, y_test).sum().float()
    return num_correct / y_test.shape[0]


## below: only applicable for batch_size==9 in validation
def real_acc_calc(y_pred, y_test):
    """Return 1 if every thresholded prediction matches its label, else 0.

    Logits are passed through a sigmoid and rounded to 0/1; the batch scores
    1 only when the resulting accuracy (matches / ``y_test.shape[0]``) is
    exactly 1.0.
    """
    hard_pred = torch.sigmoid(y_pred).round()
    hit_ratio = torch.eq(hard_pred, y_test).sum().float() / y_test.shape[0]
    return 1 if hit_ratio == 1.0 else 0

def real_acc_rule_calc(y_pred, y_test):
    """Exact-match score after a "prefix must be complete" correction.

    Logits are thresholded to 0/1 (sigmoid, then round). The 9 predictions
    are read as three groups of three: unless the first 3 are all 1, the
    predictions from position 3 on are forced to 0; otherwise, unless the
    first 6 are all 1, the predictions from position 6 on are forced to 0.
    Returns 1 if the corrected predictions match ``y_test`` everywhere,
    else 0. Walks positions 0..8 of the batch.
    """
    y_pred_tag = torch.sigmoid(y_pred).round()

    ones_seen = 0
    for pos in range(9):
        ones_seen += int(y_pred_tag[pos][0] == 1)
        checkpoint = pos + 1
        # Checkpoints sit at the end of the first and of the second group.
        if checkpoint in (3, 6) and ones_seen != checkpoint:
            y_pred_tag[checkpoint:] = 0
            break

    hit_ratio = torch.eq(y_pred_tag, y_test).sum().float() / y_test.shape[0]
    return 1 if hit_ratio == 1.0 else 0

def real_acc_rule2_calc(y_pred, y_test):
    """Exact-match score after a "later positive implies earlier" correction.

    Logits are thresholded to 0/1 (sigmoid, then round). Positions are
    scanned from 8 down to 3: a 1 at position 6..8 forces the first 6
    predictions to 1, and a 1 at position 3..5 forces the first 3
    predictions to 1. Returns 1 if the corrected predictions match
    ``y_test`` everywhere, else 0.
    """
    y_pred_tag = torch.sigmoid(y_pred).round()

    # Positions 0..2 never trigger a correction, so the scan stops at 3.
    for k in range(8, 2, -1):
        if y_pred_tag[k][0] == 1:
            fill_upto = 6 if k >= 6 else 3
            y_pred_tag[:fill_upto] = 1

    hit_ratio = torch.eq(y_pred_tag, y_test).sum().float() / y_test.shape[0]
    return 1 if hit_ratio == 1.0 else 0

In [None]:
# Train the model and, every 1000 mini-batches, evaluate it on the whole
# validation set.
model = VanillaBaseModel(
    num_items=381,
    dim_item_emb=16,
    dim_item_discrete_feature_emb=16,
    dim_user_discrete_feature_emb=16,
)

# The model outputs raw logits, so the sigmoid is folded into the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


NUM_EPOCH = 2
for epoch_idx in range(NUM_EPOCH):  # loop over the dataset multiple times

    # Statistics accumulated since the last report.
    running_loss = 0.0
    train_cnt = 0
    train_acc_sum = 0

    for i, data in enumerate(train_dl, 0):
        # Re-enable training mode: the validation pass below switches to eval.
        model.train()

        # Unpack one batch: six model inputs followed by the label.
        user_click_history, num_user_click_history, user_discrete_feature, \
               item_id, item_discrete_feature, item_cont_feature, label = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(user_click_history, num_user_click_history, user_discrete_feature, \
               item_id, item_discrete_feature, item_cont_feature)

        loss = criterion(outputs, label.float())
        loss.backward()
        optimizer.step()

        # accumulate statistics
        running_loss += loss.item()

        acc = binary_acc(outputs, label)
        train_acc_sum += acc
        train_cnt += 1

        if i % 1000 == 1:    # report every 1000 mini-batches (first report at the 2nd batch)
            print('----- TRAIN -----')
            # NOTE: the printed loss is the SUM over the batches since the
            # last report, not their mean.
            print('[%d, %5d] loss: %.3f' % (epoch_idx + 1, i + 1, running_loss))
            print('- acc:', train_acc_sum / train_cnt, flush=True)

            running_loss = 0.0
            train_cnt = 0
            train_acc_sum = 0

            ## validation over the full validation loader
            model.eval()
            cnt = 0
            acc_sum = 0
            real_acc_sum = 0
            real_rule_acc_sum = 0
            real_rule2_acc_sum = 0

            # No gradients are needed here; skipping the autograd graph
            # saves memory and time on every validation batch.
            with torch.no_grad():
                for _, val_data in tqdm(enumerate(val_dl, 0)):
                    # Separate names so the training batch is not overwritten.
                    val_click_history, val_num_click_history, val_user_discrete, \
                        val_item_id, val_item_discrete, val_item_cont, val_label = val_data
                    val_outputs = model(val_click_history, val_num_click_history, val_user_discrete, \
                        val_item_id, val_item_discrete, val_item_cont)

                    # Per-element accuracy plus three all-or-nothing scores
                    # computed on the whole validation batch.
                    acc_sum += binary_acc(val_outputs, val_label)
                    real_acc_sum += real_acc_calc(val_outputs, val_label)
                    real_rule_acc_sum += real_acc_rule_calc(val_outputs, val_label)
                    real_rule2_acc_sum += real_acc_rule2_calc(val_outputs, val_label)

                    cnt += 1
            print('----- VAL -----')
            print('- acc:', acc_sum / cnt)
            print('- real acc:', real_acc_sum / cnt)
            print('- real rule acc:', real_rule_acc_sum / cnt)
            print('- real rule2 acc:', real_rule2_acc_sum / cnt)

print('Finished Training')

----- TRAIN -----
[1,     2] loss: 1.438
- acc: tensor(0.5156)


13005it [00:36, 359.67it/s]


----- VAL -----
- acc: tensor(0.5883)
- real acc: 0.08227604767397155
- real rule acc: 0.0946559015763168
- real rule2 acc: 0.12856593617839293
----- TRAIN -----
[1,  1002] loss: 528.724
- acc: tensor(0.7445)


13005it [00:35, 362.85it/s]


----- VAL -----
- acc: tensor(0.7494)
- real acc: 0.20499807766243752
- real rule acc: 0.21491733948481354
- real rule2 acc: 0.24159938485198001
----- TRAIN -----
[1,  2002] loss: 519.285
- acc: tensor(0.7462)


13005it [00:36, 359.47it/s]


----- VAL -----
- acc: tensor(0.7465)
- real acc: 0.24390618992695118
- real rule acc: 0.2537485582468281
- real rule2 acc: 0.2701268742791234
----- TRAIN -----
[1,  3002] loss: 509.129
- acc: tensor(0.7527)


13005it [00:35, 365.08it/s]


----- VAL -----
- acc: tensor(0.7553)
- real acc: 0.18346789696270666
- real rule acc: 0.19707804690503652
- real rule2 acc: 0.23391003460207613
----- TRAIN -----
[1,  4002] loss: 501.312
- acc: tensor(0.7599)


13005it [00:35, 361.45it/s]


----- VAL -----
- acc: tensor(0.7591)
- real acc: 0.23260284505959247
- real rule acc: 0.24013840830449826
- real rule2 acc: 0.25520953479430986
----- TRAIN -----
[1,  5002] loss: 500.424
- acc: tensor(0.7604)


13005it [00:35, 362.03it/s]


----- VAL -----
- acc: tensor(0.7616)
- real acc: 0.20153787004998078
- real rule acc: 0.20838139177239523
- real rule2 acc: 0.2271434063821607
----- TRAIN -----
[1,  6002] loss: 494.436
- acc: tensor(0.7657)


13005it [00:35, 361.52it/s]


----- VAL -----
- acc: tensor(0.7637)
- real acc: 0.21237985390234526
- real rule acc: 0.22122260668973473
- real rule2 acc: 0.24206074586697424
----- TRAIN -----
[1,  7002] loss: 491.484
- acc: tensor(0.7667)


13005it [00:36, 359.03it/s]


----- VAL -----
- acc: tensor(0.7644)
- real acc: 0.2219915417147251
- real rule acc: 0.23137254901960785
- real rule2 acc: 0.2485198000768935
----- TRAIN -----
[1,  8002] loss: 496.413
- acc: tensor(0.7616)


13005it [00:36, 361.05it/s]


----- VAL -----
- acc: tensor(0.7663)
- real acc: 0.21768550557477892
- real rule acc: 0.22637447135717031
- real rule2 acc: 0.24390618992695118
----- TRAIN -----
[1,  9002] loss: 488.702
- acc: tensor(0.7688)


13005it [00:35, 363.89it/s]


----- VAL -----
- acc: tensor(0.7680)
- real acc: 0.2302960399846213
- real rule acc: 0.23844675124951942
- real rule2 acc: 0.2506728181468666
----- TRAIN -----
[1, 10002] loss: 487.476
- acc: tensor(0.7687)


13005it [00:35, 365.94it/s]


----- VAL -----
- acc: tensor(0.7680)
- real acc: 0.2542868127643214
- real rule acc: 0.26197616301422527
- real rule2 acc: 0.2696655132641292
----- TRAIN -----
[1, 11002] loss: 488.492
- acc: tensor(0.7706)


13005it [00:36, 352.73it/s]


----- VAL -----
- acc: tensor(0.7685)
- real acc: 0.24021530180699732
- real rule acc: 0.2471357170319108
- real rule2 acc: 0.25551710880430606
----- TRAIN -----
[1, 12002] loss: 479.145
- acc: tensor(0.7745)


13005it [00:35, 369.66it/s]


----- VAL -----
- acc: tensor(0.7725)
- real acc: 0.2477508650519031
- real rule acc: 0.2569011918492887
- real rule2 acc: 0.26782006920415224
----- TRAIN -----
[1, 13002] loss: 481.649
- acc: tensor(0.7718)


13005it [00:34, 372.20it/s]


----- VAL -----
- acc: tensor(0.7717)
- real acc: 0.23606305267204922
- real rule acc: 0.24367550941945407
- real rule2 acc: 0.2584390618992695
----- TRAIN -----
[1, 14002] loss: 480.463
- acc: tensor(0.7745)


13005it [00:35, 368.67it/s]


----- VAL -----
- acc: tensor(0.7720)
- real acc: 0.24282968089196463
- real rule acc: 0.2519031141868512
- real rule2 acc: 0.2678969627066513
----- TRAIN -----
[1, 15002] loss: 477.882
- acc: tensor(0.7775)


13005it [00:35, 369.52it/s]


----- VAL -----
- acc: tensor(0.7717)
- real acc: 0.25713187235678586
- real rule acc: 0.2685121107266436
- real rule2 acc: 0.27935409457900806
----- TRAIN -----
[1, 16002] loss: 478.939
- acc: tensor(0.7791)


13005it [00:36, 356.57it/s]


----- VAL -----
- acc: tensor(0.7645)
- real acc: 0.2780469050365244
- real rule acc: 0.2837370242214533
- real rule2 acc: 0.2875048058439062
----- TRAIN -----
[1, 17002] loss: 474.871
- acc: tensor(0.7759)


13005it [00:35, 363.87it/s]


----- VAL -----
- acc: tensor(0.7736)
- real acc: 0.25459438677431756
- real rule acc: 0.2621299500192234
- real rule2 acc: 0.27197231833910035
----- TRAIN -----
[1, 18002] loss: 477.290
- acc: tensor(0.7774)


13005it [00:35, 363.81it/s]


----- VAL -----
- acc: tensor(0.7751)
- real acc: 0.24113802383698577
- real rule acc: 0.2512879661668589
- real rule2 acc: 0.2669742406766628
----- TRAIN -----
[1, 19002] loss: 478.124
- acc: tensor(0.7781)


13005it [00:36, 360.40it/s]


----- VAL -----
- acc: tensor(0.7752)
- real acc: 0.253440984236832
- real rule acc: 0.2642060745866974
- real rule2 acc: 0.2776624375240292
----- TRAIN -----
[1, 20002] loss: 481.812
- acc: tensor(0.7711)


13005it [00:35, 367.28it/s]


----- VAL -----
- acc: tensor(0.7755)
- real acc: 0.25736255286428295
- real rule acc: 0.26628219915417145
- real rule2 acc: 0.2801999231064975
----- TRAIN -----
[1, 21002] loss: 479.040
- acc: tensor(0.7750)


13005it [00:35, 366.74it/s]


----- VAL -----
- acc: tensor(0.7749)
- real acc: 0.2542099192618224
- real rule acc: 0.26605151864667437
- real rule2 acc: 0.283275663206459
----- TRAIN -----
[1, 22002] loss: 473.050
- acc: tensor(0.7799)


13005it [00:35, 362.46it/s]


----- VAL -----
- acc: tensor(0.7748)
- real acc: 0.26343713956170706
- real rule acc: 0.2736639753940792
- real rule2 acc: 0.281199538638985
----- TRAIN -----
[1, 23002] loss: 471.839
- acc: tensor(0.7797)


13005it [00:35, 361.85it/s]


----- VAL -----
- acc: tensor(0.7734)
- real acc: 0.26505190311418686
- real rule acc: 0.27143406382160706
- real rule2 acc: 0.27735486351403305
----- TRAIN -----
[1, 24002] loss: 471.644
- acc: tensor(0.7807)


13005it [00:36, 357.02it/s]


----- VAL -----
- acc: tensor(0.7747)
- real acc: 0.26620530565167244
- real rule acc: 0.2748942714340638
- real rule2 acc: 0.28127643214148407
----- TRAIN -----
[1, 25002] loss: 475.327
- acc: tensor(0.7764)


13005it [00:36, 356.70it/s]


----- VAL -----
- acc: tensor(0.7768)
- real acc: 0.2348327566320646
- real rule acc: 0.24421376393694733
- real rule2 acc: 0.2639753940792003
----- TRAIN -----
[1, 26002] loss: 471.717
- acc: tensor(0.7779)


13005it [00:35, 362.52it/s]


----- VAL -----
- acc: tensor(0.7782)
- real acc: 0.24805843906189928
- real rule acc: 0.2586697424067666
- real rule2 acc: 0.27474048442906573
----- TRAIN -----
[1, 27002] loss: 471.710
- acc: tensor(0.7785)


13005it [00:36, 358.91it/s]


----- VAL -----
- acc: tensor(0.7761)
- real acc: 0.2585159554017685
- real rule acc: 0.26782006920415224
- real rule2 acc: 0.2770472895040369
----- TRAIN -----
[1, 28002] loss: 473.051
- acc: tensor(0.7766)


13005it [00:35, 364.15it/s]


----- VAL -----
- acc: tensor(0.7778)
- real acc: 0.24290657439446367
- real rule acc: 0.25151864667435603
- real rule2 acc: 0.2689734717416378
----- TRAIN -----
[1, 29002] loss: 466.584
- acc: tensor(0.7829)


13005it [00:35, 361.45it/s]


----- VAL -----
- acc: tensor(0.7793)
- real acc: 0.24467512495194155
- real rule acc: 0.25459438677431756
- real rule2 acc: 0.26820453671664746
----- TRAIN -----
[1, 30002] loss: 469.493
- acc: tensor(0.7790)


13005it [00:35, 361.97it/s]


----- VAL -----
- acc: tensor(0.7773)
- real acc: 0.24813533256439832
- real rule acc: 0.25728565936178394
- real rule2 acc: 0.267358708189158
----- TRAIN -----
[1, 31002] loss: 466.536
- acc: tensor(0.7834)


13005it [00:36, 360.78it/s]


----- VAL -----
- acc: tensor(0.7769)
- real acc: 0.2754325259515571
- real rule acc: 0.28604382929642447
- real rule2 acc: 0.29473279507881583
----- TRAIN -----
[1, 32002] loss: 464.532
- acc: tensor(0.7819)


13005it [00:35, 362.06it/s]


----- VAL -----
- acc: tensor(0.7788)
- real acc: 0.255440215301807
- real rule acc: 0.26582083813917723
- real rule2 acc: 0.28035371011149557
----- TRAIN -----
[1, 33002] loss: 470.577
- acc: tensor(0.7805)


13005it [00:36, 359.65it/s]


----- VAL -----
- acc: tensor(0.7799)
- real acc: 0.2585159554017685
- real rule acc: 0.2686658977316417
- real rule2 acc: 0.2796616685890042
----- TRAIN -----
[1, 34002] loss: 461.123
- acc: tensor(0.7848)


13005it [00:36, 355.58it/s]


----- VAL -----
- acc: tensor(0.7781)
- real acc: 0.2623606305267205
- real rule acc: 0.2728950403690888
- real rule2 acc: 0.2845059592464437
----- TRAIN -----
[1, 35002] loss: 467.844
- acc: tensor(0.7804)


13005it [00:35, 363.25it/s]


----- VAL -----
- acc: tensor(0.7749)
- real acc: 0.26566705113417916
- real rule acc: 0.2778162245290273
- real rule2 acc: 0.2918877354863514
----- TRAIN -----
[1, 36002] loss: 472.327
- acc: tensor(0.7802)


13005it [00:36, 359.69it/s]


----- VAL -----
- acc: tensor(0.7788)
- real acc: 0.2585928489042676
- real rule acc: 0.26828143021914647
- real rule2 acc: 0.28096885813148786
----- TRAIN -----
[1, 37002] loss: 469.660
- acc: tensor(0.7814)


13005it [00:36, 360.57it/s]


----- VAL -----
- acc: tensor(0.7763)
- real acc: 0.28266051518646673
- real rule acc: 0.2943483275663206
- real rule2 acc: 0.30680507497116494
----- TRAIN -----
[1, 38002] loss: 466.042
- acc: tensor(0.7843)


13005it [00:36, 355.21it/s]


----- VAL -----
- acc: tensor(0.7801)
- real acc: 0.2685121107266436
- real rule acc: 0.27773933102652826
- real rule2 acc: 0.28673587081891583
----- TRAIN -----
[1, 39002] loss: 473.266
- acc: tensor(0.7793)


13005it [00:36, 354.55it/s]


----- VAL -----
- acc: tensor(0.7783)
- real acc: 0.25982314494425224
- real rule acc: 0.26981930026912726
- real rule2 acc: 0.28089196462898885
----- TRAIN -----
[1, 40002] loss: 469.439
- acc: tensor(0.7804)


13005it [00:36, 358.31it/s]


----- VAL -----
- acc: tensor(0.7801)
- real acc: 0.2609765474817378
- real rule acc: 0.27035755478662055
- real rule2 acc: 0.28089196462898885
----- TRAIN -----
[1, 41002] loss: 464.457
- acc: tensor(0.7841)


13005it [00:36, 357.62it/s]


----- VAL -----
- acc: tensor(0.7757)
- real acc: 0.2748942714340638
- real rule acc: 0.287043444828912
- real rule2 acc: 0.2965013456362937
----- TRAIN -----
[1, 42002] loss: 465.151
- acc: tensor(0.7816)


13005it [00:36, 360.94it/s]


----- VAL -----
- acc: tensor(0.7811)
- real acc: 0.27412533640907344
- real rule acc: 0.28404459823144945
- real rule2 acc: 0.29565551710880433
----- TRAIN -----
[1, 43002] loss: 464.247
- acc: tensor(0.7845)


13005it [00:36, 359.57it/s]


----- VAL -----
- acc: tensor(0.7807)
- real acc: 0.26582083813917723
- real rule acc: 0.27297193387158786
- real rule2 acc: 0.28404459823144945
----- TRAIN -----
[1, 44002] loss: 466.872
- acc: tensor(0.7819)


13005it [00:36, 357.02it/s]


----- VAL -----
- acc: tensor(0.7804)
- real acc: 0.26943483275663205
- real rule acc: 0.2774317570165321
- real rule2 acc: 0.287043444828912
----- TRAIN -----
[1, 45002] loss: 460.778
- acc: tensor(0.7854)


13005it [00:36, 359.13it/s]


----- VAL -----
- acc: tensor(0.7811)
- real acc: 0.267358708189158
- real rule acc: 0.27550941945405616
- real rule2 acc: 0.2855055747789312
----- TRAIN -----
[1, 46002] loss: 467.236
- acc: tensor(0.7833)


13005it [00:36, 359.05it/s]


----- VAL -----
- acc: tensor(0.7793)
- real acc: 0.25236447520184546
- real rule acc: 0.26605151864667437
- real rule2 acc: 0.28427527873894654
----- TRAIN -----
[1, 47002] loss: 462.153
- acc: tensor(0.7857)


13005it [00:36, 358.53it/s]


----- VAL -----
- acc: tensor(0.7800)
- real acc: 0.2578239138792772
- real rule acc: 0.2668973471741638
- real rule2 acc: 0.2806612841214917
----- TRAIN -----
[1, 48002] loss: 463.692
- acc: tensor(0.7830)


13005it [00:36, 358.53it/s]


----- VAL -----
- acc: tensor(0.7789)
- real acc: 0.26582083813917723
- real rule acc: 0.2744329104190696
- real rule2 acc: 0.2837370242214533
----- TRAIN -----
[1, 49002] loss: 460.075
- acc: tensor(0.7844)


13005it [00:36, 356.76it/s]


----- VAL -----
- acc: tensor(0.7806)
- real acc: 0.2770472895040369
- real rule acc: 0.28565936178392926
- real rule2 acc: 0.29627066512879663
----- TRAIN -----
[1, 50002] loss: 458.965
- acc: tensor(0.7877)


13005it [00:36, 356.06it/s]


----- VAL -----
- acc: tensor(0.7796)
- real acc: 0.2528258362168397
- real rule acc: 0.2621299500192234
- real rule2 acc: 0.27935409457900806
----- TRAIN -----
[1, 51002] loss: 460.367
- acc: tensor(0.7840)


13005it [00:36, 359.94it/s]


----- VAL -----
- acc: tensor(0.7793)
- real acc: 0.2445982314494425
- real rule acc: 0.2522106881968474
- real rule2 acc: 0.26920415224913496
----- TRAIN -----
[1, 52002] loss: 462.672
- acc: tensor(0.7829)


13005it [00:36, 356.08it/s]


----- VAL -----
- acc: tensor(0.7807)
- real acc: 0.2691272587466359
- real rule acc: 0.2805843906189927
- real rule2 acc: 0.2944252210688197
----- TRAIN -----
[1, 53002] loss: 460.366
- acc: tensor(0.7853)


13005it [00:36, 354.14it/s]


----- VAL -----
- acc: tensor(0.7800)
- real acc: 0.25720876585928487
- real rule acc: 0.2669742406766628
- real rule2 acc: 0.28204536716647444
----- TRAIN -----
[1, 54002] loss: 460.682
- acc: tensor(0.7847)


13005it [00:36, 359.80it/s]


----- VAL -----
- acc: tensor(0.7810)
- real acc: 0.2685890042291426
- real rule acc: 0.2778162245290273
- real rule2 acc: 0.28981161091887736
----- TRAIN -----
[1, 55002] loss: 463.859
- acc: tensor(0.7844)


13005it [00:36, 360.93it/s]


----- VAL -----
- acc: tensor(0.7813)
- real acc: 0.25928489042675895
- real rule acc: 0.26981930026912726
- real rule2 acc: 0.2879661668589004
----- TRAIN -----
[1, 56002] loss: 461.153
- acc: tensor(0.7839)


13005it [00:36, 358.38it/s]


----- VAL -----
- acc: tensor(0.7801)
- real acc: 0.27304882737408687
- real rule acc: 0.28342945021145716
- real rule2 acc: 0.2975009611687812
----- TRAIN -----
[1, 57002] loss: 465.034
- acc: tensor(0.7813)


13005it [00:36, 356.01it/s]


----- VAL -----
- acc: tensor(0.7814)
- real acc: 0.2616685890042291
- real rule acc: 0.27120338331411
- real rule2 acc: 0.2837370242214533
----- TRAIN -----
[1, 58002] loss: 463.866
- acc: tensor(0.7830)


13005it [00:37, 350.10it/s]


----- VAL -----
- acc: tensor(0.7806)
- real acc: 0.2501345636293733
- real rule acc: 0.2589773164167628
- real rule2 acc: 0.2774317570165321
----- TRAIN -----
[1, 59002] loss: 458.968
- acc: tensor(0.7851)


13005it [00:36, 355.06it/s]


----- VAL -----
- acc: tensor(0.7811)
- real acc: 0.2507497116493656
- real rule acc: 0.2605920799692426
- real rule2 acc: 0.275278738946559
----- TRAIN -----
[1, 60002] loss: 468.010
- acc: tensor(0.7815)


13005it [00:37, 350.28it/s]


----- VAL -----
- acc: tensor(0.7810)
- real acc: 0.2695886197616301
- real rule acc: 0.2818146866589773
- real rule2 acc: 0.297039600153787
----- TRAIN -----
[1, 61002] loss: 459.134
- acc: tensor(0.7853)


13005it [00:37, 349.75it/s]


----- VAL -----
- acc: tensor(0.7815)
- real acc: 0.255440215301807
- real rule acc: 0.2649750096116878
- real rule2 acc: 0.27920030757401
----- TRAIN -----
[1, 62002] loss: 468.944
- acc: tensor(0.7806)


13005it [00:37, 349.87it/s]


----- VAL -----
- acc: tensor(0.7820)
- real acc: 0.2708958093041138
- real rule acc: 0.2817377931564783
- real rule2 acc: 0.2933487120338331
----- TRAIN -----
[1, 63002] loss: 457.729
- acc: tensor(0.7870)


13005it [00:36, 355.98it/s]


----- VAL -----
- acc: tensor(0.7813)
- real acc: 0.26543637062668207
- real rule acc: 0.2768935024990388
- real rule2 acc: 0.29311803152633603
----- TRAIN -----
[1, 64002] loss: 462.873
- acc: tensor(0.7834)


13005it [00:37, 349.67it/s]


----- VAL -----
- acc: tensor(0.7821)
- real acc: 0.2520569011918493
- real rule acc: 0.26128412149173397
- real rule2 acc: 0.2758169934640523
----- TRAIN -----
[1, 65002] loss: 463.858
- acc: tensor(0.7827)


13005it [00:37, 347.94it/s]


----- VAL -----
- acc: tensor(0.7809)
- real acc: 0.2737408688965782
- real rule acc: 0.28312187620146095
- real rule2 acc: 0.2938100730488274
----- TRAIN -----
[1, 66002] loss: 461.970
- acc: tensor(0.7866)


13005it [00:37, 348.76it/s]


----- VAL -----
- acc: tensor(0.7819)
- real acc: 0.25667051134179164
- real rule acc: 0.2658977316416763
- real rule2 acc: 0.27927720107650905
----- TRAIN -----
[1, 67002] loss: 453.622
- acc: tensor(0.7893)


13005it [00:37, 349.94it/s]


----- VAL -----
- acc: tensor(0.7801)
- real acc: 0.2822760476739716
- real rule acc: 0.2938100730488274
- real rule2 acc: 0.30503652441368706
----- TRAIN -----
[1, 68002] loss: 462.444
- acc: tensor(0.7846)


13005it [00:37, 348.98it/s]


----- VAL -----
- acc: tensor(0.7817)
- real acc: 0.28212226066897345
- real rule acc: 0.29242599000384467
- real rule2 acc: 0.3031910803537101
----- TRAIN -----
[1, 69002] loss: 462.532
- acc: tensor(0.7849)


13005it [00:37, 350.50it/s]


----- VAL -----
- acc: tensor(0.7813)
- real acc: 0.27097270280661284
- real rule acc: 0.27873894655901577
- real rule2 acc: 0.2895809304113802
----- TRAIN -----
[2,     2] loss: 0.844
- acc: tensor(0.7812)


13005it [00:37, 349.11it/s]


----- VAL -----
- acc: tensor(0.7827)
- real acc: 0.26305267204921184
- real rule acc: 0.27304882737408687
- real rule2 acc: 0.28996539792387543
----- TRAIN -----
[2,  1002] loss: 458.823
- acc: tensor(0.7845)


13005it [00:36, 354.17it/s]


----- VAL -----
- acc: tensor(0.7784)
- real acc: 0.2538254517493272
- real rule acc: 0.2665128796616686
- real rule2 acc: 0.2905036524413687
----- TRAIN -----
[2,  2002] loss: 461.267
- acc: tensor(0.7830)


13005it [00:36, 351.85it/s]


----- VAL -----
- acc: tensor(0.7826)
- real acc: 0.26682045367166474
- real rule acc: 0.27566320645905423
- real rule2 acc: 0.28673587081891583
----- TRAIN -----
[2,  3002] loss: 459.763
- acc: tensor(0.7833)


13005it [00:36, 351.97it/s]


----- VAL -----
- acc: tensor(0.7821)
- real acc: 0.26774317570165324
- real rule acc: 0.27835447904652055
- real rule2 acc: 0.2928873510188389
----- TRAIN -----
[2,  4002] loss: 456.206
- acc: tensor(0.7873)


13005it [00:36, 352.42it/s]


----- VAL -----
- acc: tensor(0.7831)
- real acc: 0.2612072279892349
- real rule acc: 0.2711264898116109
- real rule2 acc: 0.2895040369088812
----- TRAIN -----
[2,  5002] loss: 457.637
- acc: tensor(0.7881)


13005it [00:37, 351.08it/s]


----- VAL -----
- acc: tensor(0.7828)
- real acc: 0.2668973471741638
- real rule acc: 0.2768935024990388
- real rule2 acc: 0.29219530949634753
----- TRAIN -----
[2,  6002] loss: 462.897
- acc: tensor(0.7829)


6960it [00:19, 358.05it/s]

In [None]:
# Checkpoint the whole model object (not just a state_dict) after the run logged above.
torch.save(obj=model, f='vanilla_model_epoch1.2.pth')

In [None]:
# Checkpoint the whole model object under the "layernorm" name.
# NOTE(review): this saves whatever `model` currently holds -- presumably the
# LayerNorm variant was trained in a separate run before executing this cell; confirm.
torch.save(obj=model, f='vanilla_layernorm_model_epoch1.2.pth')

## test script

In [None]:
# Read the track-1 test set; the loader returns the item info dictionary and the test samples.
item_info_dict, test_samples = load_test_data(
    data_path='/content/drive/MyDrive/202108-bigdatacup2021/data/',
    filename='track1_testset.csv',
)

# batch_size=9 and shuffle=False are load-bearing: the inference loop below indexes
# rows 0-8 of every batch and writes one submission row per batch, in order.
test_ds = BigDataCupTestDataset(item_info_dict, test_samples)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=9, shuffle=False)

100%|██████████| 381/381 [00:00<00:00, 28568.39it/s]
100%|██████████| 206254/206254 [00:18<00:00, 11116.89it/s]


In [None]:
# Inference on the test set: score each batch of 9 rows with the trained model,
# post-process the binary predictions with "rule2", and write one CSV row per batch
# in the form `<1-based batch index>,<9 space-separated 0/1 labels>`.
model.eval()  # put the model in evaluation mode (returns the same module)

# `with` guarantees the submission file is flushed and closed even if a batch fails;
# no_grad() skips autograd bookkeeping, which is never needed for inference.
with open('vanilla_model_epoch1.2_track1.csv', 'w') as fp, torch.no_grad():
    print('id,category', file=fp)

    # Wrap the DataLoader itself (not enumerate) so tqdm can see its length.
    for i, data in enumerate(tqdm(test_dl), 1):
        user_click_history, num_user_click_history, user_discrete_feature, \
                item_id, item_discrete_feature, item_cont_feature = data

        # forward pass only
        outputs = model(user_click_history, num_user_click_history, user_discrete_feature,
                        item_id, item_discrete_feature, item_cont_feature)

        # Threshold the logits at probability 0.5 -> float tensor of 0./1.
        y_pred_tag = torch.round(torch.sigmoid(outputs))

        ## rule1 (disabled alternative): if the first 3 / first 6 rows are not all
        ## positive, zero out everything after them.
        # cum_sum = 0
        # for j in range(9):
        #     if y_pred_tag[j][0] == 1:
        #         cum_sum += 1
        #     if j == 2 and cum_sum != 3:
        #         y_pred_tag[3:] = 0
        #         break
        #     if j == 5 and cum_sum != 6:
        #         y_pred_tag[6:] = 0
        #         break

        ## rule2: rows 0-2, 3-5 and 6-8 are treated as three tiers
        ## (presumably the three purchase sessions -- confirm against the dataset).
        ## A positive in rows 6-8 forces rows 0-5 positive; otherwise a positive in
        ## rows 3-5 forces rows 0-2 positive. Same result as scanning rows 8..0.
        if (y_pred_tag[6:, 0] == 1).any():
            y_pred_tag[:6] = 1
        elif (y_pred_tag[3:6, 0] == 1).any():
            y_pred_tag[:3] = 1

        labels = y_pred_tag.numpy()[:, 0].astype(np.int32)
        p = ' '.join(str(a) for a in labels)
        print(f'{i},{p}', file=fp)

206254it [05:33, 618.80it/s]
