In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import random
import torch.optim as optim
import pickle
import torch.utils.data
from torch.backends import cudnn
from scipy.sparse import csr_matrix
import math
import bottleneck as bn
import matplotlib.pyplot as plt
import time

### dataset

In [2]:
gpu = 0
inter = 5
train = torch.load('data/cul/train_' + str(inter) + '.pt')
val = torch.load('data/cul/val_' + str(inter) + '.pt')
test = torch.load('data/cul/test_' + str(inter) + '.pt')

train_matrix = torch.load('data/cul/train_matrix_' + str(inter) + '.pt')
train_nei = np.load('data/cul/train_nei_' + str(inter) + '.npy').item()

train_matrix_input = train_matrix.clone().type(torch.FloatTensor)
for idx, (u,i) in enumerate(val):
    train_matrix_input[u][val[idx][1]] = 0
    train_matrix_input[u][test[idx][1]] = 0

num_users = train_matrix.size()[0]
num_items = train_matrix.size()[1]

print(num_users)
print(num_items)
print(train.size()[0]+val.size()[0]*2)

# for neg_sample
matrix = train_matrix.numpy()
neg_max = num_items - min(np.sum(matrix, axis = 1))
neg_count = neg_max - np.sum(matrix, axis = 1)

i, j = np.where(matrix == 0)
user = 0
count = 0
negs = []
for index, idx in enumerate(i):
    if user < idx:
        user = idx
        neg = j[count:index].tolist()
        neg += [-1]*(int(neg_max)-len(neg))
        negs.append(neg)
        count = index 
neg = j[count:].tolist()
neg += [-1]*(int(neg_max)-len(neg))        
negs.append(neg)
negs_np = np.array(negs)

5219
25187
130799


In [3]:
# train set
class traindset(torch.utils.data.Dataset):
    def __init__(self, data, idxs):
        self.data = data
        self.idxs = idxs
        
    def __getitem__(self, idx):
        return self.data[idx], self.idxs[idx]
        
    def __len__(self):
        return np.shape(self.data)[0]

train_dset = traindset(train_matrix_input, torch.arange(num_users).type(torch.LongTensor))
val_dset = traindset(val, torch.arange(num_users))
test_dset = traindset(test, torch.arange(num_users))

def HR(k, eval_sort):
    eval_sort = eval_sort.cpu().numpy()
    _, idy = np.where(eval_sort == 0)
    
    return len(np.where(idy < k)[0]) / len(eval_sort)

def NDCG(k, eval_sort):
    eval_sort = eval_sort.cpu().numpy()
    _, idy = np.where(eval_sort == 0) # N
    rank = idy[np.where(idy < k)[0]]
    
    return np.sum(1 / np.log2(rank+2)) / len(eval_sort)

### base model - CDAE

In [4]:
class CDAE(nn.Module):
    def __init__(self, hid_dim, matrix, temp):
        super(CDAE, self).__init__()
        self.num_users = matrix.size()[0]
        self.num_items = matrix.size()[1]
        self.hid_dim = hid_dim
        self.matrix = matrix.cuda(gpu)
        self.temp = temp

        self.u_emb = nn.Embedding(self.num_users, hid_dim)
        
        self.E = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(num_items, hid_dim)
        )

        self.D = nn.Sequential(
            nn.Linear(hid_dim, num_items),
        ) 
        
        self.sig = nn.Sigmoid()
        
    def forward(self, u, idx):
        wyu = self.E(u)
        vu = self.u_emb(idx)
        
        zu = wyu + vu
        zu = self.sig(zu)
        
        hu = self.D(zu)
        u_recon = self.sig(hu)
        u_recon_tmp = self.sig(hu / self.temp)

        # negative sampling
        weight_CF = u.clone().view(-1, num_items)
        for i in range(len(idx)):
            neg_idx = np.random.randint(0, neg_count[idx[i]], size=(1, int(torch.sum(u[i])*num_neg)))
            neg_items = np.array(negs_np[idx[i], neg_idx])
            weight_CF[i, neg_items] = 1

        return u_recon, u_recon_tmp, weight_CF
    
    # not used
    def evaluation(self, pairs, idx):
        # indices
        users = pairs[:, 0]
        pos_items = pairs[:, 1].cpu().numpy().reshape((len(users), 1))
        neg_items = np.zeros((len(users), 99))
        for i in range(len(users)):
            neg_idx = np.random.randint(0, neg_count[users[i]], size=(1, 99))
            neg_items[i] = np.array(negs_np[users[i], neg_idx])
        eval_items = torch.LongTensor(np.concatenate((pos_items, neg_items), axis=1)).cuda(gpu)
        
        us = self.matrix[users]
        us_recon, _, _ = self.forward(us, idx)
        eval_sort_sum = torch.zeros_like(eval_items).cuda(gpu)
        for i in range(len(users)):
            u_recon = us_recon[i]
            u_recon_eval = u_recon[eval_items[i]]
            eval_sort = torch.argsort(u_recon_eval, descending=True)
            eval_sort_sum[i] = eval_sort

        return HR(5, eval_sort_sum), HR(10, eval_sort_sum), HR(20, eval_sort_sum), NDCG(5, eval_sort_sum), NDCG(10, eval_sort_sum), NDCG(20, eval_sort_sum)

### Warm-up

In [5]:
# model
T_size = 50
S_size = 5
T = CDAE(T_size, train_matrix_input, 1)
S = CDAE(S_size, train_matrix_input, 1)

In [6]:
# Teacher
use_cuda = torch.cuda.is_available()
bs = 128
lr = 0.002
wd = 0.001
num_neg = 5
epochs = 1000
verbose = 100

# data
train_loader = torch.utils.data.DataLoader(dataset = train_dset, batch_size = bs, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dset, batch_size = 1024, shuffle = False)

optimizer = optim.Adam(T.parameters(), lr = lr, weight_decay = wd)
criterion = torch.nn.BCELoss(reduction='none')
if use_cuda:
    T = T.cuda(gpu)
    criterion = criterion.cuda(gpu)

for epoch in range(1, epochs+1):
    T.train()
    loss_train = np.zeros(1)
    t0 = time.time()
    for batch_idx, (us, idxs) in enumerate(train_loader):
        if use_cuda:
            us = us.cuda(gpu)
            idxs = idxs.cuda(gpu)

        optimizer.zero_grad()
        us_recon, _, weight = T(us, idxs)

        loss = torch.sum(criterion(us_recon, us) * weight)
        loss.backward()
        optimizer.step()

        loss_train[0] += loss.cpu().tolist() 
    loss_train /= len(train_loader)

    if epoch % verbose == 0:
        print('epoch = {}, loss = {:.3f}, time = {:.4f}'.format(epoch, loss_train[0], time.time()-t0))

epoch = 100, loss = 1262.004, time = 1.9682
epoch = 200, loss = 754.579, time = 1.9370
epoch = 300, loss = 601.370, time = 2.4176
epoch = 400, loss = 531.978, time = 2.0499
epoch = 500, loss = 495.278, time = 1.9370
epoch = 600, loss = 472.245, time = 1.9982
epoch = 700, loss = 452.370, time = 2.0932
epoch = 800, loss = 439.284, time = 1.9370
epoch = 900, loss = 427.184, time = 1.9370
epoch = 1000, loss = 422.620, time = 2.2338


In [7]:
# Student
use_cuda = torch.cuda.is_available()
bs = 128
lr = 0.002
wd = 0.001
num_neg = 5
epochs = 1000
verbose = 100

# data
train_loader = torch.utils.data.DataLoader(dataset = train_dset, batch_size = bs, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dset, batch_size = 1024, shuffle = False)

optimizer = optim.Adam(S.parameters(), lr = lr, weight_decay = wd)
criterion = torch.nn.BCELoss(reduction='none')
if use_cuda:
    S = S.cuda(gpu)
    criterion = criterion.cuda(gpu)

for epoch in range(1, epochs+1):
    S.train()
    loss_train = np.zeros(1)
    t0 = time.time()
    for batch_idx, (us, idxs) in enumerate(train_loader):
        if use_cuda:
            us = us.cuda(gpu)
            idxs = idxs.cuda(gpu)

        optimizer.zero_grad()
        us_recon, _, weight = S(us, idxs)

        loss = torch.sum(criterion(us_recon, us) * weight)
        loss.backward()
        optimizer.step()

        loss_train[0] += loss.cpu().tolist() 
    loss_train /= len(train_loader)

    if epoch % verbose == 0:
        print('epoch = {}, loss = {:.3f}, time = {:.4f}'.format(epoch, loss_train[0], time.time()-t0))

epoch = 100, loss = 5660.510, time = 1.8341
epoch = 200, loss = 4727.627, time = 1.9392
epoch = 300, loss = 4262.751, time = 2.0464
epoch = 400, loss = 3987.547, time = 1.9214
epoch = 500, loss = 3807.204, time = 1.8277
epoch = 600, loss = 3691.051, time = 2.0464
epoch = 700, loss = 3608.240, time = 2.1487
epoch = 800, loss = 3534.927, time = 1.8902
epoch = 900, loss = 3483.329, time = 1.8277
epoch = 1000, loss = 3445.293, time = 1.8277


### Bidirectional Distillation

In [8]:
# sampling function
def get_KD_instances(u, idx):
    weight_KD_T = torch.zeros_like(u)
    weight_KD_S = torch.zeros_like(u)
    for i in range(len(idx)):
        neg_idx_T = torch.multinomial(weights_T[idx[i]], int(torch.sum(u[i])*num_neg), replacement=True)
        neg_idx_S = torch.multinomial(weights_S[idx[i]], int(torch.sum(u[i])*num_neg), replacement=True)
        weight_KD_T[i, neg_idx_T] = 1
        weight_KD_S[i, neg_idx_S] = 1

    return weight_KD_T, weight_KD_S

In [9]:
# hyperparameters
use_cuda = torch.cuda.is_available()
bs = 128
lr = 0.002
lamb_T = 0.5
lamb_S = 0.5

temp_T = 2
temp_S = 2

eps = 1e-4
eps_tanh = 1e-4
num_neg = 5
update = 10

epochs = 1000
verbose = 100

# data
train_loader = torch.utils.data.DataLoader(dataset = train_dset, batch_size = bs, shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dset, batch_size = 1024, shuffle = False)

# loss
optimizer_T = optim.Adam(T.parameters(), lr = lr, weight_decay=0)
optimizer_S = optim.Adam(S.parameters(), lr = lr, weight_decay=0)
criterion = torch.nn.BCELoss(reduction='none')

# cuda
T = T.cuda(gpu)
S = S.cuda(gpu)

for epoch in range(epochs+1):
    loss_train = np.zeros(3)
    t0 = time.time()

    # update rank matrix
    if (epoch % update == 0):
        with torch.no_grad():
            T.eval()
            S.eval()
            T.temp = 10
            S.temp = 10
            rank_matrix_T = torch.zeros_like(train_matrix_input).type(torch.LongTensor)
            rank_matrix_S = torch.zeros_like(train_matrix_input).type(torch.LongTensor)
            mask_matrix = 1-train_matrix_input.cuda(gpu).float()

            for u in range(num_users):
                us = train_matrix_input[u].cuda(gpu)
                idx = torch.LongTensor([u]).cuda(gpu)
                _, us_recon_tmp_T, _ = T(us, idx)
                _, us_recon_tmp_S, _ = S(us, idx)
                us_mask_T = us_recon_tmp_T * mask_matrix[u]
                us_mask_S = us_recon_tmp_S * mask_matrix[u]
                rank_list_T = torch.argsort(us_mask_T)
                rank_list_S = torch.argsort(us_mask_S)
                rank_matrix_T[u] = rank_list_T
                rank_matrix_S[u] = rank_list_S

            ranklist_T = torch.zeros_like(rank_matrix_T)
            for i in range(len(ranklist_T)):
                row = rank_matrix_T[i]
                ranklist_T[i][row] = torch.LongTensor(np.arange(len(row))) + 1

            ranklist_S = torch.zeros_like(rank_matrix_S)
            for i in range(len(ranklist_S)):
                row = rank_matrix_S[i]
                ranklist_S[i][row] = torch.LongTensor(np.arange(len(row))) + 1

            rank_dif_T = ranklist_T - ranklist_S # T가 못한거
            rank_dif_S = ranklist_S - ranklist_T # S가 못한거

            weights_T = torch.exp(rank_dif_T.type(torch.FloatTensor) * eps).cuda(gpu)
            weights_S = torch.tanh(torch.max(rank_dif_S.type(torch.FloatTensor) * eps_tanh, torch.zeros_like(rank_dif_T).type(torch.FloatTensor))).cuda(gpu) 

    T.train()
    S.train()
    T.temp = temp_T
    S.temp = temp_S                                            
    for batch_idx, (us, idxs) in enumerate(train_loader):
        if use_cuda:
            us = us.cuda(gpu)
            idxs = idxs.cuda(gpu)

        ### train
        optimizer_T.zero_grad()
        optimizer_S.zero_grad()
        uT, uT_tmp, weightT_CF = T(us, idxs)
        uS, uS_tmp, weightS_CF = S(us, idxs)
        weightT_KD, weightS_KD = get_KD_instances(us, idxs)

        ### For T
        pseudo_label = uS_tmp.detach()
        loss_T_WS = torch.sum(criterion(uT, pseudo_label) * weightT_KD)
        loss_T_CF = torch.sum(criterion(uT, us) * weightT_CF)
        loss_T = loss_T_CF + loss_T_WS * lamb_T
        loss_T.backward()
        optimizer_T.step()

        ### For S
        pseudo_label = uT_tmp.detach()
        loss_S_WS = torch.sum(criterion(uS, pseudo_label) * weightS_KD)
        loss_S_CF = torch.sum(criterion(uS, us) * weightS_CF)
        loss_S = loss_S_CF + loss_S_WS * lamb_S
        loss_S.backward()
        optimizer_S.step()

        #loss_train[0] += loss.cpu().tolist() 
        loss_train[1] += loss_T.cpu().tolist() 
        loss_train[2] += loss_S.cpu().tolist()
    loss_train /= len(train_loader)

    if epoch % verbose == 0:
        print('epoch = {}, loss = {:.3f}+{:.3f}= {:.3f}, time = {:.4f}'.format(epoch, loss_train[1], loss_train[2], loss_train[0], time.time()-t0))
        ## full val
        rank_T = []
        for row in test:
            row = row.numpy()
            rank_T.append(num_items - np.where(rank_matrix_T[row[0]] == row[1])[0][0])
        rank_T = np.array(rank_T)
        ndcg = 1 / np.log2(rank_T + 2)

        print("{:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f}".format((rank_T < 50).mean(), (rank_T < 100).mean(), (rank_T < 200).mean(), (ndcg * (rank_T < 50)).mean(), (ndcg * (rank_T < 100)).mean(), (ndcg * (rank_T < 200)).mean()))

        rank_S = []
        for row in test:
            row = row.numpy()
            rank_S.append(num_items - np.where(rank_matrix_S[row[0]] == row[1])[0][0])
        rank_S = np.array(rank_S)
        ndcg = 1 / np.log2(rank_S + 2)

        print("{:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f}, {:.4f}".format((rank_S < 50).mean(), (rank_S < 100).mean(), (rank_S < 200).mean(), (ndcg * (rank_S < 50)).mean(), (ndcg * (rank_S < 100)).mean(), (ndcg * (rank_S < 200)).mean()))

epoch = 0, loss = 4282.032+5001.801= 0.000, time = 22.4927
0.1797, 0.2409, 0.3131, 0.0497, 0.0596, 0.0697
0.0805, 0.1290, 0.1859, 0.0238, 0.0316, 0.0395
epoch = 100, loss = 3547.484+5538.613= 0.000, time = 23.7395
0.1838, 0.2510, 0.3185, 0.0514, 0.0624, 0.0718
0.0883, 0.1401, 0.2054, 0.0268, 0.0352, 0.0443
epoch = 200, loss = 3683.698+5601.457= 0.000, time = 23.4817
0.1843, 0.2527, 0.3209, 0.0518, 0.0629, 0.0724
0.0912, 0.1427, 0.2090, 0.0274, 0.0357, 0.0450
epoch = 300, loss = 3760.491+5648.504= 0.000, time = 22.7245
0.1861, 0.2552, 0.3246, 0.0522, 0.0634, 0.0731
0.0945, 0.1449, 0.2135, 0.0281, 0.0363, 0.0458
epoch = 400, loss = 3790.452+5653.207= 0.000, time = 22.7942
0.1910, 0.2566, 0.3232, 0.0544, 0.0650, 0.0744
0.0937, 0.1473, 0.2169, 0.0282, 0.0369, 0.0466
epoch = 500, loss = 3802.530+5649.742= 0.000, time = 23.1920
0.1949, 0.2617, 0.3275, 0.0548, 0.0656, 0.0748
0.0958, 0.1489, 0.2202, 0.0289, 0.0375, 0.0475
epoch = 600, loss = 3816.188+5640.162= 0.000, time = 23.0332
0.1924, 0.2