# Train

In [99]:
import torch
import torch.nn as nn
from utils import args
from numpy import linalg as LA
from tqdm import tqdm
import gzip
from metrics4rec import evaluate_all

In [40]:
device = args.device

In [108]:
class UIPNN(nn.Module):
    
    def __init__(self, ent_tot, have_path=True, dim = 100, p_norm = 1, norm_flag = True, margin = None, epsilon = None):
        super(UIPNN, self).__init__()
        self.dim = dim
        self.margin = margin
        self.epsilon = epsilon
        self.norm_flag = norm_flag
        self.p_norm = p_norm
        self.ent_tot = ent_tot
        self.have_path = have_path

        self.ent_embeddings = nn.Embedding(self.ent_tot, self.dim)
        
        nn.init.xavier_uniform_(self.ent_embeddings.weight.data)
        
        self.NN = nn.Sequential(nn.Linear(self.dim*(2+int(have_path)), 32),
                                nn.ReLU(),
#                                 nn.Linear(256, 128),
#                                 nn.ReLU(),
                                nn.Linear(32, 1),
                                nn.Sigmoid()
                               )
        
    def forward(self, x):
        x = self.ent_embeddings(x)
        x = x.reshape(-1, self.dim*(2+int(self.have_path)))
        return self.NN(x)
    
    
    

In [6]:
print('loading transE embedding...')
ckpt = torch.load(args.transE_embedding_file, map_location=torch.device('cpu'))
embeds = ckpt['ent_embeddings.weight'].cpu().numpy()
rels = ckpt['rel_embeddings.weight'].cpu().numpy()

print(type(embeds), embeds.shape)
print(type(rels), rels.shape)

loading transE embedding...
<class 'numpy.ndarray'> (169931, 100)
<class 'numpy.ndarray'> (90, 100)


In [None]:
def transE_eval(embeds, rels, ui_cands):
    ui_scores = {}
    cnt = 0
    for uid in ui_cands:
        iids = ui_cands[uid]
        u_emb = embeds[uid] + rels[0]  # user + purchase
        i_embs = embeds[iids]
        scores = np.expand_dims(u_emb, 0) - i_embs
        scores = LA.norm(scores, ord=1, axis=1) * (-1)  # larger is better
        ui_scores[uid] = dict(zip(iids, scores.tolist()))
        cnt += 1
        if cnt % 5000 == 0:
            print(cnt)
    return ui_scores

In [7]:
ur_embs = embeds[:61254] + rels[0]
i_embs = embeds[61254:108857]

In [9]:
scores = ur_embs[0] - i_embs
print(scores.shape)

(47603, 100)


In [27]:
k = 20 # pos neg smaple num
train_data = []
for uid, ur_emb in tqdm(enumerate(ur_embs)):
    scores = ur_emb - i_embs
    scores = LA.norm(scores, ord=1, axis=1) * (-1)
    train_data.append(np.column_stack([np.ones(k,dtype=int)*uid, np.argpartition(scores, -k)[-k:], np.ones(k,dtype=int)]))
    train_data.append(np.column_stack([np.ones(k,dtype=int)*uid, np.argpartition(scores, k)[:k],np.zeros(k,dtype=int)]))

61254it [08:53, 114.91it/s]


In [32]:
train_data2 = np.vstack(train_data)
train_data2.shape

(2450160, 3)

In [71]:
class Train_DataSet(torch.utils.data.Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label.astype(np.float32)
    def __getitem__(self, index):
        return self.data[index], self.label[index]
    def __len__(self):
        return len(self.data)

In [72]:
dataset = Train_DataSet(train_data2[:,:2], train_data2[:,2:])
train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True)

In [109]:
ui_model = UIPNN(108858, have_path=False).to(device)

In [110]:
num_epochs = 2
lr = 1e-3
wd = 1e-6
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(ui_model.parameters(), lr=lr, weight_decay=wd)
for epoch in range(num_epochs):
    for i, (x, label) in enumerate(train_dataloader):
        score = ui_model(x.to(device))
        label = label.to(device)
        loss = criterion(score, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 300 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                  .format(epoch+1, num_epochs, i+1, len(train_dataloader), loss.item()))

Epoch [1/2], Step [1/9571], Loss: 0.6935
Epoch [1/2], Step [301/9571], Loss: 0.0447
Epoch [1/2], Step [601/9571], Loss: 0.0290
Epoch [1/2], Step [901/9571], Loss: 0.0215
Epoch [1/2], Step [1201/9571], Loss: 0.0120
Epoch [1/2], Step [1501/9571], Loss: 0.0117
Epoch [1/2], Step [1801/9571], Loss: 0.0158
Epoch [1/2], Step [2101/9571], Loss: 0.0159
Epoch [1/2], Step [2401/9571], Loss: 0.0517
Epoch [1/2], Step [2701/9571], Loss: 0.0204
Epoch [1/2], Step [3001/9571], Loss: 0.0082
Epoch [1/2], Step [3301/9571], Loss: 0.0103
Epoch [1/2], Step [3601/9571], Loss: 0.0078
Epoch [1/2], Step [3901/9571], Loss: 0.0007
Epoch [1/2], Step [4201/9571], Loss: 0.0093
Epoch [1/2], Step [4501/9571], Loss: 0.0026
Epoch [1/2], Step [4801/9571], Loss: 0.0026
Epoch [1/2], Step [5101/9571], Loss: 0.0077
Epoch [1/2], Step [5401/9571], Loss: 0.0071
Epoch [1/2], Step [5701/9571], Loss: 0.0085
Epoch [1/2], Step [6001/9571], Loss: 0.0072
Epoch [1/2], Step [6301/9571], Loss: 0.0025
Epoch [1/2], Step [6601/9571], Loss: 0

# Test

In [75]:
def load_candidate_items(args_):
    print('loading candidate items...')
    ui_cands = {}
    ui_gt = {}
    with gzip.open(args_.kg_test_candidates_file, 'rt') as f:
       for line in f:
           cells = line.split()
           uid = int(cells[0])
           item_ids = [int(i) for i in cells[1:]]
           ui_cands[uid] = item_ids
    #return ui_cands
    #data = np.load(args_.kg_test_candidates_file)['candidates']
    # ui_cands = {}
    # ui_gt = {}
    with open(args_.kg_test_triples_file, 'rt') as f:
        for line in f:
            line = line.split()
            uid = int(line[0])
            iid = int(line[1])
            if uid not in ui_gt:
                ui_gt[uid] = [iid]
            else:
                ui_gt[uid].append(iid)

    return ui_cands, ui_gt


In [78]:
ui_cands, gt = load_candidate_items(args)
print(len(ui_cands))

loading candidate items...
61254


In [None]:
len()

In [90]:
np.array(list(zip([0]*len(ui_cands[0]), ui_cands[0]))).shape

(1000, 2)

In [91]:
ui = np.zeros((len(ui_cands)*1000, 2), dtype=np.int)
for i, u in enumerate(ui_cands):
    ui[1000*i:1000*(i+1), 0] = u
    ui[1000*i:1000*(i+1), 1] = ui_cands[u]
ui.shape

(61254000, 2)

In [111]:
def ui_eval(X):
    print('cal ui score...')
    #model = torch.load(args.rec_ui_model_path, map_location=torch.device(device))
    model = ui_model.to('cpu')
    #print(model.device())
    model.eval()
    with torch.no_grad():
        scores = model(torch.from_numpy(X))
    return dict_scores(ui, scores)

def dict_scores(ui, scores):
    print('dicting score...')
    ui_scores = {}
    for i, u in enumerate(ui[::1000][:,0].flatten()):
        ui_scores[u] = dict(zip(ui[i*1000:(i+1)*1000,1].tolist(), scores[i*1000:(i+1)*1000].tolist()))
    return ui_scores

In [112]:
ui_scores = ui_eval(ui)
print('evaluating scores...')
evaluate_all(ui_scores, gt, 10)

cal ui score...
dicting score...
evaluating scores...

NDCG@10	Rec@10	Hits@10	Prec@10	MAP@10	MRR@10
0.0093	0.0100	0.0202	0.0020	0.0060	0.0060


('\nNDCG@10\tRec@10\tHits@10\tPrec@10\tMAP@10\tMRR@10\n0.0093\t0.0100\t0.0202\t0.0020\t0.0060\t0.0060',
 {'ndcg': 0.009266310105280604,
  'map': 0.006032599231614806,
  'recall': 0.00995696783688287,
  'precision': 0.002034152871649153,
  'mrr': 0.006030594179531392,
  'hit': 0.020210925000816272})