### SAMPLERS AND DATALOADERS

In [1]:
dataset_name = "Beauty"
from tqdm import tqdm
from src.data_preprocessing import *
from src.config import *
from src.torch_modules import *
from src.train import *
from src.sampler import *
from src.utils import *
from torch.utils.data import DataLoader
import torch

In [2]:
device = "cuda:0"
dataset, args, Psampler, config, model, optimizer, scheduler, CXTDict = load_model_stuff(dataset_name, device)

Loading datasets ...
 The dataset Beauty contains 52204 users and 57289 items in total
average sequence length: {5.63}
ItemFeatures DF dimensions (57290, 6507)
Loading Dataloader ...
iterations_num 94019404
Loading Configuration ...
Loading Model ...


RuntimeError: No CUDA GPUs are available

In [None]:
args.batch_size = 2

In [4]:
def training_loop_func(args, model, Psampler, device, optimizer, scheduler, dataset=None, cxtdict=None):
    counter, T, t0 = 0, 0.0, time.time()
    model.train()
    final_loss = 0
    Psampler.device = "cpu"
    train_data = DataLoader(Psampler, batch_size=args.batch_size, num_workers=0)
    evaluation_point = len(train_data) / ((args.num_epochs + 1) / 20)

    pbar = tqdm(train_data, total=len(train_data), ncols=70, leave=False, unit='batch')
    for batch in pbar:
        counter += 1
        pbar.set_description("Training ...")
        u, seq, pos, neg, seqcxt, poscxt, negcxt = [value.to(device).long() for value in batch]

        # Initialize the gradients with zeros
        model.zero_grad()

        # Forward Propagation and Backward propagation
        logits, loss = model(user_seq=u, seq=seq, seqcxt=seqcxt, pos=pos, neg=neg, poscxt=poscxt, negcxt=negcxt)
        loss.backward()

        # Updates
        optimizer.step()
        scheduler.step()

        # accumulate the loss for the BP
        final_loss += loss.item()

        if counter % evaluation_point == 0:
            t1 = time.time() - t0
            T += t1
            print('Evaluating')
            t_test = evaluate(model, dataset, args, cxtdict)
            t_val = evaluate_validation(model, dataset, args, cxtdict)
            print(
                'epoch:%d, time: %f(s), valid (NDCG@10: %.4f, HR@10: %.4f, AUC: %.4f)' % (
                    (counter / evaluation_point) * 20, T, t_val[0], t_val[1], t_val[2]))
            print(
                'epoch:%d, time: %f(s), test (NDCG@10: %.4f, HR@10: %.4f, AUC: %.4f)' % (
                    (counter / evaluation_point) * 20, T, t_test[0], t_test[1], t_test[2]))
            t0 = time.time()

    print("Done")


In [5]:
import torch
torch.cuda.empty_cache()

In [6]:
training_loop_func(args, model, Psampler, device, optimizer, scheduler, dataset, CXTDict)

                                                                      

seq seqcxt torch.Size([2, 10]) torch.Size([2, 10, 6])
BBB seq seqcxt torch.Size([2, 10]) torch.Size([2, 10, 6])
AAAA seq seqcxt torch.Size([2, 10]) torch.Size([2, 10, 6])
cxt, att torch.Size([2, 10, 6]) torch.Size([2, 10, 6507])




RuntimeError: expected scalar type Double but found Float

### METRICS

In [11]:
def NDCG(ans, cand):
    """ NDCG for lists and np.arrays.
        Log in base 2.
        Relevance = 1.
    """
    cand_len = len(cand)
    idcg = 1
    dcg = 0
    if cand[0] in ans:  dcg = 1
    for i in range(cand_len):
        if cand[i] in ans:
            dcg += (1 / math.log(i + 2, 2))
        idcg += (1 / math.log(i + 2, 2))

    return dcg / idcg


In [32]:
NDCG(p, q)

0.5790911234048474

In [30]:
p = list(range(10))
q=[np.random.randint(0, 13) for _ in range(10)]

In [31]:
p, q

([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 4, 9, 6, 9, 8, 11, 6, 8, 7])