In [1]:
import os
from tqdm import tqdm
import torch
import json


class MyDataset(torch.utils.data.Dataset):
    """Dataset of fixed-size groups of embeddings read from per-id JSON files.

    The ids stored under ``split`` in the splits file are partitioned into
    consecutive, non-overlapping groups of ``1 + nneg`` ids. Item ``k`` is the
    ``k``-th group, returned as a single float32 tensor with one row per id.
    Trailing ids that do not fill a whole group are ignored.

    Each id ``x`` is looked up in ``<embs_path>/<x>.json``. That file must hold
    a JSON object whose key ``x`` maps to a list; the first element of that
    list is used as the embedding.

    Args:
        nneg: number of additional ids stacked after the first one in every
            item (the name suggests "negatives"; nothing in this class treats
            them differently from the first id).
        splits_path: JSON file mapping split names to lists of ids.
        embs_path: directory containing one ``<id>.json`` file per id.
        split: key of the splits file whose ids are used.
    """

    def __init__(
        self,
        nneg=20,
        splits_path="usrembeds/data/splits.json",
        embs_path="usrembeds/data/embeddings/embeddings_full_split",
        split="train",
    ):
        self.embs_path = embs_path
        self.nneg = nneg

        with open(splits_path, "r") as f:
            splits = json.load(f)
        self.splits = splits[split]

    def __getitem__(self, index):
        """Return the ``index``-th group as a ``(1 + nneg, dim)`` float32 tensor.

        Raises:
            IndexError: if the group reaches past the end of the id list.
            ValueError: if the rows have different lengths, which happens when
                a placeholder row is mixed with real embeddings.
        """
        group_size = 1 + self.nneg
        # Groups must not overlap: __len__ counts whole groups, so item k
        # starts at k * group_size (not at k, which would reuse ids and leave
        # most of the split unread).
        start = index * group_size
        rows = [self.__get_embedding(start + offset) for offset in range(group_size)]
        return torch.tensor(rows, dtype=torch.float32)

    def __len__(self):
        """Number of complete groups of ``1 + nneg`` ids."""
        return len(self.splits) // (1 + self.nneg)

    def __get_embedding(self, idx):
        """Load the embedding of the id at position ``idx`` of the split.

        Returns the first entry stored under the id in its JSON file. On any
        failure (missing file, unreadable file, id not in the file) a message
        is printed and the placeholder ``[0.0]`` is returned instead.

        NOTE(review): the placeholder has length 1, so it cannot be stacked
        with longer embeddings in ``__getitem__``; confirm the intended
        fallback dimension.
        """
        posset = self.splits[idx]
        emb_file = os.path.join(self.embs_path, f"{posset}.json")

        if not os.path.isfile(emb_file):
            print("File does not exist")
            return [0.0]

        try:
            with open(emb_file, "r") as f:
                data = json.load(f)
            if posset in data:
                return data[posset][0]
        except Exception:
            # Best-effort read: report and fall back, but let
            # KeyboardInterrupt / SystemExit propagate.
            print("Error reading file")
            return [0.0]

        print("No embeddings for posset")
        return [0.0]


# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# nneg=1: every item stacks 1 + 1 = 2 embeddings.
dataset = MyDataset(nneg=1)

# Shuffled batches of 128 items, loaded by 10 worker processes.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=128,
    shuffle=True,
    num_workers=10,
)

## Verificare che AlignerV2 funzioni (voglio un forward)

Ricorda che ritorna 3 valori, non 2:

- usr_embedding
- embedding
- score


In [2]:
# Smoke test: run one forward pass of AlignerV2Wrapper on a single batch.

from usrapprox.usrapprox.models.aligner_v2 import AlignerV2Wrapper


usr_emb = AlignerV2Wrapper()
# NOTE(review): only the inputs are placed on `device` below; the model itself
# is never moved in this cell. Confirm AlignerV2Wrapper handles its own
# placement, otherwise a CUDA run would mix devices.

for batch in tqdm(dataloader):
    batch = batch.to(device)
    # One index per sample in the batch, all set to 1.
    index = torch.ones(batch.shape[0], dtype=torch.long, device=device)
    # The wrapper returns three values (user embedding, embeddings, score).
    user_embedding, embeddings, music_feedback = usr_emb(index, batch)

    print(user_embedding.shape, embeddings.shape, music_feedback.shape, sep="\n")

    # A single batch is enough to validate the forward pass.
    break

[MODEL] Using aggregation type: gating-tanh
[MODEL] Using learnable temperature
[MODEL] Using projection type: shared


  0%|          | 0/277 [00:00<?, ?it/s]

torch.Size([128, 768])
torch.Size([128, 2, 768])
torch.Size([128, 2])


  0%|          | 0/277 [00:01<?, ?it/s]


## Verificare che UsrEmb funzioni (voglio un forward)


In [5]:
# Smoke test: run one forward pass of UsrEmb on a single batch.


from usrapprox.usrapprox.models.usr_emb import UsrEmb
import torch

# Build the model with the target device, then move it there.
usr_emb = UsrEmb(device=device)
usr_emb.to(device)

for batch in tqdm(dataloader):
    # Only the batch is passed here; no index tensor is built for this model.
    user_embedding, embeddings = usr_emb(batch.to(device))

    print(user_embedding.shape, embeddings.shape, sep="\n")

    # A single batch is enough to validate the forward pass.
    break

[MODEL] Using aggregation type: gating-tanh
[MODEL] Using learnable temperature
[MODEL] Using projection type: shared


  0%|          | 0/277 [00:25<?, ?it/s]


KeyboardInterrupt: 

## Creare un dataset per 1 utente con gli score pre-calcolati

- Calcolare score per l'intero dataset
- Fare un dataloader che carica con 5 positivi e 5 negativi


## Loss function con scoring


# Classe che gestisce $n$ utenti e una memoria di canzoni per ogni singolo utente

- memoria circolare canzoni x utente
- gestione load/store utente
- metodo per fare finetuning per utente `n` data una batch nuova
- metodo per avere lo score (cosine) data una batch
