In [1]:
import torch

from usrapprox.usrapprox.models.aligner_v2 import AlignerV2Wrapper


# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("DEVICE: ", DEVICE)

# Build the aligner wrapper for the selected device.
alignerv2 = AlignerV2Wrapper(device=DEVICE)
# Kept as the last bare expression of the cell so the notebook displays
# whatever `.to(DEVICE)` returns (the model summary).
alignerv2.to(DEVICE)

DEVICE:  cuda
[MODEL] Using aggregation type: gating-tanh
[MODEL] Using learnable temperature
[MODEL] Using projection type: shared


AlignerV2Wrapper(
  (gate): Linear(in_features=9984, out_features=9984, bias=True)
  (gate_transform): Linear(in_features=768, out_features=768, bias=True)
  (gate_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (gate_out_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (drop): Dropout(p=0.2, inplace=False)
  (users): Embedding(967, 256)
  (linear1): Linear(in_features=256, out_features=2048, bias=True)
  (fc1): Linear(in_features=256, out_features=2048, bias=True)
  (linear2): Linear(in_features=2048, out_features=768, bias=True)
  (fc2): Linear(in_features=2048, out_features=768, bias=True)
  (ln_usr): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (linear3): Linear(in_features=256, out_features=768, bias=True)
  (f5): Linear(in_features=768, out_features=2048, bias=True)
  (f6): Linear(in_features=2048, out_features=768, bias=True)
  (ln2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)

In [2]:
import json

# Location of the split definition file and of the directory holding the
# per-item embedding files.
splits_path = "usrembeds/data/splits.json"
embs_path = "usrembeds/data/embeddings/embeddings_full_split"

# Read the whole file and parse it as JSON into `splits`.
with open(splits_path, mode="r") as f:
    splits = json.loads(f.read())

In [3]:
# Number of entries under the "train" key of the parsed splits file.
len(splits["train"])

70834

In [13]:
import os
from tqdm import tqdm


class MyDataset(torch.utils.data.Dataset):
    """Groups consecutive entries of the "train" split into fixed-size samples.

    Each sample stacks the embeddings of ``1 + nneg`` consecutive entries of
    ``splits["train"]``. Sample ``k`` covers entries
    ``k * (1 + nneg) .. k * (1 + nneg) + nneg``, so samples never overlap and
    together they span the whole split (minus an incomplete trailing group).

    Every entry is used both as the stem of a ``<entry>.json`` file inside
    ``embs_path`` and as the key looked up inside that file.
    """

    def __init__(self, nneg=20):
        """Load the training split.

        Args:
            nneg: number of extra entries grouped with the first one, giving
                ``1 + nneg`` embeddings per sample.
        """
        splits_path = "usrembeds/data/splits.json"
        self.embs_path = "usrembeds/data/embeddings/embeddings_full_split"
        self.nneg = nneg

        with open(splits_path, "r") as f:
            splits = json.load(f)
        self.splits = splits["train"]

    def __getitem__(self, index):
        """Return the ``index``-th group of embeddings as a float tensor.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``.
        """
        group_size = 1 + self.nneg
        if not 0 <= index < len(self):
            raise IndexError(
                f"index {index} out of range for dataset of length {len(self)}"
            )

        # Stride by the group size: `__len__` counts whole groups, so using
        # `index` itself as the offset would make neighbouring samples overlap
        # and leave most of the split unread.
        start = index * group_size
        items = [self.__get_embedding(start + offset) for offset in range(group_size)]

        # NOTE(review): the `[0.0]` placeholder returned for missing entries
        # has length 1; if real embeddings are longer, a group that mixes both
        # cannot be stacked and this call raises. Confirm the embedding size
        # before relying on the fallback.
        return torch.tensor(items, dtype=torch.float32)

    def __len__(self):
        """Number of complete groups of ``1 + nneg`` entries in the split."""
        return len(self.splits) // (1 + self.nneg)

    def __get_embedding(self, idx):
        """Load the first stored embedding of split entry ``idx``.

        Best effort: when the file is missing, unreadable, malformed, or does
        not contain the entry, a message is printed and the placeholder
        ``[0.0]`` is returned instead of raising.
        """
        posset = self.splits[idx]
        emb_file = os.path.join(self.embs_path, f"{posset}.json")

        if not os.path.isfile(emb_file):
            print("File does not exist")
            return [0.0]

        try:
            with open(emb_file, "r") as f:
                data = json.load(f)
            if posset in data:
                return data[posset][0]
        except (OSError, ValueError, LookupError, TypeError):
            # I/O failure, invalid JSON/encoding, or an unexpected payload
            # layout. Deliberately narrower than a bare `except:` so that
            # KeyboardInterrupt and SystemExit still propagate.
            print("Error reading file")
            return [0.0]

        print("No embeddings for posset")
        return [0.0]


# nneg=1: each dataset item stacks the embeddings of two split entries.
dataset = MyDataset(nneg=1)

dataloader = torch.utils.data.DataLoader(
    dataset,
    shuffle=True,
    batch_size=128,
    num_workers=10,
)

# Resolve the target device once; the model and every input tensor are moved
# to it before the forward call.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

alignerv2 = alignerv2.to(device)

for batch in tqdm(dataloader):
    # One index per batch row, all set to the constant 1
    # (presumably a user id — confirm against AlignerV2Wrapper's forward).
    index = torch.ones(batch.shape[0], dtype=torch.long, device=device)
    batch = batch.to(device)
    user_embedding, music_feedback = alignerv2(index, batch, True)

100%|██████████| 277/277 [00:35<00:00,  7.91it/s]


  0%|          | 0/1107 [00:00<?, ?it/s]
