In [1]:
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset

from torch.utils.data import Dataset, DataLoader

import pandas as pd
import numpy as np

from datetime import datetime
from tqdm import tqdm

import random

from sklearn.model_selection import train_test_split

In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [3]:
df_users = pd.read_parquet('user_features_clean.parquet')
df_movies = pd.read_parquet('Movies_clean_Vec_v4_25keywords.parquet')
df_ratings = pd.read_parquet('ratings_groupped_ids.parquet')

# Przygotowanie movieId dla datasetów

In [4]:
print(df_users.info())
print(df_ratings.info())
print(df_movies.info())

empty_pos_ratings = df_ratings['pos'].apply(lambda x: len(x) == 0).sum()
empty_neg_ratings = df_ratings['neg'].apply(lambda x: len(x) == 0).sum()

if empty_pos_ratings != 0 or empty_neg_ratings != 0:
    print(f'Empty ratings: pos: {empty_pos_ratings}, neg: {empty_neg_ratings}')
    raise Exception("Users without a single pos/neg rating exist in the ratings_groupped_ids dataset")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198832 entries, 0 to 198831
Data columns (total 29 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   userId                   198832 non-null  int64  
 1   num_rating               198832 non-null  float64
 2   avg_rating               198832 non-null  float64
 3   weekend_watcher          198832 non-null  float64
 4   genre_Action             198832 non-null  float64
 5   genre_Adventure          198832 non-null  float64
 6   genre_Animation          198832 non-null  float64
 7   genre_Comedy             198832 non-null  float64
 8   genre_Crime              198832 non-null  float64
 9   genre_Documentary        198832 non-null  float64
 10  genre_Drama              198832 non-null  float64
 11  genre_Family             198832 non-null  float64
 12  genre_Fantasy            198832 non-null  float64
 13  genre_History            198832 non-null  float64
 14  genr

In [5]:
unique_ids = set(
        df_users['movies_seq'].explode().tolist()
        + df_ratings['pos'].explode().tolist() 
        + df_ratings['neg'].explode().tolist()
    )

print('Unique movieIds:', len(unique_ids))
unique_ids = sorted(unique_ids)

movieId_to_idx = {id_: idx for idx, id_ in enumerate(unique_ids)}
print('min idx:', min(movieId_to_idx.values()))
print('max idx:', max(movieId_to_idx.values()))

n_items = len(unique_ids)

assert min(movieId_to_idx.values()) == 0
assert max(movieId_to_idx.values()) == n_items - 1

# unique_ids = sorted(df_movies['movieId'].unique())
# movieId_to_idx = {id_: idx for idx, id_ in enumerate(unique_ids)}
# n_items = len(movieId_to_idx)


Unique movieIds: 82932
min idx: 0
max idx: 82931


In [6]:
# Zmapuj movieId do indeksów
df_users['movies_seq'] = df_users['movies_seq'].apply(lambda lst: [movieId_to_idx[m] for m in lst])
df_ratings['pos'] = df_ratings['pos'].apply(lambda lst: [movieId_to_idx[m] for m in lst])
df_ratings['neg'] = df_ratings['neg'].apply(lambda lst: [movieId_to_idx[m] for m in lst])

# df_movies musi być ograniczone tylko do używanych filmów
df_movies = df_movies[df_movies['movieId'].isin(movieId_to_idx)]
df_movies['movieId'] = df_movies['movieId'].map(movieId_to_idx)

# Final sanity check
assert df_users['movies_seq'].explode().max() < n_items
assert df_ratings['pos'].explode().max() < n_items
assert df_ratings['neg'].explode().max() < n_items
assert df_movies['movieId'].max() < n_items
assert df_movies['movieId'].notna().all(), "Some movieIds weren't mapped!"

In [7]:
max_movie_idx = df_users['movies_seq'].explode().max()
print("max_movie_idx =", max_movie_idx)
print("n_items =", n_items)

assert max_movie_idx < n_items, "Indeks filmu przekracza rozmiar embeddingu"

max_movie_idx = 82931
n_items = 82932


In [8]:
def has_invalid_entries(seq_col):
    return seq_col.explode().isin([-1, np.nan, None]).any()

print("Zawiera niepoprawne wartości:", has_invalid_entries(df_users['movies_seq']))


Zawiera niepoprawne wartości: False


In [9]:
df_movies.info()
df_movies.head(83000)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82918 entries, 0 to 82917
Data columns (total 29 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   movieId              82918 non-null  int64  
 1   runtime              82918 non-null  float64
 2   if_blockbuster       82918 non-null  int64  
 3   highly_watched       82918 non-null  int64  
 4   highly_rated         82918 non-null  int64  
 5   engagement_score     82918 non-null  float64
 6   cast_importance      82918 non-null  float64
 7   director_score       82918 non-null  float64
 8   has_keywords         82918 non-null  int64  
 9   has_cast             82918 non-null  int64  
 10  has_director         82918 non-null  int64  
 11  genre_ids            82918 non-null  object 
 12  decade_[1890, 1900)  82918 non-null  bool   
 13  decade_[1900, 1910)  82918 non-null  bool   
 14  decade_[1910, 1920)  82918 non-null  bool   
 15  decade_[1920, 1930)  82918 non-null 

Unnamed: 0,movieId,runtime,if_blockbuster,highly_watched,highly_rated,engagement_score,cast_importance,director_score,has_keywords,has_cast,...,"decade_[1960, 1970)","decade_[1970, 1980)","decade_[1980, 1990)","decade_[1990, 2000)","decade_[2000, 2010)","decade_[2010, 2020)","decade_[2020, 2030)",text_embedded,actor_ids,director_ids
0,14840,1.942703,1,1,1,4.591432,2.899632,2.653210,1,1,...,False,False,False,False,False,True,False,"[0.024012607, 0.106287114, -0.14222005, 0.0311...","[6454, 10631, 5457, 1952, 5950]",[797]
1,20922,2.432017,1,1,1,5.199338,2.789332,2.653210,1,1,...,False,False,False,False,False,True,False,"[-0.023986915, 0.11851813, -0.15495336, 0.1043...","[659, 7298, 4974, 10576, 5292]",[797]
2,12164,2.033104,1,1,1,5.199338,3.099369,2.653210,1,1,...,False,False,False,False,True,False,False,"[-0.08562198, 0.14635089, -0.10808112, 0.02524...","[1867, 3519, 7812, 1952, 4010]",[797]
3,14021,2.256745,1,1,1,4.123958,2.512635,2.304477,1,1,...,False,False,False,False,True,False,False,"[-0.09937162, 0.114497274, -0.13823982, 0.0438...","[11434, 9935, 7629, 9574, 3709]",[2026]
4,16934,1.824556,1,1,1,5.199338,5.199338,1.817788,1,1,...,False,False,False,False,False,True,False,"[0.01349653, 0.096834205, -0.15448155, 0.05628...","[9686, 1839, 1834, 9161, 4923]",[2496]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82913,38978,-0.803958,0,0,0,-5.199338,-2.705413,-2.651732,0,1,...,False,False,False,False,False,True,False,"[-0.045425046, 0.15597995, -0.15817073, 0.0387...",[11444],[5144]
82914,81080,-0.119468,0,0,0,-2.366351,-2.758924,-2.138713,1,1,...,False,False,False,False,False,False,False,"[-0.12103934, 0.09663682, -0.13345358, -0.0090...",[11444],[5144]
82915,76460,-1.283836,0,0,0,-5.199338,-0.593940,-0.071571,0,0,...,False,False,False,False,True,False,False,"[-0.13175464, 0.06275898, -0.1324712, 0.099017...",[11444],[5144]
82916,70801,-0.888309,0,0,0,-5.199338,-0.593940,-0.071571,0,0,...,False,False,False,False,False,False,True,"[-0.031386144, 0.14395967, -0.11520111, 0.1354...",[11444],[5144]


In [10]:
df_ratings.head(100000)

Unnamed: 0,userId,pos,neg
0,1,"[16, 29, 31, 79, 109, 164, 174, 229, 257, 298,...","[24, 28, 33, 35, 108, 159, 220, 340, 351, 522,..."
1,2,"[30, 33, 38, 47, 183, 184, 205, 214, 216, 219,...","[151, 191, 228, 250, 292, 301, 339, 344, 461, ..."
2,3,"[9, 10, 16, 25, 61, 108, 148, 149, 159, 257, 2...","[1, 47, 139, 151, 156, 166, 183, 206, 228, 324..."
3,4,"[220, 1232, 2011, 2660, 2731, 3063]","[1172, 1285, 1452, 1732, 2320, 2382, 2491, 249..."
4,5,"[9, 108, 159, 163, 344, 351, 359, 375, 429, 44...","[46, 148, 151, 183, 206, 228, 250, 285, 288, 2..."
...,...,...,...
99995,101075,"[49, 232, 257, 269, 292, 298, 475, 529, 536, 5...","[0, 1, 6, 16, 17, 18, 20, 31, 33, 34, 42, 45, ..."
99996,101076,"[0, 5, 46, 109, 257, 292, 314, 339, 351, 475, ...","[18, 49, 69, 194, 206, 220, 228, 236, 285, 289..."
99997,101077,"[46, 292, 351, 522, 585, 765, 1056, 1168, 1170...","[28, 30, 49, 103, 109, 160, 253, 314, 475, 577..."
99998,101078,"[292, 1071, 1166, 1285, 1619, 1770, 2056, 2205...","[1982, 2363, 2552, 3152, 5376]"


In [11]:
#FOR QUICK TEST's

DEBUG = False

if DEBUG:
    df_users = df_users.sample(n=1028, random_state=42).copy()
    df_ratings = df_ratings[df_ratings['userId'].isin(df_users['userId'])].copy()


# COSINE SIMILARITY

In [None]:
movie_features_dict = {
    row['movieId']: {
        'numeric': [
            row['runtime'],
            row['engagement_score'],
            row['cast_importance'],
            row['director_score']
        ],
        'binary': [
            row['if_blockbuster'],
            row['highly_watched'],
            row['highly_rated'],
            row['has_keywords'],
            row['has_cast'],
            row['has_director']
        ],
        'decades': [int(row[col]) for col in df_movies.columns if col.startswith('decade_')],
        'text_embedded': row['text_embedded'],
        'actor_ids': row['actor_ids'],
        'director_ids': row['director_ids'],
        'genre_ids': row['genre_ids']
    }
    for _, row in df_movies.iterrows()
}

In [None]:
movie_vecs = []  # List of torch.tensor [embedding_dim]
id_to_idx = {}   # movieId → index
idx_to_id = {}   # index → movieId

for i, (movie_id, feats) in enumerate(movie_features_dict.items()):
    combined = torch.cat([
        torch.tensor(feats['numeric'], dtype=torch.float32),
        torch.tensor(feats['binary'], dtype=torch.float32),
        torch.tensor(feats['decades'], dtype=torch.float32),
        torch.tensor(feats['text_embedded'], dtype=torch.float32)
    ])
    movie_vecs.append(F.normalize(combined.unsqueeze(0), dim=1).squeeze(0))
    id_to_idx[movie_id] = i
    idx_to_id[i] = movie_id

movie_matrix = torch.stack(movie_vecs)  # shape [n_movies, dim]

In [None]:
import faiss
movie_matrix_np = movie_matrix.cpu().numpy().astype('float32')
faiss.normalize_L2(movie_matrix_np)
index = faiss.IndexFlatIP(movie_matrix_np.shape[1])
index.add(movie_matrix_np)

In [None]:
def find_hard_negative(anchor_id, user_negs, id_to_idx, idx_to_id, faiss_index, movie_matrix_np, top_k=25):
    """
    Given an anchor movie ID and a set of user-specific negatives, find the most similar negative
    using FAISS based on inner product (cosine similarity if normalized).

    Returns:
        movieId of the selected hard negative
    """
    anchor_idx = id_to_idx.get(anchor_id)
    if anchor_idx is None:
        return random.choice(list(user_negs))  # fallback

    D, I = faiss_index.search(movie_matrix_np[anchor_idx].reshape(1, -1), top_k)

    for idx in I[0]:
        candidate_id = idx_to_id.get(idx)
        if candidate_id in user_negs:
            return candidate_id

    return random.choice(list(user_negs))  # fallback

# Przygotowanie danych (Item Tower)

In [12]:
class ItemDataset(Dataset):
    def __init__(self, df_movies):
        self.data = df_movies
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data.iloc[idx]

In [None]:
def collate_item(batch):
    combined_features = []
    actor_ids = []
    director_ids = []
    genre_ids = []

    max_len_a = max(len(row['actor_ids']) for row in batch)
    max_len_d = max(len(row['director_ids']) for row in batch)
    max_len_g = max(len(row['genre_ids']) for row in batch)

    for row in batch:

        pad_len_a = max_len_a - len(row['actor_ids'])
        pad_len_d = max_len_d - len(row['director_ids'])
        pad_len_g = max_len_g - len(row['genre_ids'])

        actor_ids.append(torch.tensor(row['actor_ids'] + [0]*pad_len_a))
        director_ids.append(torch.tensor(row['director_ids'] + [0]*pad_len_d))
        genre_ids.append(torch.tensor(row['genre_ids'] + [0]*pad_len_g))

        numeric_features = [
            row['runtime'],
            row['engagement_score'],
            row['cast_importance'],
            row['director_score']
        ]

        binary_features = [
            row['if_blockbuster'],
            row['highly_watched'],
            row['highly_rated'],
            row['has_keywords'],
            row['has_cast'],
            row['has_director']
        ]

        decade_features = [int(row[col]) for col in row.index if col.startswith("decade_")]

        c = torch.tensor(numeric_features + binary_features + decade_features, dtype=torch.float32)
        text_embedded = torch.tensor(row['text_embedded'], dtype=torch.float32)

        combined_features.append(torch.cat([c, text_embedded], dim=0))

    return {
        'features': torch.stack(combined_features),  # [B, F]
        'actor_ids': actor_ids,      # [B, max_actor_len]
        'director_ids': director_ids,# [B, max_director_len]
        'genre_ids': genre_ids       # [B, max_genre_len]
    }

In [None]:
testDataLoader = DataLoader(df_movies, batch_size=64, shuffle=True, collate_fn=collate_item)

In [None]:
# for m_id, t in chain([(m, 'pos') for m in pos_list], [(m, 'neg') for m in neg_list]):
#             mapped_m = movie_features_dict[m_id]
#
#             combined = torch.cat([
#                 torch.tensor(mapped_m['numeric'], dtype=torch.float32),
#                 torch.tensor(mapped_m['binary'], dtype=torch.float32),
#                 torch.tensor(mapped_m['decades'], dtype=torch.float32),
#                 torch.tensor(mapped_m['text_embedded'], dtype=torch.float32)
#             ], dim=0)
#
#             pad_len_a = max_len_a - len(mapped_m['actor_ids'])
#             pad_len_d = max_len_d - len(mapped_m['director_ids'])
#             pad_len_g = max_len_g - len(mapped_m['genre_ids'])
#
#             actors = torch.tensor((mapped_m['actor_ids'] + [0]*pad_len_a), dtype=torch.long)
#             directors = torch.tensor((mapped_m['director_ids'] + [0]*pad_len_d), dtype=torch.long)
#             genres= torch.tensor((mapped_m['genre_ids'] + [0]*pad_len_g), dtype=torch.long)
#
#             if t == 'pos':
#                 pos_combined_features.append(combined)
#                 pos_actor_ids.append(actors)
#                 pos_director_ids.append(directors)
#                 pos_genre_ids.append(genres)
#             else:
#                 neg_combined_features.append(combined)
#                 neg_actor_ids.append(actors)
#                 neg_director_ids.append(directors)
#                 neg_genre_ids.append(genres)

In [None]:
n_items = len(unique_ids)

def collate_TwoTower(batch):
    df_users, movies, ratings, timestamps = [], [], [], []

    anchor_combined_features, anchor_actor_ids, anchor_director_ids, anchor_genre_ids = [], [], [], []
    pos_combined_features, pos_actor_ids, pos_director_ids, pos_genre_ids = [], [], [], []
    neg_combined_features, neg_actor_ids, neg_director_ids, neg_genre_ids = [], [], [], []

    max_len_a = max(len(m['actor_ids']) for m in movie_features_dict.values())
    max_len_d = max(len(m['director_ids']) for m in movie_features_dict.values())
    max_len_g = max(len(m['genre_ids']) for m in movie_features_dict.values())

    for row in batch:

        # --- USER ---
        movies.append(torch.tensor(row['movies_seq'], dtype=torch.long))
        ratings.append(torch.tensor(row['ratings_seq'], dtype=torch.float32))
        timestamps.append(torch.tensor(row['ts_seq'], dtype=torch.float32))

        r = row[['num_rating', 'avg_rating', 'weekend_watcher', 'genre_Action', 'genre_Adventure', 'genre_Animation', 'genre_Comedy', 'genre_Crime', 'genre_Documentary', 'genre_Drama', 'genre_Family', 'genre_Fantasy', 'genre_History', 'genre_Horror', 'genre_Music', 'genre_Mystery', 'genre_Romance', 'genre_Science Fiction', 'genre_TV Movie', 'genre_Thriller', 'genre_War', 'genre_Western', 'type_of_viewer_negative', 'type_of_viewer_neutral', 'type_of_viewer_positive']]
        r = r.astype('float32').values

        df_users.append(torch.tensor(r, dtype=torch.float32))

        # --- ITEM ---
        pos_list = row['pos']
        neg_list = row['neg']

        # Less than 2 pos for triplet
        if len(pos_list) < 2 or len(neg_list) == 0:
            continue

        # Create pos anchor
        anchor_id, positive_id = random.sample(pos_list, 2)

        # Get 1 hard neg
        hard_neg_id = find_hard_negative(
            anchor_id,
            set(neg_list),
            id_to_idx, idx_to_id,
            index, movie_matrix_np,
            top_k=25
        )

        for m_id, t in zip([anchor_id, positive_id, hard_neg_id], ['anchor', 'pos', 'neg']):
            mapped_m = movie_features_dict[m_id]

            combined = torch.cat([
                torch.tensor(mapped_m['numeric'], dtype=torch.float32),
                torch.tensor(mapped_m['binary'], dtype=torch.float32),
                torch.tensor(mapped_m['decades'], dtype=torch.float32),
                torch.tensor(mapped_m['text_embedded'], dtype=torch.float32)
            ], dim=0)

            pad_len_a = max_len_a - len(mapped_m['actor_ids'])
            pad_len_d = max_len_d - len(mapped_m['director_ids'])
            pad_len_g = max_len_g - len(mapped_m['genre_ids'])

            actors = torch.tensor((mapped_m['actor_ids'] + [0]*pad_len_a), dtype=torch.long)
            directors = torch.tensor((mapped_m['director_ids'] + [0]*pad_len_d), dtype=torch.long)
            genres= torch.tensor((mapped_m['genre_ids'] + [0]*pad_len_g), dtype=torch.long)

            if t == 'anchor':
                anchor_combined_features.append(combined)
                anchor_actor_ids.append(actors)
                anchor_director_ids.append(directors)
                anchor_genre_ids.append(genres)
            elif t == 'pos':
                pos_combined_features.append(combined)
                pos_actor_ids.append(actors)
                pos_director_ids.append(directors)
                pos_genre_ids.append(genres)
            else:
                neg_combined_features.append(combined)
                neg_actor_ids.append(actors)
                neg_director_ids.append(directors)
                neg_genre_ids.append(genres)

    return {
        'user_inputs': {
            "df_users": torch.stack(df_users),
            "movies": torch.stack(movies),
            "ratings": torch.stack(ratings),
            "timestamps": torch.stack(timestamps),
        },
        'anchor': {
            'features': torch.stack(anchor_combined_features),
            'actor_ids': anchor_actor_ids,
            'director_ids': anchor_director_ids,
            'genre_ids': anchor_genre_ids
        },
        'pos': {
            'features': torch.stack(pos_combined_features),
            'actor_ids': pos_actor_ids,
            'director_ids': pos_director_ids,
            'genre_ids': pos_genre_ids
        },
        'neg': {
            'features': torch.stack(neg_combined_features),
            'actor_ids': neg_actor_ids,
            'director_ids': neg_director_ids,
            'genre_ids': neg_genre_ids
        }
    }

In [None]:
# from itertools import chain
#
# def generate_movie_features(row, movie_features_dict):
#     """
#     For a given row (with pos and neg movieId lists), generate tensors of features for all movies.
#     Returns: dict with 'features', 'actor_ids', 'director_ids', 'genre_ids'
#     """
#     user_id = row['userId']
#     pos_list = row['pos']
#     neg_list = row['neg']
#
#     user_movie_ids = list(chain(pos_list, neg_list))
#     features, actor_ids, director_ids, genre_ids = [], [], [], []
#
#     for m_id, t in user_movie_ids:
#         entry = movie_features_dict[m_id]
#
#         feat_tensor = torch.cat([
#             torch.tensor(entry['numeric'], dtype=torch.float32),
#             torch.tensor(entry['text_embedded'], dtype=torch.float32)
#         ])
#
#         features.append(feat_tensor)
#         actor_ids.append(torch.tensor(entry['actor_ids'], dtype=torch.long))
#         director_ids.append(torch.tensor(entry['director_ids'], dtype=torch.long))
#         genre_ids.append(torch.tensor(entry['genre_ids'], dtype=torch.long))
#
#         if target == 'pos':
#                 pos_feats.append(feats)
#                 pos_actor_ids.append(actor)
#                 pos_director_ids.append(director)
#                 pos_genre_ids.append(genre)
#             else:
#                 neg_feats.append(feats)
#                 neg_actor_ids.append(actor)
#                 neg_director_ids.append(director)
#                 neg_genre_ids.append(genre)
#
#     return {
#         'features': features,
#         'actor_ids': actor_ids,
#         'director_ids': director_ids,
#         'genre_ids': genre_ids,
#         'pos_count': len(pos_list)
#     }


# Przygotowanie danych (User Tower)

In [39]:
class UserDataset(Dataset):
    def __init__(self, df_users):
        self.data = df_users
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data.iloc[idx]

In [40]:
n_items = len(unique_ids)

def collate_user(batch):
    df_users, movies, ratings, timestamps, pos, neg = [], [], [], [], [], []

    for row in batch:
        movies.append(torch.tensor(row['movies_seq'], dtype=torch.long))
        ratings.append(torch.tensor(row['ratings_seq'], dtype=torch.float32))
        timestamps.append(torch.tensor(row['ts_seq'], dtype=torch.float32))

        userId = row['userId']

        r = row[['num_rating', 'avg_rating', 'weekend_watcher', 'genre_Action', 'genre_Adventure', 'genre_Animation', 'genre_Comedy', 'genre_Crime', 'genre_Documentary', 'genre_Drama', 'genre_Family', 'genre_Fantasy', 'genre_History', 'genre_Horror', 'genre_Music', 'genre_Mystery', 'genre_Romance', 'genre_Science Fiction', 'genre_TV Movie', 'genre_Thriller', 'genre_War', 'genre_Western', 'type_of_viewer_negative', 'type_of_viewer_neutral', 'type_of_viewer_positive']]
        r = r.astype('float32').values

        df_users.append(torch.tensor(r, dtype=torch.float32))
        
        # Get a random movieId that was rated positively and one that was rated negatively. 
        # Used during training to calculate BPR loss. 
        posAndNegRow = df_ratings[df_ratings['userId'] == userId].iloc[0]
        pos.append(torch.tensor(random.choice(posAndNegRow['pos']), dtype=torch.long))
        neg.append(torch.tensor(random.choice(posAndNegRow['neg']), dtype=torch.long))

    return {
        "input": {
            "df_users": torch.stack(df_users),
            "movies": torch.stack(movies),
            "ratings": torch.stack(ratings),
            "timestamps": torch.stack(timestamps),
        },
        "pos": torch.as_tensor(pos, dtype=torch.long),
        "neg": torch.as_tensor(neg, dtype=torch.long)
    }

# Model (Item Tower)

In [41]:
class ItemTower(nn.Module):
    def __init__(self, input_dim, embedding_dim=64, num_actors=10000, num_directors=5000, num_genres=19):
        super(ItemTower, self).__init__()
        self.actor_embedding = nn.EmbeddingBag(num_actors, 32, mode='mean')
        self.director_embedding = nn.EmbeddingBag(num_directors, 32, mode='mean')
        self.genre_embedding = nn.EmbeddingBag(num_genres, 16, mode='mean')

        self.model = nn.Sequential(
            nn.Linear(input_dim + 32 + 32 + 16, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, embedding_dim)
        )

    def forward(self, x, actor_bag, actor_offsets,
                  director_bag, director_offsets,
                  genre_bag, genre_offsets):
        actor_emb = self.actor_embedding(actor_bag, actor_offsets)
        director_emb = self.director_embedding(director_bag, director_offsets)
        genre_emb = self.genre_embedding(genre_bag, genre_offsets)
    
        x = torch.cat([x, actor_emb, director_emb, genre_emb], dim=1)
        output = self.model(x)
        v = F.normalize(output, dim=1)
        return v

# Model (User Tower)

In [42]:
class UserTower(nn.Module):
    def __init__(self, input_dim, embedding_dim=64, n_items=1000):
        super(UserTower, self).__init__()

        # Item Embeddings for User History
        self.item_emb = nn.Embedding(n_items, embedding_dim)
        
        # A layer to project rating and timestamp into a scalar weight
        self.rating_proj = nn.Linear(2, 1)

        self.mlp = nn.Sequential(
            nn.Linear(input_dim + embedding_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 384),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(384, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, embedding_dim)
        )
    
    def forward(self, batch):
        # Embed movieIds liked by user
        m = self.item_emb(batch['movies'])

        # Get weights from rating and timestamp
        x = torch.stack([batch['ratings'], batch['timestamps']], dim=-1)
        w = torch.sigmoid(self.rating_proj(x))

        # weighted mean-pool
        pooled = (m * w).sum(1) / (w.sum(1).clamp_min(1e-6))

        input = torch.cat([batch['df_users'], pooled], dim=-1)
        output = self.mlp(input)
        u = F.normalize(output, dim=1)
        return u

# Evaluation

In [None]:
import torch.nn.functional as F

def run_sanity_check(user_tower, item_tower, df_users, df_movies, df_ratings, movie_id_to_idx, 
                     movie_features, actor_idx_bag, actor_offsets,
                     director_idx_bag, director_offsets, genre_idx_bag, genre_offsets,
                     device):
    user_tower.eval()
    item_tower.eval()

    print("Running sanity check...")

    # Select a single user with at least 5 positive movies
    for _, row in df_ratings.iterrows():
        if len(row['pos']) >= 5:
            user_id = row['userId']
            break
    else:
        print("No user found with sufficient data for sanity check.")
        return

    # Prepare user input
    row_user = df_users[df_users['userId'] == user_id].iloc[0]
    history_ids = row['pos'][:-1]
    held_out_id = row['pos'][-1]

    user_input = {
        'movies': torch.tensor(history_ids, dtype=torch.long).unsqueeze(0).to(device),
        'ratings': torch.tensor([4.0]*len(history_ids), dtype=torch.float32).unsqueeze(0).to(device),
        'timestamps': torch.tensor([1.0]*len(history_ids), dtype=torch.float32).unsqueeze(0).to(device),
        'df_users': torch.tensor(row_user.drop(['userId', 'movies_seq', 'ratings_seq', 'ts_seq']).values.astype(np.float32)).unsqueeze(0).to(device)
    }

    # Compute user embedding
    with torch.no_grad():
        user_vec = user_tower(user_input)

    # Compute item embeddings
    with torch.no_grad():
        item_embs = item_tower(
            movie_features.to(device),
            actor_idx_bag.to(device), actor_offsets.to(device),
            director_idx_bag.to(device), director_offsets.to(device),
            genre_idx_bag.to(device), genre_offsets.to(device)
        )
        item_embs = F.normalize(item_embs, dim=1)

    # Compute cosine similarities
    sims = torch.matmul(user_vec, item_embs.T).squeeze(0)  # shape: (num_items,)
    top_scores, top_indices = torch.topk(sims, k=10)

    idx_to_movieId = {v: k for k, v in movie_id_to_idx.items()}
    top_movie_ids = [idx_to_movieId[i.item()] for i in top_indices]

    print(f"Sanity check for userId {user_id}:")
    print("Held-out movieId:", held_out_id)
    print("Top-10 recommended movieIds:", top_movie_ids)
    print("Hit:", held_out_id in top_movie_ids)


In [43]:
import torch
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm

def evaluate_two_tower_model_batched(
    user_tower, item_tower,
    df_users, df_ratings, df_movies,
    movie_id_to_idx,
    movie_features, actor_idx_bag, actor_offsets,
    director_idx_bag, director_offsets,
    genre_idx_bag, genre_offsets,
    top_k=10, max_users=500, batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):

    user_tower.eval()
    item_tower.eval()

    # PRECOMPUTED
    with torch.no_grad():
        item_embeddings = item_tower(
            movie_features.to(device),
            actor_idx_bag.to(device), actor_offsets.to(device),
            director_idx_bag.to(device), director_offsets.to(device),
            genre_idx_bag.to(device), genre_offsets.to(device)
        )
        item_embeddings = F.normalize(item_embeddings, dim=1)

    print("Evaluating users in batches...")
    user_ids = df_users['userId'].unique()
    if len(user_ids) > max_users:
        user_ids = np.random.choice(user_ids, size=max_users, replace=False)

    metrics = {'Precision@K': [], 'Recall@K': [], 'MRR': [], 'nDCG@K': []}

    def precision_at_k(true_item, recommended, k): return int(true_item in recommended[:k]) / k
    
    def recall_at_k(true_item, recommended, k): return int(true_item in recommended[:k])
    
    def mrr(true_item, recommended): return 1 / (recommended.index(true_item) + 1) if true_item in recommended else 0
    
    def ndcg_at_k(true_item, recommended, k):
        if true_item in recommended[:k]:
            rank = recommended.index(true_item)
            return 1 / np.log2(rank + 2)
        return 0.0

    for i in tqdm(range(0, len(user_ids), batch_size)):
        batch_user_ids = user_ids[i:i+batch_size]
        batch_inputs = {'movies': [], 'ratings': [], 'timestamps': [], 'df_users': []}
        held_out_items = []
        history_indices = []

        for user_id in batch_user_ids:
            row_u = df_users[df_users['userId'] == user_id].iloc[0]
            row_r = df_ratings[df_ratings['userId'] == user_id].iloc[0]
            pos_movies = row_r['pos']
            if len(pos_movies) < 2:
                continue

            held_out = pos_movies[-1]
            history = pos_movies[:-1]

            indices = [movie_id_to_idx[mid] for mid in history if mid in movie_id_to_idx]
            if not indices:
                continue

            batch_inputs['movies'].append(torch.tensor(indices, dtype=torch.long))
            batch_inputs['ratings'].append(torch.tensor([4.0]*len(indices), dtype=torch.float32))
            batch_inputs['timestamps'].append(torch.tensor([1.0]*len(indices), dtype=torch.float32))
            batch_inputs['df_users'].append(torch.tensor(row_u.drop(['userId', 'movies_seq', 'ratings_seq', 'ts_seq']).values.astype(np.float32)))

            held_out_items.append(held_out)
            history_indices.append(indices)

        if not held_out_items:
            continue

        max_len = max(len(seq) for seq in batch_inputs['movies'])
        for key in ['movies', 'ratings', 'timestamps']:
            batch_inputs[key] = torch.stack([
                F.pad(seq, (0, max_len - len(seq)), value=0) for seq in batch_inputs[key]
            ])

        batch_inputs['df_users'] = torch.stack(batch_inputs['df_users']).to(device)
        batch_inputs['movies'] = batch_inputs['movies'].to(device)
        batch_inputs['ratings'] = batch_inputs['ratings'].to(device)
        batch_inputs['timestamps'] = batch_inputs['timestamps'].to(device)

        with torch.no_grad():
            user_vecs = user_tower(batch_inputs)
            scores = torch.matmul(user_vecs, item_embeddings.T)

            for j, user_score in enumerate(scores):
                # Mask history
                user_score[history_indices[j]] = -1e9
                top_k_items = torch.topk(user_score, k=top_k).indices.tolist()
                idx_to_movieId = {v: k for k, v in movie_id_to_idx.items()}
                top_k_movie_ids = [idx_to_movieId[x] for x in top_k_items]

                true_item = held_out_items[j]
                metrics['Precision@K'].append(precision_at_k(true_item, top_k_movie_ids, top_k))
                metrics['Recall@K'].append(recall_at_k(true_item, top_k_movie_ids, top_k))
                metrics['MRR'].append(mrr(true_item, top_k_movie_ids))
                metrics['nDCG@K'].append(ndcg_at_k(true_item, top_k_movie_ids, top_k))

    return {k: np.mean(v) if v else 0.0 for k, v in metrics.items()}

# TRAINING

In [44]:
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.mps.is_available():
    device = torch.device('mps')

print('Device:', device)

Device: cuda


In [45]:
def to_device(data, device):
    if isinstance(data, dict):
        return {k: to_device(v, device) for k, v in data.items()}
    elif torch.is_tensor(data):
        return data.to(device)
    else:
        return data

In [46]:
BATCH_SIZE = 128 #DEBUG: 4096

from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df_users, test_size=0.2)

movie_features, actor_idx_bag, actor_offsets, director_idx_bag, director_offsets, genre_idx_bag, genre_offsets, num_actors, num_directors, num_genres = prepare_feature_tensor(df_movies)

trainDataset = UserDataset(train_df)
trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

testDataset = UserDataset(test_df)
testDataLoader = DataLoader(testDataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

In [47]:
EMB_DIM = 128

user_tower = UserTower(input_dim=25, n_items=n_items, embedding_dim=EMB_DIM).to(device)
item_tower = ItemTower(
    input_dim=movie_features.shape[1],
    embedding_dim=EMB_DIM,
    num_actors=num_actors,
    num_directors=num_directors,
    num_genres=num_genres
).to(device)


params = list(user_tower.parameters()) + list(item_tower.parameters())
optimizer = optim.Adam(params, lr=1e-3)

In [48]:
def train_one_epoch_two_tower(user_tower, item_tower, data_loader, optimizer, device, movie_features,
                              actor_idx_bag, actor_offsets,
                              director_idx_bag, director_offsets,
                              genre_idx_bag, genre_offsets):
    
    user_tower.train()
    item_tower.train()
    running_loss = 0.0
    total = 0
    
    movie_features = movie_features.to(device)
    actor_idx_bag = actor_idx_bag.to(device)
    actor_offsets = actor_offsets.to(device)
    director_idx_bag = director_idx_bag.to(device)
    director_offsets = director_offsets.to(device)
    genre_idx_bag = genre_idx_bag.to(device)
    genre_offsets = genre_offsets.to(device)

    for batch in data_loader:
        batch = to_device(batch, device)
        optimizer.zero_grad()

        user_vec = user_tower(batch['input'])

        actor_pos_bag, actor_pos_offsets = get_embedding_bag_inputs(batch['pos'], actor_idx_bag, actor_offsets)
        director_pos_bag, director_pos_offsets = get_embedding_bag_inputs(batch['pos'], director_idx_bag, director_offsets)
        genre_pos_bag, genre_pos_offsets = get_embedding_bag_inputs(batch['pos'], genre_idx_bag, genre_offsets)

        actor_neg_bag, actor_neg_offsets = get_embedding_bag_inputs(batch['neg'], actor_idx_bag, actor_offsets)
        director_neg_bag, director_neg_offsets = get_embedding_bag_inputs(batch['neg'], director_idx_bag, director_offsets)
        genre_neg_bag, genre_neg_offsets = get_embedding_bag_inputs(batch['neg'], genre_idx_bag, genre_offsets)
        
        #FOR DEBUGGING
        # print("max actor id in batch:", actor_pos_bag.max().item(), "num_actors:", item_tower.actor_embedding.num_embeddings)

        pos_vec = item_tower(movie_features[batch['pos']].to(device), actor_pos_bag.to(device), actor_pos_offsets.to(device),
                             director_pos_bag.to(device), director_pos_offsets.to(device),
                             genre_pos_bag.to(device), genre_pos_offsets.to(device))
        
        neg_vec = item_tower(movie_features[batch['neg']].to(device), actor_neg_bag.to(device), actor_neg_offsets.to(device),
                             director_neg_bag.to(device), director_neg_offsets.to(device),
                             genre_neg_bag.to(device), genre_neg_offsets.to(device))


        pos_score = (user_vec * pos_vec).sum(dim=-1)
        neg_score = (user_vec * neg_vec).sum(dim=-1)

        loss = -F.logsigmoid(pos_score - neg_score).mean()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        total += 1

    return running_loss / total

In [49]:
from tqdm import tqdm
from datetime import datetime
from sklearn.metrics import roc_auc_score

EPOCHS = 50
EVAL_EVERY = 5
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

for epoch in tqdm(range(EPOCHS), desc="Training Two-Tower"):
    avg_loss = train_one_epoch_two_tower(
        user_tower=user_tower,
        item_tower=item_tower,
        data_loader=trainDataLoader,
        optimizer=optimizer,
        device=device,
        movie_features = movie_features,
        actor_idx_bag=actor_idx_bag,
        actor_offsets=actor_offsets,
        director_idx_bag=director_idx_bag,
        director_offsets=director_offsets,
        genre_idx_bag=genre_idx_bag,
        genre_offsets=genre_offsets
    )
    
    print(f"[Epoch {epoch + 1}] | Loss: {avg_loss:.4f}")

    if epoch % EVAL_EVERY == (EVAL_EVERY - 1):
        user_tower.eval()
        item_tower.eval()

        aucs, pair_accs = [], []
        
        movie_features = movie_features.to(device)
        actor_idx_bag = actor_idx_bag.to(device)
        actor_offsets = actor_offsets.to(device)
        director_idx_bag = director_idx_bag.to(device)
        director_offsets = director_offsets.to(device)
        genre_idx_bag = genre_idx_bag.to(device)
        genre_offsets = genre_offsets.to(device)

        with torch.no_grad():
            item_emb = item_tower(
                movie_features.to(device),
                actor_idx_bag.to(device),
                actor_offsets.to(device),
                director_idx_bag.to(device),
                director_offsets.to(device),
                genre_idx_bag.to(device),
                genre_offsets.to(device)
            ).cpu().detach().numpy()

            for batch in testDataLoader:
                batch = to_device(batch, device)

                u = user_tower(batch['input'])

                actor_pos_bag, actor_pos_offsets = get_embedding_bag_inputs(batch['pos'], actor_idx_bag, actor_offsets)
                director_pos_bag, director_pos_offsets = get_embedding_bag_inputs(batch['pos'], director_idx_bag,director_offsets)
                genre_pos_bag, genre_pos_offsets = get_embedding_bag_inputs(batch['pos'], genre_idx_bag,genre_offsets)

                actor_neg_bag, actor_neg_offsets = get_embedding_bag_inputs(batch['neg'], actor_idx_bag,actor_offsets)
                director_neg_bag, director_neg_offsets = get_embedding_bag_inputs(batch['neg'], director_idx_bag, director_offsets)
                genre_neg_bag, genre_neg_offsets = get_embedding_bag_inputs(batch['neg'], genre_idx_bag, genre_offsets)

                pos_vec = item_tower(movie_features[batch['pos']].to(device), actor_pos_bag.to(device), actor_pos_offsets.to(device),
                                     director_pos_bag.to(device), director_pos_offsets.to(device),
                                     genre_pos_bag.to(device), genre_pos_offsets.to(device))
                
                neg_vec = item_tower(movie_features[batch['neg']].to(device), actor_neg_bag.to(device), actor_neg_offsets.to(device),
                                     director_neg_bag.to(device), director_neg_offsets.to(device),
                                     genre_neg_bag.to(device), genre_neg_offsets.to(device))

                pos_score = (u * pos_vec).sum(dim=-1)
                neg_score = (u * neg_vec).sum(dim=-1)

                labels = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
                scores = torch.cat([pos_score, neg_score])

                aucs.append(roc_auc_score(labels.cpu(), scores.cpu()))
                pair_accs.append((pos_score > neg_score).float().mean().item())

        print(f"[Epoch {epoch + 1}] Pointwise Eval:")
        print(f"  ROC AUC:       {np.mean(aucs):.4f}")
        print(f"  Pairwise Acc:  {np.mean(pair_accs):.4f}")

        run_sanity_check(
            user_tower=user_tower,
            item_tower=item_tower,
            df_users=test_df,
            df_movies=df_movies,
            df_ratings=df_ratings,
            movie_id_to_idx=movieId_to_idx,
            movie_features=movie_features,
            actor_idx_bag=actor_idx_bag,
            actor_offsets=actor_offsets,
            director_idx_bag=director_idx_bag,
            director_offsets=director_offsets,
            genre_idx_bag=genre_idx_bag,
            genre_offsets=genre_offsets,
            device=device
        )

        rank_metrics = evaluate_two_tower_model_batched(
            user_tower=user_tower,
            item_tower=item_tower,
            df_users=test_df,           # ← has user profiles & sequences
            df_ratings=df_ratings,      # ← has pos/neg lists
            df_movies=df_movies,        # ← all movie features
            movie_id_to_idx = df_movies.set_index('movieId')['movie_idx'].to_dict(),
            top_k=10,
            max_users=100, #DEBUG: 1000
            batch_size=8, #DEBUG: 32
            device=device,
            movie_features=movie_features,
            actor_idx_bag=actor_idx_bag,
            actor_offsets=actor_offsets,
            director_idx_bag=director_idx_bag,
            director_offsets=director_offsets,
            genre_idx_bag=genre_idx_bag,
            genre_offsets=genre_offsets
        )

        print(f"[Epoch {epoch + 1}] Retrieval Eval:")
        print(f"  Precision@K:   {rank_metrics['Precision@K']:.6f}")
        print(f"  Recall@K:      {rank_metrics['Recall@K']:.6f}")
        print(f"  MRR:           {rank_metrics['MRR']:.6f}")
        print(f"  nDCG@K:        {rank_metrics['nDCG@K']:.6f}")

Training Two-Tower:   2%|▏         | 1/50 [00:02<02:13,  2.71s/it]

[Epoch 1] | Loss: 0.6940


Training Two-Tower:   4%|▍         | 2/50 [00:05<02:16,  2.85s/it]

[Epoch 2] | Loss: 0.6918


Training Two-Tower:   6%|▌         | 3/50 [00:08<02:12,  2.82s/it]

[Epoch 3] | Loss: 0.6892


Training Two-Tower:   8%|▊         | 4/50 [00:11<02:05,  2.73s/it]

[Epoch 4] | Loss: 0.6915
[Epoch 5] | Loss: 0.6934
[Epoch 5] Pointwise Eval:
  ROC AUC:       0.5516
  Pairwise Acc:  0.5505
Evaluating users in batches...



  0%|          | 0/13 [00:00<?, ?it/s][A
 15%|█▌        | 2/13 [00:00<00:00, 11.63it/s][A
 31%|███       | 4/13 [00:00<00:00, 10.48it/s][A
 46%|████▌     | 6/13 [00:00<00:00, 10.71it/s][A
 62%|██████▏   | 8/13 [00:00<00:00, 10.44it/s][A
 77%|███████▋  | 10/13 [00:00<00:00, 10.24it/s][A
100%|██████████| 13/13 [00:01<00:00, 10.76it/s][A
Training Two-Tower:  10%|█         | 5/50 [00:15<02:24,  3.22s/it]

[Epoch 5] Retrieval Eval:
  Precision@K:   0.0000
  Recall@K:      0.0000
  MRR:           0.0000
  nDCG@K:        0.0000


Training Two-Tower:  12%|█▏        | 6/50 [00:17<02:13,  3.03s/it]

[Epoch 6] | Loss: 0.6893


Training Two-Tower:  14%|█▍        | 7/50 [00:20<02:07,  2.96s/it]

[Epoch 7] | Loss: 0.6794


Training Two-Tower:  16%|█▌        | 8/50 [00:23<02:01,  2.88s/it]

[Epoch 8] | Loss: 0.6894


Training Two-Tower:  18%|█▊        | 9/50 [00:26<01:56,  2.84s/it]

[Epoch 9] | Loss: 0.6785
[Epoch 10] | Loss: 0.6734
[Epoch 10] Pointwise Eval:
  ROC AUC:       0.5487
  Pairwise Acc:  0.5508
Evaluating users in batches...



  0%|          | 0/13 [00:00<?, ?it/s][A
  8%|▊         | 1/13 [00:00<00:01,  9.62it/s][A
 23%|██▎       | 3/13 [00:00<00:00, 10.12it/s][A
 38%|███▊      | 5/13 [00:00<00:00, 10.41it/s][A
 54%|█████▍    | 7/13 [00:00<00:00, 10.94it/s][A
 69%|██████▉   | 9/13 [00:00<00:00, 11.00it/s][A
 85%|████████▍ | 11/13 [00:01<00:00, 10.85it/s][A
100%|██████████| 13/13 [00:01<00:00, 11.19it/s][A
Training Two-Tower:  20%|██        | 10/50 [00:30<02:15,  3.39s/it]

[Epoch 10] Retrieval Eval:
  Precision@K:   0.0000
  Recall@K:      0.0000
  MRR:           0.0000
  nDCG@K:        0.0000


Training Two-Tower:  22%|██▏       | 11/50 [00:32<01:58,  3.05s/it]

[Epoch 11] | Loss: 0.6773


Training Two-Tower:  24%|██▍       | 12/50 [00:35<01:50,  2.91s/it]

[Epoch 12] | Loss: 0.6672


Training Two-Tower:  26%|██▌       | 13/50 [00:38<01:49,  2.96s/it]

[Epoch 13] | Loss: 0.6707


Training Two-Tower:  28%|██▊       | 14/50 [00:41<01:43,  2.87s/it]

[Epoch 14] | Loss: 0.6699
[Epoch 15] | Loss: 0.6614
[Epoch 15] Pointwise Eval:
  ROC AUC:       0.5892
  Pairwise Acc:  0.5797
Evaluating users in batches...



  0%|          | 0/13 [00:00<?, ?it/s][A
  8%|▊         | 1/13 [00:00<00:01,  9.90it/s][A
 23%|██▎       | 3/13 [00:00<00:00, 11.76it/s][A
 38%|███▊      | 5/13 [00:00<00:00, 10.55it/s][A
 54%|█████▍    | 7/13 [00:00<00:00, 10.49it/s][A
 69%|██████▉   | 9/13 [00:00<00:00, 10.01it/s][A
 85%|████████▍ | 11/13 [00:01<00:00,  9.43it/s][A
100%|██████████| 13/13 [00:01<00:00, 10.11it/s][A
Training Two-Tower:  30%|███       | 15/50 [00:45<01:54,  3.28s/it]

[Epoch 15] Retrieval Eval:
  Precision@K:   0.0000
  Recall@K:      0.0000
  MRR:           0.0000
  nDCG@K:        0.0000


Training Two-Tower:  32%|███▏      | 16/50 [00:48<01:44,  3.06s/it]

[Epoch 16] | Loss: 0.6691


Training Two-Tower:  34%|███▍      | 17/50 [00:50<01:37,  2.95s/it]

[Epoch 17] | Loss: 0.6420


Training Two-Tower:  36%|███▌      | 18/50 [00:53<01:33,  2.92s/it]

[Epoch 18] | Loss: 0.6502


Training Two-Tower:  38%|███▊      | 19/50 [00:56<01:28,  2.87s/it]

[Epoch 19] | Loss: 0.6587
[Epoch 20] | Loss: 0.6194
[Epoch 20] Pointwise Eval:
  ROC AUC:       0.5593
  Pairwise Acc:  0.5220
Evaluating users in batches...



  0%|          | 0/13 [00:00<?, ?it/s][A
 15%|█▌        | 2/13 [00:00<00:01, 10.75it/s][A
 31%|███       | 4/13 [00:00<00:00, 11.00it/s][A
 46%|████▌     | 6/13 [00:00<00:00, 10.94it/s][A
 62%|██████▏   | 8/13 [00:00<00:00, 10.89it/s][A
 77%|███████▋  | 10/13 [00:00<00:00, 10.84it/s][A
100%|██████████| 13/13 [00:01<00:00, 11.12it/s][A
Training Two-Tower:  40%|████      | 20/50 [01:01<01:42,  3.42s/it]

[Epoch 20] Retrieval Eval:
  Precision@K:   0.0000
  Recall@K:      0.0000
  MRR:           0.0000
  nDCG@K:        0.0000


Training Two-Tower:  42%|████▏     | 21/50 [01:03<01:33,  3.24s/it]

[Epoch 21] | Loss: 0.6387


Training Two-Tower:  44%|████▍     | 22/50 [01:06<01:26,  3.07s/it]

[Epoch 22] | Loss: 0.6116


Training Two-Tower:  46%|████▌     | 23/50 [01:09<01:19,  2.95s/it]

[Epoch 23] | Loss: 0.6395


Training Two-Tower:  48%|████▊     | 24/50 [01:11<01:13,  2.84s/it]

[Epoch 24] | Loss: 0.6640
[Epoch 25] | Loss: 0.6411
[Epoch 25] Pointwise Eval:
  ROC AUC:       0.5791
  Pairwise Acc:  0.5920
Evaluating users in batches...



  0%|          | 0/13 [00:00<?, ?it/s][A
 15%|█▌        | 2/13 [00:00<00:00, 11.97it/s][A
 31%|███       | 4/13 [00:00<00:00, 11.26it/s][A
 46%|████▌     | 6/13 [00:00<00:00, 11.86it/s][A
 62%|██████▏   | 8/13 [00:00<00:00, 11.91it/s][A
 77%|███████▋  | 10/13 [00:00<00:00, 11.61it/s][A
100%|██████████| 13/13 [00:01<00:00, 12.23it/s][A
Training Two-Tower:  50%|█████     | 25/50 [01:16<01:23,  3.35s/it]

[Epoch 25] Retrieval Eval:
  Precision@K:   0.0000
  Recall@K:      0.0000
  MRR:           0.0000
  nDCG@K:        0.0000


Training Two-Tower:  52%|█████▏    | 26/50 [01:18<01:14,  3.11s/it]

[Epoch 26] | Loss: 0.6028


Training Two-Tower:  52%|█████▏    | 26/50 [01:20<01:13,  3.08s/it]


KeyboardInterrupt: 

In [None]:
torch.save(user_tower.state_dict(), f'user_tower_{timestamp}.pt')
torch.save(item_tower.state_dict(), f'item_tower_{timestamp}.pt')