In [45]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import torch 
from feature_processor import FeatureProcessor

from candidate_generator import CandidateGenerator
from validate_model import validate_model_with_features
from ncf import NCF
from cold_start import ColdStartRecommender

np.random.seed(123)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
ratings = pd.read_csv("../two_towers/data/ml-1m/ratings.dat", sep="::", header=None)
ratings.columns = ["user_id", "movie_id", "rating", "timestamp"]

movies = pd.read_csv("../two_towers/data/ml-1m/movies.dat", sep="::", header=None)
movies.columns = ["movie_id", "title", "genres"]

users = pd.read_csv("../two_towers/data/ml-1m/users.dat", sep="::", header=None)
users.columns = ["user_id", "gender", "age", "occupation", "zip_code"]


  ratings = pd.read_csv("../two_towers/data/ml-1m/ratings.dat", sep="::", header=None)
  movies = pd.read_csv("../two_towers/data/ml-1m/movies.dat", sep="::", header=None)
  users = pd.read_csv("../two_towers/data/ml-1m/users.dat", sep="::", header=None)


In [20]:
rand_userIds = np.random.choice(ratings['user_id'].unique(),
                               size=int(len(ratings['user_id'].unique())*0.1),
                               replace=False)
ratings = ratings.loc[ratings['user_id'].isin(rand_userIds)]

print(ratings.head())

     user_id  movie_id  rating  timestamp
799       10      2622       5  978228212
800       10       648       4  978224925
801       10      2628       3  978228408
802       10      3358       5  978226378
803       10      3359       3  978227125


In [21]:
# Initialize feature processor and prepare features
print("Preparing user and movie features...")

# Initialize feature processor
feature_processor = FeatureProcessor()

# Prepare user features
user_features_df = feature_processor.prepare_user_features(users)

# Prepare movie features  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
movie_embeddings = feature_processor.prepare_movie_features(movies, device=device)

print(f"\nFeature preparation complete!")
print(f"User feature dimension: {feature_processor.user_feature_dim}")
print(f"Movie feature dimension: {feature_processor.movie_feature_dim}")


Preparing user and movie features...
Preparing user features...
Fitting sklearn encoders...
Gender categories: ['F' 'M']
Age categories: [ 1 18 25 35 45 50 56]
Occupation categories: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
User features shape: (6040, 30)
User feature columns: ['user_id', 'gender', 'age_1', 'age_18', 'age_25', 'age_35', 'age_45', 'age_50', 'age_56', 'occ_0', 'occ_1', 'occ_2', 'occ_3', 'occ_4', 'occ_5', 'occ_6', 'occ_7', 'occ_8', 'occ_9', 'occ_10', 'occ_11', 'occ_12', 'occ_13', 'occ_14', 'occ_15', 'occ_16', 'occ_17', 'occ_18', 'occ_19', 'occ_20']
User feature dtypes:
user_id    float64
gender     float64
age_1      float64
age_18     float64
age_25     float64
age_35     float64
age_45     float64
age_50     float64
age_56     float64
occ_0      float64
occ_1      float64
occ_2      float64
occ_3      float64
occ_4      float64
occ_5      float64
occ_6      float64
occ_7      float64
occ_8      float64
occ_9      float64
occ_10     float64
occ_11

In [22]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
Index: 97208 entries, 799 to 998118
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   user_id    97208 non-null  int64
 1   movie_id   97208 non-null  int64
 2   rating     97208 non-null  int64
 3   timestamp  97208 non-null  int64
dtypes: int64(4)
memory usage: 3.7 MB


In [23]:
ratings['rank_latest'] = ratings.groupby(['user_id'])['timestamp'] \
                                .rank(method = 'first',ascending=False)

train_ratings = ratings[ratings['rank_latest'].isin([1,2])]
validation_ratings = ratings[ratings['rank_latest'] == 1]
test_ratings = ratings[ratings['rank_latest'] == 2]

# drop columns that we no Longer need 
train_ratings = train_ratings[['user_id', 'movie_id', 'rating']]
test_ratings = test_ratings[['user_id','movie_id','rating']]

In [24]:
num_users = ratings['user_id'].max()+1
num_items = ratings['movie_id'].max()+1

all_movieIds = ratings['movie_id'].unique()

In [25]:


candidate_gen = CandidateGenerator(ratings, movies, all_movieIds)

def generate_candidates(user_id, method="hybrid", num_candidates=100):
    """Backward compatible interface"""
    return candidate_gen.generate_candidates(user_id, method, num_candidates)

In [26]:
# Precompute validation candidates for faster validation
def precompute_validation_candidates(validation_ratings, candidate_method="hybrid", num_candidates=100):
    """
    Precompute candidates for all validation users to speed up validation
    
    Returns:
        dict: {user_id: [candidate_items]}
    """
    validation_users = validation_ratings['user_id'].unique()
    precomputed_candidates = {}
    
    print(f"Precomputing candidates for {len(validation_users)} validation users...")
    
    for user_id in tqdm(validation_users, desc="Precomputing candidates"):
        candidates = generate_candidates(user_id, method=candidate_method, num_candidates=num_candidates)
        precomputed_candidates[user_id] = candidates
    
    print(f"Precomputed candidates for {len(precomputed_candidates)} users")
    return precomputed_candidates

# Precompute candidates before training
validation_candidates = precompute_validation_candidates(validation_ratings, candidate_method="hybrid", num_candidates=100)

Precomputing candidates for 604 validation users...


Precomputing candidates:   0%|          | 0/604 [00:00<?, ?it/s]

Precomputed candidates for 604 users


In [39]:
sampling_strategies = [
    ("unique_per_user", "hybrid"),
]
NUM_EPOCHS = 30
results = {}

for sampling_strategy, neg_method in sampling_strategies:
    print(f"\n{'='*70}")
    print(f"Training with {sampling_strategy.upper()} sampling + {neg_method.upper()} negatives")
    print(f"{'='*70}")
    
    # Create optimized model with CORRECT feature dimensions
    model = NCF(
        user_feature_dim=feature_processor.user_feature_dim,
        movie_feature_dim=feature_processor.movie_feature_dim,
        ratings=train_ratings, 
        feature_processor=feature_processor,
        candidate_generator=candidate_gen,
        negative_method=neg_method, 
        sampling_strategy=sampling_strategy
    )

    # Set up training parameters
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters())
    dataloader = model.get_dataloader(batch_size=512, num_workers=4, num_negatives=4)

    # Training loop
    num_epochs = NUM_EPOCHS
    
    epoch_results = []
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        total_loss = 0
        num_batches = 0
        
        for batch in tqdm(dataloader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            # Move batch to device
            user_input, item_input, labels = [x.to(device) for x in batch]
            batch_device = (user_input, item_input, labels)
            
            # Zero gradients
            optimizer.zero_grad()
            
            # Forward pass and compute loss
            loss = model.compute_loss(batch_device)
            
            # Backward pass
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            num_batches += 1
        
        avg_loss = total_loss / num_batches
        
        # Validation phase
        model.eval()
        with torch.no_grad():
            hit_ratio, mrr, mean_rank = validate_model_with_features(
                model, validation_ratings, validation_candidates, device, 
                total_users_to_test=20, k=10
            )
        
        epoch_results.append({
            'epoch': epoch + 1,
            'train_loss': avg_loss,
            'hit_ratio': hit_ratio,
            'mean_rank': mean_rank
        })
        
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"  Training Loss: {avg_loss:.4f}")
        print(f"  Hit Ratio @ 10: {hit_ratio:.3f}")
        print(f"  Mean Rank: {mean_rank:.1f}")
        print("-" * 50)

    # Store results
    strategy_name = f"{sampling_strategy}_{neg_method}"
    results[strategy_name] = epoch_results
    
    print(f"Training with {strategy_name} completed!")
    print(f"Final Hit Ratio @ 10: {hit_ratio:.3f}")
    print(f"Final Mean Rank: {mean_rank:.1f}")

# Summary of results
print(f"\n{'='*70}")
print("FINAL RESULTS SUMMARY")
print(f"{'='*70}")

for strategy_name, epoch_results in results.items():
    final_result = epoch_results[-1]
    print(f"{strategy_name:30} | Hit Ratio: {final_result['hit_ratio']:.3f} | Mean Rank: {final_result['mean_rank']:.1f}")

print("\nAll experiments completed!")



Training with UNIQUE_PER_USER sampling + HYBRID negatives
Generating training dataset with hybrid negative sampling
Sampling strategy: unique_per_user


Processing users: 100%|██████████| 604/604 [00:02<00:00, 237.99it/s]

Generated 6040 samples (1208 positive, 4832 negative)
Negative-to-positive ratio: 4.00





Training Epoch 1/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.39024150371551514, Rank=19/101
User 3912, Movie 318: Score=0.38181254267692566, Rank=26/101
User 1878, Movie 1920: Score=0.3883642256259918, Rank=29/101


100%|██████████| 20/20 [00:00<00:00, 41.37it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.100
Mean Rank: 38.6
MRR: 0.087
Epoch 1/30
  Training Loss: 0.5574
  Hit Ratio @ 10: 0.100
  Mean Rank: 38.6
--------------------------------------------------





Training Epoch 2/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.320016086101532, Rank=11/101
User 3912, Movie 318: Score=0.2788303792476654, Rank=33/101
User 1878, Movie 1920: Score=0.3007119297981262, Rank=31/101


100%|██████████| 20/20 [00:00<00:00, 41.10it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.100
Mean Rank: 32.5
MRR: 0.089
Epoch 2/30
  Training Loss: 0.4394
  Hit Ratio @ 10: 0.100
  Mean Rank: 32.5
--------------------------------------------------





Training Epoch 3/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.3926253318786621, Rank=16/101
User 3912, Movie 318: Score=0.3686119318008423, Rank=44/101
User 1878, Movie 1920: Score=0.37502428889274597, Rank=30/101


100%|██████████| 20/20 [00:00<00:00, 39.61it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.200
Mean Rank: 30.8
MRR: 0.054
Epoch 3/30
  Training Loss: 0.4029
  Hit Ratio @ 10: 0.200
  Mean Rank: 30.8
--------------------------------------------------





Training Epoch 4/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.4161815345287323, Rank=13/101
User 3912, Movie 318: Score=0.376687616109848, Rank=58/101
User 1878, Movie 1920: Score=0.41639643907546997, Rank=15/101


100%|██████████| 20/20 [00:00<00:00, 42.35it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.250
Mean Rank: 26.5
MRR: 0.080
Epoch 4/30
  Training Loss: 0.3769
  Hit Ratio @ 10: 0.250
  Mean Rank: 26.5
--------------------------------------------------





Training Epoch 5/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.44042345881462097, Rank=12/101
User 3912, Movie 318: Score=0.22568640112876892, Rank=73/101
User 1878, Movie 1920: Score=0.4424423575401306, Rank=13/101


100%|██████████| 20/20 [00:00<00:00, 42.11it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.250
Mean Rank: 26.5
MRR: 0.097
Epoch 5/30
  Training Loss: 0.3676
  Hit Ratio @ 10: 0.250
  Mean Rank: 26.5
--------------------------------------------------





Training Epoch 6/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.4546666145324707, Rank=11/101
User 3912, Movie 318: Score=0.19287574291229248, Rank=58/101
User 1878, Movie 1920: Score=0.46924665570259094, Rank=2/101


100%|██████████| 20/20 [00:00<00:00, 41.47it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.250
Mean Rank: 21.0
MRR: 0.122
Epoch 6/30
  Training Loss: 0.3651
  Hit Ratio @ 10: 0.250
  Mean Rank: 21.0
--------------------------------------------------





Training Epoch 7/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.4700664281845093, Rank=18/101
User 3912, Movie 318: Score=0.15355516970157623, Rank=65/101
User 1878, Movie 1920: Score=0.4830549657344818, Rank=8/101


100%|██████████| 20/20 [00:00<00:00, 40.84it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.200
Mean Rank: 23.3
MRR: 0.094
Epoch 7/30
  Training Loss: 0.3530
  Hit Ratio @ 10: 0.200
  Mean Rank: 23.3
--------------------------------------------------





Training Epoch 8/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.49454158544540405, Rank=20/101
User 3912, Movie 318: Score=0.07617317140102386, Rank=69/101
User 1878, Movie 1920: Score=0.5052772760391235, Rank=12/101


100%|██████████| 20/20 [00:00<00:00, 41.07it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.200
Mean Rank: 24.1
MRR: 0.107
Epoch 8/30
  Training Loss: 0.3423
  Hit Ratio @ 10: 0.200
  Mean Rank: 24.1
--------------------------------------------------





Training Epoch 9/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.5224889516830444, Rank=14/101
User 3912, Movie 318: Score=0.09268729388713837, Rank=63/101
User 1878, Movie 1920: Score=0.5381098985671997, Rank=12/101


100%|██████████| 20/20 [00:00<00:00, 41.66it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.200
Mean Rank: 20.0
MRR: 0.150
Epoch 9/30
  Training Loss: 0.3341
  Hit Ratio @ 10: 0.200
  Mean Rank: 20.0
--------------------------------------------------





Training Epoch 10/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.5720018744468689, Rank=10/101
User 3912, Movie 318: Score=0.09877099841833115, Rank=62/101
User 1878, Movie 1920: Score=0.5772178173065186, Rank=9/101


100%|██████████| 20/20 [00:00<00:00, 42.23it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.450
Mean Rank: 18.4
MRR: 0.149
Epoch 10/30
  Training Loss: 0.3283
  Hit Ratio @ 10: 0.450
  Mean Rank: 18.4
--------------------------------------------------





Training Epoch 11/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.5817703008651733, Rank=15/101
User 3912, Movie 318: Score=0.09729834645986557, Rank=63/101
User 1878, Movie 1920: Score=0.5860705971717834, Rank=12/101


100%|██████████| 20/20 [00:00<00:00, 42.21it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.200
Mean Rank: 21.0
MRR: 0.124
Epoch 11/30
  Training Loss: 0.3251
  Hit Ratio @ 10: 0.200
  Mean Rank: 21.0
--------------------------------------------------





Training Epoch 12/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.5766670107841492, Rank=15/101
User 3912, Movie 318: Score=0.10292726010084152, Rank=62/101
User 1878, Movie 1920: Score=0.5127536058425903, Rank=25/101


100%|██████████| 20/20 [00:00<00:00, 42.12it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.400
Mean Rank: 17.1
MRR: 0.133
Epoch 12/30
  Training Loss: 0.3197
  Hit Ratio @ 10: 0.400
  Mean Rank: 17.1
--------------------------------------------------





Training Epoch 13/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.6119335889816284, Rank=15/101
User 3912, Movie 318: Score=0.09034378826618195, Rank=67/101
User 1878, Movie 1920: Score=0.46933114528656006, Rank=33/101


100%|██████████| 20/20 [00:00<00:00, 42.00it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.300
Mean Rank: 21.8
MRR: 0.130
Epoch 13/30
  Training Loss: 0.3103
  Hit Ratio @ 10: 0.300
  Mean Rank: 21.8
--------------------------------------------------





Training Epoch 14/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.5916625261306763, Rank=12/101
User 3912, Movie 318: Score=0.19139578938484192, Rank=49/101
User 1878, Movie 1920: Score=0.5773981809616089, Rank=19/101


100%|██████████| 20/20 [00:00<00:00, 41.06it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.500
Mean Rank: 15.2
MRR: 0.129
Epoch 14/30
  Training Loss: 0.3044
  Hit Ratio @ 10: 0.500
  Mean Rank: 15.2
--------------------------------------------------





Training Epoch 15/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.6447447538375854, Rank=10/101
User 3912, Movie 318: Score=0.0924079567193985, Rank=62/101
User 1878, Movie 1920: Score=0.5098416805267334, Rank=25/101


100%|██████████| 20/20 [00:00<00:00, 39.65it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.450
Mean Rank: 17.9
MRR: 0.139
Epoch 15/30
  Training Loss: 0.2916
  Hit Ratio @ 10: 0.450
  Mean Rank: 17.9
--------------------------------------------------





Training Epoch 16/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.6680415868759155, Rank=12/101
User 3912, Movie 318: Score=0.0682002604007721, Rank=64/101
User 1878, Movie 1920: Score=0.5408997535705566, Rank=21/101


100%|██████████| 20/20 [00:00<00:00, 41.95it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.450
Mean Rank: 17.0
MRR: 0.157
Epoch 16/30
  Training Loss: 0.2888
  Hit Ratio @ 10: 0.450
  Mean Rank: 17.0
--------------------------------------------------





Training Epoch 17/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.6536995768547058, Rank=13/101
User 3912, Movie 318: Score=0.09662333130836487, Rank=63/101
User 1878, Movie 1920: Score=0.543708086013794, Rank=21/101


100%|██████████| 20/20 [00:00<00:00, 41.39it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.400
Mean Rank: 17.7
MRR: 0.147
Epoch 17/30
  Training Loss: 0.2827
  Hit Ratio @ 10: 0.400
  Mean Rank: 17.7
--------------------------------------------------





Training Epoch 18/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.44472983479499817, Rank=19/101
User 3912, Movie 318: Score=0.13351815938949585, Rank=61/101
User 1878, Movie 1920: Score=0.6491295695304871, Rank=15/101


100%|██████████| 20/20 [00:00<00:00, 41.12it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.550
Mean Rank: 15.6
MRR: 0.171
Epoch 18/30
  Training Loss: 0.2802
  Hit Ratio @ 10: 0.550
  Mean Rank: 15.6
--------------------------------------------------





Training Epoch 19/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7148324847221375, Rank=12/101
User 3912, Movie 318: Score=0.06779976934194565, Rank=65/101
User 1878, Movie 1920: Score=0.39632555842399597, Rank=31/101


100%|██████████| 20/20 [00:00<00:00, 39.18it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.450
Mean Rank: 17.5
MRR: 0.135
Epoch 19/30
  Training Loss: 0.2720
  Hit Ratio @ 10: 0.450
  Mean Rank: 17.5
--------------------------------------------------





Training Epoch 20/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.6669723987579346, Rank=9/101
User 3912, Movie 318: Score=0.13474677503108978, Rank=56/101
User 1878, Movie 1920: Score=0.6270558834075928, Rank=14/101


100%|██████████| 20/20 [00:00<00:00, 42.01it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.600
Mean Rank: 14.9
MRR: 0.127
Epoch 20/30
  Training Loss: 0.2752
  Hit Ratio @ 10: 0.600
  Mean Rank: 14.9
--------------------------------------------------





Training Epoch 21/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7455516457557678, Rank=9/101
User 3912, Movie 318: Score=0.10260410606861115, Rank=59/101
User 1878, Movie 1920: Score=0.3302871882915497, Rank=30/101


100%|██████████| 20/20 [00:00<00:00, 41.74it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.550
Mean Rank: 16.2
MRR: 0.139
Epoch 21/30
  Training Loss: 0.2618
  Hit Ratio @ 10: 0.550
  Mean Rank: 16.2
--------------------------------------------------





Training Epoch 22/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7472551465034485, Rank=10/101
User 3912, Movie 318: Score=0.12818561494350433, Rank=58/101
User 1878, Movie 1920: Score=0.5799974799156189, Rank=23/101


100%|██████████| 20/20 [00:00<00:00, 41.78it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.500
Mean Rank: 17.8
MRR: 0.115
Epoch 22/30
  Training Loss: 0.2590
  Hit Ratio @ 10: 0.500
  Mean Rank: 17.8
--------------------------------------------------





Training Epoch 23/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.8194998502731323, Rank=9/101
User 3912, Movie 318: Score=0.11521369963884354, Rank=57/101
User 1878, Movie 1920: Score=0.6517413258552551, Rank=19/101


100%|██████████| 20/20 [00:00<00:00, 41.62it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.600
Mean Rank: 15.9
MRR: 0.141
Epoch 23/30
  Training Loss: 0.2572
  Hit Ratio @ 10: 0.600
  Mean Rank: 15.9
--------------------------------------------------





Training Epoch 24/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7937357425689697, Rank=10/101
User 3912, Movie 318: Score=0.0793594941496849, Rank=60/101
User 1878, Movie 1920: Score=0.5654357075691223, Rank=25/101


100%|██████████| 20/20 [00:00<00:00, 42.03it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.500
Mean Rank: 18.2
MRR: 0.090
Epoch 24/30
  Training Loss: 0.2533
  Hit Ratio @ 10: 0.500
  Mean Rank: 18.2
--------------------------------------------------





Training Epoch 25/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7652862071990967, Rank=10/101
User 3912, Movie 318: Score=0.05492164567112923, Rank=63/101
User 1878, Movie 1920: Score=0.39381900429725647, Rank=25/101


100%|██████████| 20/20 [00:00<00:00, 41.67it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.500
Mean Rank: 18.1
MRR: 0.125
Epoch 25/30
  Training Loss: 0.2526
  Hit Ratio @ 10: 0.500
  Mean Rank: 18.1
--------------------------------------------------





Training Epoch 26/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7965471148490906, Rank=11/101
User 3912, Movie 318: Score=0.06388700753450394, Rank=61/101
User 1878, Movie 1920: Score=0.5050593018531799, Rank=21/101


100%|██████████| 20/20 [00:00<00:00, 41.44it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.350
Mean Rank: 18.0
MRR: 0.119
Epoch 26/30
  Training Loss: 0.2518
  Hit Ratio @ 10: 0.350
  Mean Rank: 18.0
--------------------------------------------------





Training Epoch 27/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7652727365493774, Rank=10/101
User 3912, Movie 318: Score=0.08299318701028824, Rank=60/101
User 1878, Movie 1920: Score=0.4604056179523468, Rank=25/101


100%|██████████| 20/20 [00:00<00:00, 41.06it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.500
Mean Rank: 16.5
MRR: 0.119
Epoch 27/30
  Training Loss: 0.2455
  Hit Ratio @ 10: 0.500
  Mean Rank: 16.5
--------------------------------------------------





Training Epoch 28/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.802591860294342, Rank=11/101
User 3912, Movie 318: Score=0.10545267164707184, Rank=57/101
User 1878, Movie 1920: Score=0.4347088038921356, Rank=26/101


100%|██████████| 20/20 [00:00<00:00, 41.93it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.450
Mean Rank: 16.0
MRR: 0.134
Epoch 28/30
  Training Loss: 0.2322
  Hit Ratio @ 10: 0.450
  Mean Rank: 16.0
--------------------------------------------------





Training Epoch 29/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.8691232204437256, Rank=9/101
User 3912, Movie 318: Score=0.07710125297307968, Rank=60/101
User 1878, Movie 1920: Score=0.4689476191997528, Rank=22/101


100%|██████████| 20/20 [00:00<00:00, 41.67it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.450
Mean Rank: 16.9
MRR: 0.134
Epoch 29/30
  Training Loss: 0.2427
  Hit Ratio @ 10: 0.450
  Mean Rank: 16.9
--------------------------------------------------





Training Epoch 30/30:   0%|          | 0/12 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

User 5608, Movie 3160: Score=0.7707292437553406, Rank=10/101
User 3912, Movie 318: Score=0.09511437267065048, Rank=54/101
User 1878, Movie 1920: Score=0.5016394257545471, Rank=18/101


100%|██████████| 20/20 [00:00<00:00, 41.95it/s]


Validation Summary:
Total test cases: 20
Skipped cases: 0
Valid cases processed: 20
Hit Ratio @ 10: 0.500
Mean Rank: 16.2
MRR: 0.117
Epoch 30/30
  Training Loss: 0.2365
  Hit Ratio @ 10: 0.500
  Mean Rank: 16.2
--------------------------------------------------
Training with unique_per_user_hybrid completed!
Final Hit Ratio @ 10: 0.500
Final Mean Rank: 16.2

FINAL RESULTS SUMMARY
unique_per_user_hybrid         | Hit Ratio: 0.500 | Mean Rank: 16.2

All experiments completed!





In [40]:
print("="*80)
print("MODEL SAVE AND LOAD DEMONSTRATION")
print("="*80)

# Save the trained model
model_save_path = "models/ncf_trained_model.pth"
print(f"\n1. SAVING MODEL")
print("-" * 50)

# Create models directory if it doesn't exist
import os
os.makedirs("models", exist_ok=True)

# Save the trained model
model.save_weights(model_save_path)

MODEL SAVE AND LOAD DEMONSTRATION

1. SAVING MODEL
--------------------------------------------------
Model weights saved to models/ncf_trained_model.pth


In [41]:
print("Testing model performance on test set...")

def precompute_test_candidates(test_ratings, candidate_method="hybrid", num_candidates=100):
    test_users = test_ratings['user_id'].unique()
    precomputed_candidates = {}
    
    print(f"Precomputing candidates for {len(test_users)} test users...")
    
    for user_id in tqdm(test_users, desc="Precomputing test candidates"):
        candidates = generate_candidates(user_id, method=candidate_method, num_candidates=num_candidates)
        precomputed_candidates[user_id] = candidates
    
    return precomputed_candidates

test_candidates = precompute_test_candidates(test_ratings, candidate_method="hybrid", num_candidates=100)

model.eval()
with torch.no_grad():
    print(f"Evaluating on test set...")
    test_hit_ratio, test_mrr, test_mean_rank = validate_model_with_features(
        model, test_ratings, test_candidates, device, 
        total_users_to_test=50, k=10
    )

print(f"TEST SET RESULTS:")
print(f"Hit Ratio @ 10: {test_hit_ratio:.3f}")
print(f"Mean Rank: {test_mean_rank:.1f}")
print(f"MRR: {test_mrr:.3f}")


Testing model performance on test set...
Precomputing candidates for 604 test users...


Precomputing test candidates:   0%|          | 0/604 [00:00<?, ?it/s]

Evaluating on test set...


  8%|▊         | 4/50 [00:00<00:01, 39.24it/s]

User 2898, Movie 2502: Score=0.9038405418395996, Rank=4/101
User 4689, Movie 3793: Score=0.8114569187164307, Rank=5/101
User 3138, Movie 1580: Score=0.22729003429412842, Rank=26/101


100%|██████████| 50/50 [00:01<00:00, 40.50it/s]


Validation Summary:
Total test cases: 50
Skipped cases: 0
Valid cases processed: 50
Hit Ratio @ 10: 0.580
Mean Rank: 15.7
MRR: 0.230
TEST SET RESULTS:
Hit Ratio @ 10: 0.580
Mean Rank: 15.7
MRR: 0.230





In [42]:
# Demonstrate loading complete model using class method
print(f"\n4. LOADING COMPLETE MODEL (CLASS METHOD)")
print("-" * 50)

loaded_model = NCF.load_model(
    filepath=model_save_path,
    ratings=train_ratings,
    feature_processor=feature_processor,
    candidate_generator=candidate_gen
)



4. LOADING COMPLETE MODEL (CLASS METHOD)
--------------------------------------------------
Complete model loaded successfully from models/ncf_trained_model.pth


In [46]:

# Initialize the cold start recommender with the trained model
cold_start_recommender = ColdStartRecommender(
    trained_model=loaded_model,
    feature_processor=feature_processor,
    candidate_generator=candidate_gen,
    movies_df=movies
)

print("Cold Start Recommender initialized successfully!")


Cold Start Recommender initialized successfully!


In [47]:
# DEMONSTRATION: Cold Start Recommendation Examples

print("="*80)
print("COLD START RECOMMENDATION SYSTEM DEMONSTRATION")
print("="*80)

# Example 1: Pure Cold Start - New user with only demographics
print("\n1. PURE COLD START SCENARIO")
print("-" * 50)

new_user_demographics = {
    'gender': 'M',     # Male
    'age': 25,         # 25 years old  
    'occupation': 4    # College/grad student (based on MovieLens occupation codes)
}

print(f"New User Demographics: {new_user_demographics}")

# Get recommendations without any ratings
cold_start_recommendations = cold_start_recommender.recommend_for_new_user(
    user_demographics=new_user_demographics,
    user_ratings=None,  # No ratings yet
    num_recommendations=10
)

print(f"\nTop 10 Cold Start Recommendations:")
for i, (movie_id, title, score) in enumerate(cold_start_recommendations, 1):
    print(f"{i:2d}. {title:<50} (Score: {score:.3f})")

# Example 2: Get onboarding movies for initial rating collection
print(f"\n\n2. ONBOARDING MOVIES FOR RATING COLLECTION")
print("-" * 50)

onboarding_movies = cold_start_recommender.get_onboarding_movies(num_movies=8)

print("Movies to show new user for initial ratings (diverse genres):")
for i, (movie_id, title, genres) in enumerate(onboarding_movies, 1):
    print(f"{i}. {title:<40} | Genres: {genres}")

# Example 3: Warm Cold Start - User has provided some initial ratings
print(f"\n\n3. WARM COLD START SCENARIO")
print("-" * 50)

# Simulate user rating some of the onboarding movies
initial_ratings = [
    (onboarding_movies[0][0], 5),  # Loved the first movie
    (onboarding_movies[1][0], 4),  # Liked the second movie
    (onboarding_movies[4][0], 4),  # Liked the fourth movie
    (onboarding_movies[5][0], 4),  # Liked the fourth movie
    (onboarding_movies[6][0], 4),  # Liked the fourth movie
    (onboarding_movies[7][0], 4),  # Liked the fourth movie
]

print("User's initial ratings:")
for movie_id, rating in initial_ratings:
    movie_title = movies[movies['movie_id'] == movie_id]['title'].iloc[0]
    print(f"  {movie_title:<50} - Rating: {rating}/5")

# Get improved recommendations based on initial ratings
warm_recommendations = cold_start_recommender.recommend_for_new_user(
    user_demographics=new_user_demographics,
    user_ratings=initial_ratings,
    num_recommendations=10
)

print(f"\nTop 10 Recommendations after initial ratings:")
for i, (movie_id, title, score) in enumerate(warm_recommendations, 1):
    print(f"{i:2d}. {title:<50} (Score: {score:.3f})")

print(f"\n\n4. COMPARISON: Different User Demographics")
print("-" * 50)

# Example with different demographics
female_user_demographics = {
    'gender': 'F',     # Female
    'age': 45,         # 45 years old
    'occupation': 0    # Other/not specified
}

print(f"Different User Demographics: {female_user_demographics}")

female_recommendations = cold_start_recommender.recommend_for_new_user(
    user_demographics=female_user_demographics,
    user_ratings=None,
    num_recommendations=5
)

print(f"\nTop 5 Recommendations for different demographic:")
for i, (movie_id, title, score) in enumerate(female_recommendations, 1):
    print(f"{i}. {title:<50} (Score: {score:.3f})")

print(f"\n{'='*80}")
print("Cold Start Recommendation Demonstration Complete!")
print(f"{'='*80}")


COLD START RECOMMENDATION SYSTEM DEMONSTRATION

1. PURE COLD START SCENARIO
--------------------------------------------------
New User Demographics: {'gender': 'M', 'age': 25, 'occupation': 4}

Top 10 Cold Start Recommendations:
 1. Battlefield Earth (2000)                           (Score: 0.968)
 2. Mutiny on the Bounty (1935)                        (Score: 0.968)
 3. Maltese Falcon, The (1941)                         (Score: 0.964)
 4. King Kong (1933)                                   (Score: 0.961)
 5. Muppet Movie, The (1979)                           (Score: 0.960)
 6. Platoon (1986)                                     (Score: 0.956)
 7. Dumbo (1941)                                       (Score: 0.955)
 8. Vertigo (1958)                                     (Score: 0.953)
 9. Akira (1988)                                       (Score: 0.947)
10. Doctor Zhivago (1965)                              (Score: 0.945)


2. ONBOARDING MOVIES FOR RATING COLLECTION
-------------------------