In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from dataclasses import dataclass

from data_loader import prepare_ml_pipeline, MovieLensData, MovieLensDataset
from matrix_factor import BiasedMF, train_mf
from debiasing import train_debiasing_model, DebiasingModel
from heater import train_heater, save_heater_embeddings
#from evaluator import RecommenderEvaluator
from dropoutnet import DeepCF, train_dropoutnet
from new_evaluator import ndcg_calc_base, ndcg_calc_dropout, evaluate_recommendations



In [None]:
def run_evaluation_pipeline(device: str = 'cuda' if torch.cuda.is_available() else 'cpu'):
    """Train and evaluate the base MF, DropoutNet and debiasing models in sequence.

    Stages, in order:
      1. Load the data with ``prepare_ml_pipeline(cold_start=False)``.
      2. Train a ``BiasedMF`` base model and print its NDCGs.
      3. Train DropoutNet on top of the base model and print its NDCGs.
      4. Train the debiasing model on top of DropoutNet and print the NDCGs
         computed in the final evaluation step.

    Args:
        device: Torch device string. It is used to place the base model and is
            forwarded to ``train_dropoutnet`` and ``train_debiasing_model``.
            The default is computed once, when the function is defined.

    Returns:
        The integer ``0``. This is a placeholder; all results are reported via
        ``print``.
    """
    # Cut-offs shared by the evaluation steps below.
    k_values = [5, 10, 20, 50]

    dropoutnet_params = {
        'model_select': [800, 400],
        'rank_out': 200,
        'dropout_rate': 0.5,
        'batch_size': 100,
        'n_scores_per_user': 100,
        'data_batch_size': 500,
        'max_data_per_step': 10000,
        'num_epochs': 50,
        'learning_rate': 0.005
    }

    debiasing_params = {
        'model_select': [100],
        'alpha': 4.0,
        'batch_size': 50,
        'num_epochs': 1,
        'reg': 1e-5
    }

    # 1. Load and prepare data
    print("Loading data...")
    ml_data, train_loader, valid_loader, test_loader = prepare_ml_pipeline(cold_start=False)

    # 2. Train and evaluate base model
    print("\nTraining base model...")
    base_model = BiasedMF(ml_data.n_users, ml_data.n_items).to(device)
    base_model = train_mf(base_model, train_loader, num_epochs=200)

    base_ndcgs = ndcg_calc_base(base_model, test_loader, ml_data)
    print(f"Base NDCGS: {base_ndcgs}")

    # 3. Train and evaluate DropoutNet model
    print("\nTraining DropoutNet model...")
    dropoutnet = train_dropoutnet(
        ml_data=ml_data,
        base_model=base_model,
        **dropoutnet_params,
        device=device
    )

    print("\nEvaluating DropoutNet model...")
    # Reuse k_values instead of repeating the literal list.
    dropout_ndcgs = ndcg_calc_dropout(base_model, dropoutnet, test_loader, ml_data, ks=k_values)
    print(f"Dropout NDCGs {dropout_ndcgs}")

    # 4. Train and evaluate debiasing model
    print("\nTraining debiasing model...")
    debiasing_model = train_debiasing_model(
        base_model=dropoutnet,  # Using DropoutNet as base
        ml_data=ml_data,
        **debiasing_params,
        device=device
    )

    print("\nEvaluating debiased model...")
    # NOTE(review): this still scores `dropoutnet`, not the `debiasing_model`
    # returned above. Confirm whether train_debiasing_model updates dropoutnet
    # in place or whether debiasing_model needs its own evaluation call.
    # ml_data is passed to match the ndcg_calc_base call used for the base model.
    debiased_ndcgs = ndcg_calc_base(dropoutnet, test_loader, ml_data)

    # Report the debiased result (previously the DropoutNet NDCGs were printed again).
    print(f"Debiased NDCGs {debiased_ndcgs}")

    # TODO: stages 5 (popularity-bias analysis via
    # RecommenderEvaluator.analyze_popularity_bias for the DropoutNet and
    # debiased models) and 6 (RecommenderEvaluator.plot_performance_comparison
    # saving to 'evaluation_results.png') are disabled along with the
    # RecommenderEvaluator import. Once restored, return the evaluator here
    # instead of the placeholder.
    return 0

In [9]:
# Run the whole pipeline on the default device. `test` receives the function's
# return value, which is currently the placeholder 0.
test = run_evaluation_pipeline()

Loading data...
Dataset loaded with cold_start=False:
Train: 700146 interactions
Valid: 100021 interactions
Test: 200042 interactions

Training base model...
Epoch 5/200 - Avg Loss: 0.9434
Epoch 10/200 - Avg Loss: 0.8787
Epoch 15/200 - Avg Loss: 0.8674
Epoch 20/200 - Avg Loss: 0.8640
Epoch 25/200 - Avg Loss: 0.8627
Epoch 30/200 - Avg Loss: 0.8615
Epoch 35/200 - Avg Loss: 0.8609


KeyboardInterrupt: 

In [None]:
# Scratch cell: train the base model for a single epoch on CPU and report NDCG@k.
# RecommenderEvaluator is imported here because its import in the notebook's
# import cell is commented out; without it this cell raises NameError on a
# fresh kernel.
from evaluator import RecommenderEvaluator

k_values = [5, 10, 20, 50]
device = 'cpu'

# 1. Load and prepare data
print("Loading data...")
ml_data, train_loader, valid_loader, test_loader = prepare_ml_pipeline(cold_start=False)
evaluator = RecommenderEvaluator(ml_data)

# 2. Train and evaluate base model
print("\nTraining base model...")
base_model = BiasedMF(ml_data.n_users, ml_data.n_items).to(device)
base_model = train_mf(base_model, train_loader, num_epochs=1)

print("\nEvaluating base model...")
base_metrics = evaluator.evaluate_base_mf(
    model=base_model,
    data_loader=test_loader,
    k_values=k_values,
    device=device
)

# The metrics are read back using 'ndcg@<k>' keys, one per cut-off.
print("\nBase Model Results:")
for k in k_values:
    print(f"NDCG@{k}: {base_metrics[f'ndcg@{k}']:.4f}")

In [None]:
# Scratch cell: train the base model on the cold-start split and compute NDCGs.
# NOTE(review): the import cell pulls ndcg_calc_base / ndcg_calc_dropout from
# new_evaluator, not ndcg_calc; confirm ndcg_calc still exists in that module.
from new_evaluator import ndcg_calc
# RecommenderEvaluator's import in the notebook's import cell is commented
# out, so import it here to avoid a NameError on a fresh kernel.
from evaluator import RecommenderEvaluator

k_values = [5, 10, 20, 50]
device = 'cpu'

# 1. Load and prepare data
print("Loading data...")
ml_data, train_loader, valid_loader, test_loader = prepare_ml_pipeline(cold_start=True)
evaluator = RecommenderEvaluator(ml_data)


# 2. Train and evaluate base model
print("\nTraining base model...")
base_model = BiasedMF(ml_data.n_users, ml_data.n_items).to(device)
base_model = train_mf(base_model, train_loader, num_epochs=20)

# The result is kept in `ndcgs` so it can be inspected after this cell runs.
ndcgs = ndcg_calc(base_model, test_loader)



In [None]:
# Show the NDCG values assigned to `ndcgs` by the ndcg_calc call in the previous cell.
print(ndcgs)