In [1]:
import torch
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from collections import defaultdict
from tqdm import tqdm

from data_loader import prepare_ml_pipeline
from matrix_factor import BiasedMF, train_mf
from debiasing import train_debiasing_model
from dropoutnet import train_dropoutnet
from evaluator import ndcg_calc_sampled, ndcg_calc_dropout_sampled, ndcg_calc_debiased_sampled, evaluate_split
import evaluator as ev



In [4]:
def normalize_ndcg_by_positives(ndcg_scores, n_positives, avg_positives):
    """
    Rescale each NDCG score by the ratio avg_positives / n_positives.

    The ratio is below 1 when the group has more positives than the average
    and above 1 when it has fewer.

    Args:
        ndcg_scores: iterable of NDCG values (e.g. one per cutoff k).
        n_positives: number of positives for the group being rescaled.
        avg_positives: reference number of positives to normalize towards.

    Returns:
        A new list with every score multiplied by the ratio.
    """
    scale = avg_positives / n_positives
    rescaled = []
    for value in ndcg_scores:
        rescaled.append(value * scale)
    return rescaled

def analyze_positive_distribution(ml_data, cold_users, warm_users):
    """
    Summarize how positive ratings (rating >= 4) are divided between the
    train and test sets, separately for warm users and for cold users.

    Reads `ml_data.train_data` and `ml_data.test_data` (frames with
    'user_idx' and 'rating' columns), prints a short report, and returns a
    dict keyed by 'warm' and 'cold'; each entry holds the 'train', 'test'
    and 'total' positive counts plus 'test_ratio' (test / total).
    """
    def count_positives(frame, users):
        # Rows that belong to one of `users` and carry a rating of 4 or more.
        belongs_to_group = frame['user_idx'].isin(users)
        is_positive = frame['rating'] >= 4
        return (belongs_to_group & is_positive).sum()

    # Gather every count first so nothing is printed if counting fails.
    counts = {}
    for key, users in (('warm', warm_users), ('cold', cold_users)):
        n_train = count_positives(ml_data.train_data, users)
        n_test = count_positives(ml_data.test_data, users)
        counts[key] = (n_train, n_test, n_train + n_test)

    print("\nPositive Ratings Distribution Analysis:")
    print("-" * 40)
    sections = (
        ("Warm Users:", 'warm', warm_users),
        ("\nCold Users:", 'cold', cold_users),
    )
    for heading, key, users in sections:
        n_train, n_test, n_total = counts[key]
        print(heading)
        print(f"  Train positives: {n_train}")
        print(f"  Test positives: {n_test}")
        print(f"  Total positives: {n_total}")
        print(f"  Ratio in test: {n_test/n_total:.2%}")
        print(f"  Average per user: {n_total/len(users):.2f}")

    summary = {}
    for key in ('warm', 'cold'):
        n_train, n_test, n_total = counts[key]
        summary[key] = {
            'train': n_train,
            'test': n_test,
            'total': n_total,
            'test_ratio': n_test/n_total
        }
    return summary


def _print_split_results(metrics, k_values, cold_positives, warm_positives, avg_positives):
    """
    Print the original and positives-normalized NDCG for cold and warm users.

    `metrics` is the object returned by `evaluate_split`; only its
    `n_cold_users`, `n_warm_users`, `cold_users.ndcg` and `warm_users.ndcg`
    attributes are read here.
    """
    groups = (
        ("Cold", metrics.n_cold_users, metrics.cold_users.ndcg, cold_positives),
        ("Warm", metrics.n_warm_users, metrics.warm_users.ndcg, warm_positives),
    )
    for label, n_users, ndcg, n_positives in groups:
        print(f"{label} Users (n={n_users}):")
        # A group without positive test ratings has no defined scaling factor;
        # report None rather than dividing by zero after a long training run.
        if n_positives > 0:
            normalized = normalize_ndcg_by_positives(ndcg, n_positives, avg_positives)
        else:
            normalized = None
        print(f"  Original NDCG@{k_values}: {ndcg}")
        print(f"  Normalized NDCG@{k_values}: {normalized}")


def run_evaluation_pipeline(device: str = 'cuda' if torch.cuda.is_available() else 'cpu'):
    """
    Complete evaluation pipeline with DropoutNet and debiasing.

    Trains three models in sequence: a BiasedMF base model, a DropoutNet
    model built on the base model, and a debiasing model built on both.
    After each stage it prints the sampled NDCG, the MDG statistics (overall
    and split by cold/warm items) and the NDCG for cold and warm users, both
    as measured and normalized by the number of positive test ratings.

    Args:
        device: torch device string. The default is chosen once, when the
            function is defined: 'cuda' if available, otherwise 'cpu'.

    Returns:
        dict with keys 'base', 'dropout' and 'debiased', each mapping to the
        object returned by `evaluate_split` for that model.
    """

    # Define evaluation parameters
    k_values = [15, 30]

    dropoutnet_params = {
        'model_select': [800, 400],
        'rank_out': 200,
        'dropout_rate': 0.5,
        'batch_size': 1000,
        'n_scores_per_user': 2500,
        'data_batch_size': 1000,
        'max_data_per_step': 50000,
        'num_epochs': 25,
        'learning_rate': 0.001
    }

    #Params from https://github.com/Zziwei/Fairness-in-Cold-Start-Recommendation/blob/main/Scale/main.py
    debiasing_params = {
        'model_select': [100],
        'alpha': 4.0,
        'batch_size': 50,
        'num_epochs': 100,
        'reg': 0.000001
    }

    # 1. Load and prepare data
    print("Loading data...")
    ml_data, train_loader, valid_loader, test_loader = prepare_ml_pipeline(cold_start=False)

    # Get cold/warm user split
    cold_users, warm_users = ev.get_user_split(ml_data.train_data, ml_data.test_data)
    cold_items, warm_items = ev.get_item_split(ml_data.train_data, ml_data.test_data)

    # Analyze positive distribution
    #distribution_stats = analyze_positive_distribution(ml_data, cold_users, warm_users)

    # Count positive test ratings (rating >= 4) per user group. The mask must
    # be summed: len() of a boolean Series is the number of rows, not the
    # number of True entries.
    test_ratings = ml_data.test_data
    is_positive = test_ratings['rating'] >= 4
    cold_positives = int((test_ratings['user_idx'].isin(cold_users) & is_positive).sum())
    warm_positives = int((test_ratings['user_idx'].isin(warm_users) & is_positive).sum())
    avg_positives = (cold_positives + warm_positives) / 2

    # 2. Train and evaluate base model
    print("\nTraining base model...")
    base_model = BiasedMF(ml_data.n_users, ml_data.n_items).to(device)
    base_model = train_mf(model=base_model, train_loader=train_loader, val_loader=valid_loader, ml_data=ml_data, num_epochs=25, lr=.01)

    # Only the NDCG part of the returned triple is reported.
    base_ndcgs, _, _ = ndcg_calc_sampled(base_model, test_loader, ml_data, k_values=k_values)
    final_mdg, mdg_anal = ev.mdg_calc_base(base_model, test_loader, ml_data)
    print(f"Base NDCGS: {base_ndcgs}")
    print(f"Final MDG: {mdg_anal['all']['mean']}, min10: {mdg_anal['bottom_10']['mean']}, min20: {mdg_anal['bottom_20']['mean']}, top10: {mdg_anal['top_10']['mean']}")
    base_cold_warm = ev.analyze_mdg_with_splits(final_mdg, cold_items, warm_items)
    print("\nBase Model MDG Analysis:")
    ev.print_mdg_analysis(base_cold_warm)

    print("\nEvaluating base model...")
    base_metrics = evaluate_split(
        eval_model=base_model,
        test_loader=test_loader,
        ml_data=ml_data,
        cold_users=cold_users,
        warm_users=warm_users,
        k_values=k_values,
        evaluation_func=ndcg_calc_sampled
    )

    print("\nBase Model Results:")
    print(f"Base NDCGs: {base_ndcgs}")
    _print_split_results(base_metrics, k_values, cold_positives, warm_positives, avg_positives)

    # 3. Train and evaluate DropoutNet model
    print("\nTraining DropoutNet model...")
    dropoutnet = train_dropoutnet(
        ml_data=ml_data,
        base_model=base_model,
        val_loader=valid_loader,
        test_loader=test_loader,
        **dropoutnet_params,
        device=device
    )

    print("\nEvaluating DropoutNet model...")
    # The second and third returned values are not used; the MDG results come
    # from the dedicated mdg_calc_dropout call below.
    dropout_ndcgs, _, _ = ndcg_calc_dropout_sampled(base_model, dropoutnet, test_loader, ml_data, k_values=k_values)
    drop_mdg, drop_mdg_anal = ev.mdg_calc_dropout(dropoutnet, base_model, test_loader, ml_data)
    print(f"Dropout NDCGs {dropout_ndcgs}")
    print(f"Final MDG: {drop_mdg_anal['all']['mean']}, min10: {drop_mdg_anal['bottom_10']['mean']}, min20: {drop_mdg_anal['bottom_20']['mean']}, top10: {drop_mdg_anal['top_10']['mean']}")
    # Analyze cold vs warm for dropout model
    dropout_cold_warm = ev.analyze_mdg_with_splits(drop_mdg, cold_items, warm_items)

    print("\nDropoutNet Model MDG Analysis:")
    ev.print_mdg_analysis(dropout_cold_warm)

    print("\nEvaluating DropoutNet model...")
    dropout_metrics = evaluate_split(
        eval_model=dropoutnet,
        test_loader=test_loader,
        ml_data=ml_data,
        cold_users=cold_users,
        warm_users=warm_users,
        k_values=k_values,
        evaluation_func=ndcg_calc_dropout_sampled,
        base_model=base_model  # forwarded alongside the DropoutNet model
    )

    print("\nDropoutNet Results:")
    print(f"Dropout NDCGs: {dropout_ndcgs}")
    _print_split_results(dropout_metrics, k_values, cold_positives, warm_positives, avg_positives)

    # 4. Train and evaluate debiasing model
    print("\nTraining debiasing model...")
    debiasing_model = train_debiasing_model(
        base_model=dropoutnet,
        original_mf=base_model,
        ml_data=ml_data,
        **debiasing_params,
        device=device
    )

    print("\nEvaluating debiased model...")
    debiased_ndcgs, _, _ = ndcg_calc_debiased_sampled(dropoutnet, base_model, debiasing_model, test_loader, ml_data, k_values=k_values)
    debiased_mdg, debiased_mdg_anal = ev.mdg_calc_debiased(dropoutnet, base_model, debiasing_model, test_loader, ml_data)
    print(f"Debiased NDCGs {debiased_ndcgs}")
    print(f"Final MDG: {debiased_mdg_anal['all']['mean']}, min10: {debiased_mdg_anal['bottom_10']['mean']}, min20: {debiased_mdg_anal['bottom_20']['mean']}, top10: {debiased_mdg_anal['top_10']['mean']}")
    # Analyze cold vs warm for debiased model
    debiased_cold_warm = ev.analyze_mdg_with_splits(
        debiased_mdg,
        cold_items, warm_items
    )

    print("\nDebiased Model MDG Analysis:")
    ev.print_mdg_analysis(debiased_cold_warm)

    print("\nEvaluating debiased model...")
    debiased_metrics = evaluate_split(
        eval_model=debiasing_model,
        test_loader=test_loader,
        ml_data=ml_data,
        cold_users=cold_users,
        warm_users=warm_users,
        k_values=k_values,
        evaluation_func=ndcg_calc_debiased_sampled,
        prior_model=dropoutnet,
        original_mf=base_model
    )

    print("\nDebiased Model Results:")
    print(f"Debiased NDCGs: {debiased_ndcgs}")
    _print_split_results(debiased_metrics, k_values, cold_positives, warm_positives, avg_positives)

    return {
        'base': base_metrics,
        'dropout': dropout_metrics,
        'debiased': debiased_metrics
    }
   

In [None]:
# Run the full pipeline with the default device; the returned dict holds the
# split metrics under the keys 'base', 'dropout' and 'debiased'.
test = run_evaluation_pipeline()

Loading data...
