In [1]:
# Import necessary packages
import sys
import os
import numpy as np
import pandas as pd
import json  # To handle JSON files
import torch
from datetime import date
import time  # For tracking generation time

# Ensure the current directory is in sys.path so the project-local modules
# imported below resolve. Guarded so that re-running this cell does not keep
# appending duplicate entries.
if './' not in sys.path:
    sys.path.append('./')  # Adjust the path if necessary

# Import custom modules
from data_loader import load_user_reviews
from utils import (
    normalize,
    compute_similarity,
    recall_at_k,
    ndcg_at_k,
    extract_latest_n_reviews,
    extract_product_names_adapter,
    extract_product_names_alpaca
)
from retrieval import initialize_chromadb, collect_results_per_product
from model_pipeline import RecommenderModel  # Import the RecommenderModel class
from config import (
    PIPELINE_PARAMS,
    USER_PROFILE_PROMPT,
    PRELIMINARY_RECOMMENDATIONS_PROMPT,
    ALPACA_LORA_PROMPTS_USER_PROFILE,
    ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS,
    QLORA_PARAMS,
    MODEL_PATH,
    TOKENIZER_PATH,
    get_model_path_user_profile_and_candidate_items,
    get_tokenizer_path_user_profile_and_candidate_items
)

# Date stamp captured once when this cell runs; run_experiment embeds it in
# the result file names and in the stored experiment info.
current_date = date.today()

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def run_experiment(
    num_runs=1,
    with_product_description=False,
    num_reviews=10,
    sample_size=None,
    dataset='beauty',
    data_source='chatgpt_data'
):
    """
    Runs the recommendation experiment and writes one JSON result file per run.

    For every user in the training file the function builds a profile from the
    user's latest reviews, asks the model for preliminary recommendations,
    passes the extracted product names to ``collect_results_per_product`` and
    scores the returned products against the ``parent_asin`` of the user's
    latest review in the test file using Recall@K and NDCG@K.

    Args:
        num_runs (int): Number of times to run the experiment.
        with_product_description (bool): Whether to include product descriptions in the preliminary recommendations.
        num_reviews (int): Number of latest reviews to extract per user.
        sample_size (int or None): Sample size for the model. If provided, adapter logic is used.
        dataset (str): The dataset to use ("beauty" or "video_games").
        data_source (str): Forwarded to ``RecommenderModel`` and recorded in the
            result file name and in the stored experiment info.

    Returns:
        None. Results are written to a JSON file in the current working
        directory, one file per run.

    Raises:
        ValueError: If ``dataset`` is neither 'beauty' nor 'video_games'.
    """
    # Resolve the data files first so that an invalid dataset fails before the
    # model and the ChromaDB collection are initialized.
    if dataset == 'beauty':
        train_file = 'new_data/new_train_val_output.json'
        test_file = 'new_data/test_output.json'
    elif dataset == 'video_games':
        train_file = 'new_data/Video_Games.shortened_reduced_300_users.json'       # Adjust if a different file is used
        test_file = 'new_data/Video_Games.test_reduced_300_users.json'             # Adjust if a different file is used
    else:
        raise ValueError("Invalid dataset. Use 'beauty' or 'video_games'.")

    torch.cuda.empty_cache()
    use_adapter = sample_size is not None  # adapter logic is used whenever a sample size is given

    recommender_model = RecommenderModel(
        sample_size=sample_size,
        adapter=use_adapter,
        dataset=dataset,
        data_source=data_source
    )

    # initialize_chromadb(dataset) is expected to return the matching collection
    collection = initialize_chromadb(dataset)

    input_set = load_user_reviews(train_file)
    input_set_test = load_user_reviews(test_file)
    num_users = len(input_set)

    # Experiment parameters
    SIMILARITY_THRESHOLD = 100.0
    K_VALUES = [1, 5, 10, 20]
    # Number of full-pipeline retries allowed AFTER the first attempt.
    # 0 means every user gets exactly one attempt. (Previously the loop
    # condition compared with `<`, so a value of 0 meant no user was ever
    # processed.)
    MAX_TOTAL_RETRIES = 0
    MAX_ITEM_RETRIES = 20

    for run_num in range(num_runs):
        print(f"\nStarting experiment run {run_num + 1}/{num_runs}...")

        # Per-run accumulators. They are reset here so that each result file
        # and the "skipped in this run" summary describe this run only.
        results = []
        all_recalls, all_ndcgs = [], []
        skipped_users = []
        total_skips = 0        # Number of users skipped in this run
        user_skip_counts = {}  # user id -> top-level retry count when skipped

        # Result file name encodes the full experiment configuration
        description_flag = "with_description" if with_product_description else "without_description"
        adapter_flag = f"adapter_{use_adapter}" if use_adapter else "no_adapter"
        result_file_path = (
            f"results_{dataset}_"
            f"{data_source}_"
            f"{description_flag}_"
            f"{adapter_flag}_"
            f"reviews_{num_reviews}_"
            f"sample_{sample_size}_"
            f"{current_date}_"
            f"run_{run_num + 1}_"
            f"token_size_4096_"
            f"with_full_reviews.json"
        )

        # Loop over each user
        for user_index in range(num_users):
            print(f"\nProcessing user {user_index + 1}/{num_users}")

            # Start timing the user processing
            start_time = time.time()

            total_retries, success = 0, False
            # `<=` guarantees the first attempt always happens; each failure
            # increments total_retries until the retry budget is exhausted.
            while not success and total_retries <= MAX_TOTAL_RETRIES:
                try:
                    user_result = {'user_id': user_index + 1}
                    example_user = [input_set[user_index]]
                    latest_reviews = extract_latest_n_reviews(example_user, num_reviews)

                    if not latest_reviews:
                        print(f"User {user_index + 1} skipped due to no latest reviews.")
                        skipped_users.append(user_index + 1)
                        total_skips += 1
                        user_skip_counts[user_index + 1] = total_retries
                        break  # Nothing to retry for this user

                    # Track the number of reviews for this user
                    num_user_reviews = len(latest_reviews)
                    print(f"Number of reviews for User {user_index + 1}: {num_user_reviews}")
                    user_result['num_reviews'] = num_user_reviews

                    # Format review text
                    review_text = [
                        f"Product: {review['product_name']}\nRating: {review['rating']}\nReview: {review['text']}\n"
                        for review in latest_reviews
                    ]
                    print(f"Review Text for User {user_index + 1}:\n{review_text}\n")
                    review_text_str = "\n".join(review_text)

                    # Generate user profile
                    profile = recommender_model.create_user_profile(
                        reviews=review_text_str,
                        use_adapter=use_adapter
                    )
                    print(f"Generated Profile for User {user_index + 1}:\n{profile}\n")
                    user_result['profile'] = profile

                    # Generate preliminary recommendations (inner retry loop)
                    retries_item = 0
                    while retries_item < MAX_ITEM_RETRIES:
                        try:
                            preliminary_rec = recommender_model.create_preliminary_recommendations(
                                user_profile=profile,
                                use_adapter=use_adapter,
                                product_descriptions=with_product_description
                            )
                            print(f"Preliminary Recommendations for User {user_index + 1}:\n{preliminary_rec}\n")
                            user_result['preliminary_recommendations'] = preliminary_rec

                            # NOTE(review): the adapter path uses the *_alpaca
                            # extractor and the non-adapter path uses the
                            # *_adapter extractor. The names suggest these may
                            # be swapped - confirm against utils before changing.
                            if use_adapter:
                                product_names = extract_product_names_alpaca(preliminary_rec)
                            else:
                                product_names = extract_product_names_adapter(preliminary_rec)
                            if not product_names:
                                raise Exception("No product names extracted.")

                            print(f"Extracted Product Names for User {user_index + 1}: {product_names}\n")
                            user_result['extracted_products'] = product_names

                            # If we reach here, item generation succeeded
                            break

                        except Exception as e:
                            retries_item += 1
                            print(f"Error in item generation for user {user_index + 1}: {e}")
                            print(f"Retrying item generation ({retries_item}/{MAX_ITEM_RETRIES})...")

                    else:
                        # while/else: reached only when the item retries were
                        # exhausted without a `break`; escalate to the
                        # top-level handler.
                        raise Exception(
                            f"Max item retries ({MAX_ITEM_RETRIES}) reached for User {user_index + 1}. "
                            "Reraising to top-level for another attempt."
                        )

                    # Collect final results per product
                    user_history = [rev['parent_asin'] for rev in latest_reviews if 'parent_asin' in rev]
                    final_results = collect_results_per_product(product_names, collection, user_history, max_products=20)
                    if final_results == -1:
                        # -1 is the sentinel for "no recommendations"
                        print(f"User {user_index + 1} skipped due to no recommendations.")
                        skipped_users.append(user_index + 1)
                        total_skips += 1
                        user_skip_counts[user_index + 1] = total_retries
                        break

                    # Get the test product from the test dataset.
                    # Relies on the test file listing users at the same
                    # indices as the training file.
                    example_user_test = [input_set_test[user_index]]
                    test_review = extract_latest_n_reviews(example_user_test, 1)
                    test_product = test_review[0]['parent_asin']
                    user_result['test_product'] = test_product

                    # Prepare recommended products list; the third element of
                    # each result tuple is used as the ASIN.
                    recommended_products = [
                        {'asin': asin, 'distance': distance}
                        for _doc, distance, asin in final_results
                    ]
                    user_result['recommended_products'] = recommended_products

                    # Evaluate recommendations against the test product
                    normalized_test_product = normalize(test_product)
                    similarity_scores = []
                    matches = []
                    for prod in recommended_products:
                        sim_score = compute_similarity(normalize(prod['asin']), normalized_test_product)
                        # bool() keeps the flag JSON-serializable even if the
                        # comparison yields a non-builtin boolean type.
                        match = bool(sim_score >= SIMILARITY_THRESHOLD)
                        similarity_scores.append(sim_score)
                        matches.append(match)
                        prod['similarity_score'] = sim_score
                        prod['match'] = match

                    print("\nSimilarity Scores and Matches:")
                    for idx, (prod, score, match) in enumerate(zip(recommended_products, similarity_scores, matches), 1):
                        print(f"{idx}. ASIN: {prod['asin']}")
                        print(f"   Similarity Score: {score:.2f}%")
                        print(f"   Match: {'Yes' if match else 'No'}")

                    # Collect Recall@K and NDCG@K for this user
                    user_recalls = {}
                    user_ndcgs = {}
                    for k in K_VALUES:
                        recall_val = recall_at_k(matches, k)
                        ndcg_val = ndcg_at_k(matches, k)
                        user_recalls[f'Recall@{k}'] = recall_val
                        user_ndcgs[f'NDCG@{k}'] = ndcg_val
                        all_recalls.append((k, recall_val))
                        all_ndcgs.append((k, ndcg_val))

                    user_result['evaluation'] = {
                        'recalls': user_recalls,
                        'ndcgs': user_ndcgs
                    }

                    # Add retries information to user_result
                    user_result['total_retries'] = total_retries
                    user_result['item_retries'] = retries_item

                    # Stop timing
                    end_time = time.time()
                    generation_time = end_time - start_time
                    print(f"Time taken for user {user_index + 1}: {generation_time:.2f} seconds")
                    user_result['generation_time'] = generation_time

                    # Append user_result to results
                    results.append(user_result)

                    # Mark the user as successfully processed
                    success = True

                except Exception as e:
                    total_retries += 1
                    print(f"Error processing user {user_index + 1}: {e}")
                    if total_retries > MAX_TOTAL_RETRIES:
                        # Retry budget exhausted: record the user as skipped
                        print(f"User {user_index + 1} skipped after {MAX_TOTAL_RETRIES} top-level retries.")
                        skipped_users.append(user_index + 1)
                        total_skips += 1
                        user_skip_counts[user_index + 1] = total_retries

                        end_time = time.time()
                        generation_time = end_time - start_time
                        print(f"Time taken for user {user_index + 1}: {generation_time:.2f} seconds")

                    else:
                        print(
                            f"Retrying user processing (Profile + Item Generation) "
                            f"({total_retries}/{MAX_TOTAL_RETRIES})..."
                        )

        # Calculate overall metrics for each K (mean over this run's users)
        overall_metrics = {}
        for k in K_VALUES:
            recalls_at_k = [rec for k_val, rec in all_recalls if k_val == k]
            ndcgs_at_k = [ndcg for k_val, ndcg in all_ndcgs if k_val == k]
            mean_recall = np.mean(recalls_at_k) if recalls_at_k else 0.0
            mean_ndcg = np.mean(ndcgs_at_k) if ndcgs_at_k else 0.0
            overall_metrics[f'Recall@{k}'] = mean_recall
            overall_metrics[f'NDCG@{k}'] = mean_ndcg

        # Prepare the final JSON data
        json_data = {
            'experiment_info': {
                'sample_size': sample_size,
                'num_run': run_num + 1,
                'adapter': use_adapter,
                'with_product_description': with_product_description,
                'num_reviews': num_reviews,
                'date': str(current_date),
                'dataset': dataset,
                'data_source': data_source,
            },
            'results': results,
            'overall_metrics': overall_metrics,
            'skipped_users': skipped_users,
            'total_skips': total_skips,
            'user_skip_counts': user_skip_counts
        }

        # Write the JSON data to the file
        with open(result_file_path, 'w', encoding='utf-8') as json_file:
            json.dump(json_data, json_file, indent=4)

        print(f"Experiment run {run_num + 1} completed. Results saved to {result_file_path}.")
        print(f"Total users skipped in this run: {total_skips}")


In [3]:
# TODO: these experiments still need to be run.
# Same configuration for both datasets; video_games is processed first, then beauty.
for experiment_dataset in ("video_games", "beauty"):
    run_experiment(
        num_runs=3,
        with_product_description=False,
        num_reviews=100,
        dataset=experiment_dataset,
    )