# Conversational Movie Recommendations with RecBole and Gemini

**Objective:** This notebook demonstrates how a pre-trained RecBole recommendation model can be used for inference to provide personalized movie recommendations within a simulated chatbot conversation powered by the Gemini API. The chatbot will also attempt to provide explanations for its recommendations based on the user's (simulated) viewing history.

**Steps:**
1.  **Setup & Configuration:** Import libraries, load API keys, configure SDKs, define paths, and set parameters.
2.  **Load MovieLens Item Data:** Load movie titles and genres from RecBole's processed `ml-100k.item` file.
3.  **Load Pre-trained RecBole Model & Dataset:** Load a saved model checkpoint and the corresponding dataset object.
4.  **Simulate User Profile & History:** Define a sample user and their liked movies (using original MovieLens IDs).
5.  **Generate Recommendations:** Use the loaded RecBole model to get recommendations.
6.  **Conversational Agent (Gemini):** Craft prompts and interact with Gemini for conversational output.
7.  **Showcase Interaction:** Run an example chat.

**CRITICAL PREREQUISITES:**
- Ensure libraries are installed:
  `pip install recbole pandas python-dotenv google-generativeai`
- A `.env` file in the project root with `GEMINI_API_KEY`.
- A RecBole model checkpoint saved from `04_recbole_offline_evaluation_v2.ipynb`.
- **RecBole's processed atomic files (e.g., `ml-100k.item`, `ml-100k.dataset`) MUST BE PRESENT in the `[PROJECT_ROOT]/recbole_data/ml-100k/` directory.** These are generated by notebook `04` when `save_dataset: True` is set.
- **If the chosen RecBole model required manual patches to its source code in notebook `04`, those patches must still be in effect.**

In [1]:
import os
import json
import pandas as pd
from dotenv import load_dotenv
import asyncio
import random
import torch 

import nest_asyncio
nest_asyncio.apply()

import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

from recbole.quick_start import load_data_and_model
from recbole.data.interaction import Interaction
from recbole.data.dataset import Dataset # For type hinting if needed

# For interactive chat widget
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output

print("Libraries imported successfully.")

Libraries imported successfully.


# 2. Configuration
Define essential variables.

In [2]:
def load_api_key(project_r):
    env_path = os.path.join(project_r, '.env')
    if os.path.exists(env_path):
        load_dotenv(dotenv_path=env_path)
        print(f".env file loaded from: {env_path}")
    else:
        load_dotenv()
        if os.path.exists(".env"): print(f".env file loaded from current directory: {os.getcwd()}/.env")
        else: print(f"Warning: .env file not found at {env_path} or in current directory.")
    api_key_loaded = os.getenv("GEMINI_API_KEY")
    if not api_key_loaded: print("Warning: GEMINI_API_KEY not found.")
    else: print("GEMINI_API_KEY loaded.")
    return api_key_loaded

current_working_dir = os.getcwd()
print(f"Current working directory (os.getcwd()): {current_working_dir}")
if os.path.basename(current_working_dir).lower() == "notebooks":
    PROJECT_ROOT = os.path.abspath(os.path.join(current_working_dir, ".."))
else:
    PROJECT_ROOT = current_working_dir 
print(f"PROJECT_ROOT set to: {PROJECT_ROOT}")

API_KEY = load_api_key(PROJECT_ROOT)

SDK_CONFIGURED_SUCCESSFULLY = False
if API_KEY: 
    try:
        genai.configure(api_key=API_KEY)
        SDK_CONFIGURED_SUCCESSFULLY = True
        print("Google Generative AI SDK configured successfully.")
    except Exception as e: print(f"Error configuring Google Generative AI SDK: {e}")
else: print("Google Generative AI SDK not configured due to missing API key.")

DATA_PATH = os.path.join(PROJECT_ROOT, "recbole_data")
SAVED_MODELS_PATH = os.path.join(PROJECT_ROOT, "recbole_saved_models")
# This path should point to the directory containing ml-100k.item, ml-100k.inter etc.
ML_100K_PROCESSED_PATH = os.path.join(DATA_PATH, "ml-100k") 

RECDOBOLE_ITEM_FILE_NAME = "ml-100k.item" # RecBole's processed item file
RECDOBOLE_ITEM_FILE_PATH = os.path.join(ML_100K_PROCESSED_PATH, RECDOBOLE_ITEM_FILE_NAME)

if not os.path.exists(RECDOBOLE_ITEM_FILE_PATH):
    print(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print(f"CRITICAL ERROR: RecBole's processed item file '{RECDOBOLE_ITEM_FILE_NAME}' not found at: {RECDOBOLE_ITEM_FILE_PATH}")
    print(f"This file is essential for mapping item IDs to titles and genres.")
    print(f"It is generated by RecBole when you run notebook '04_recbole_offline_evaluation_v2.ipynb' with 'save_dataset: True'.")
    print(f"Please ensure that notebook ran successfully and created the processed dataset files in '{ML_100K_PROCESSED_PATH}'.")
    print(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    PROCESSED_ITEM_FILE_FOUND = False
else:
    PROCESSED_ITEM_FILE_FOUND = True
    print(f"RecBole's processed item file '{RECDOBOLE_ITEM_FILE_NAME}' found at: {RECDOBOLE_ITEM_FILE_PATH}")

MODEL_TO_LOAD = 'LightGCN'
USER_SPECIFIED_MODEL_SUBPATH = "LightGCN_20250520_213658/LightGCN-May-20-2025_21-38-20.pth" # User provided
MODEL_CHECKPOINT_PATH = os.path.join(SAVED_MODELS_PATH, "ml-100k", USER_SPECIFIED_MODEL_SUBPATH)

print(f"Attempting to load specified model checkpoint: {MODEL_CHECKPOINT_PATH}")
if not os.path.exists(MODEL_CHECKPOINT_PATH):
     print(f"CRITICAL: Specified model checkpoint file not found at '{MODEL_CHECKPOINT_PATH}'. Auto-detection will be attempted if enabled.")
     # Auto-detection fallback logic (can be kept or removed if direct path is mandatory)
     MODEL_CHECKPOINT_DIR_BASE = os.path.join(SAVED_MODELS_PATH, "ml-100k")
     MODEL_CHECKPOINT_PATH_AUTO = None 
     if os.path.exists(MODEL_CHECKPOINT_DIR_BASE):
        model_dirs = [d for d in os.listdir(MODEL_CHECKPOINT_DIR_BASE) if os.path.isdir(os.path.join(MODEL_CHECKPOINT_DIR_BASE, d)) and d.startswith(MODEL_TO_LOAD)]
        if model_dirs:
            latest_model_dir_name = sorted(model_dirs, reverse=True)[0] 
            potential_model_dir = os.path.join(MODEL_CHECKPOINT_DIR_BASE, latest_model_dir_name)
            pth_files = [f for f in os.listdir(potential_model_dir) if f.endswith(".pth")]
            if pth_files:
                MODEL_CHECKPOINT_PATH_AUTO = os.path.join(potential_model_dir, pth_files[0])
                print(f"Auto-detected model checkpoint to load: {MODEL_CHECKPOINT_PATH_AUTO}")
                MODEL_CHECKPOINT_PATH = MODEL_CHECKPOINT_PATH_AUTO 
            else: print(f"Warning (auto-detection): No .pth file found in {potential_model_dir} for {MODEL_TO_LOAD}.")
        else: print(f"Warning (auto-detection): No directories for '{MODEL_TO_LOAD}' in {MODEL_CHECKPOINT_DIR_BASE}.")
     else: print(f"Warning (auto-detection): Base model checkpoint directory not found: {MODEL_CHECKPOINT_DIR_BASE}")
     if MODEL_CHECKPOINT_PATH is None or not os.path.exists(MODEL_CHECKPOINT_PATH):
         print(f"CRITICAL: Could not find a valid checkpoint for model '{MODEL_TO_LOAD}'.")
         MODEL_CHECKPOINT_PATH = None

NUM_RECOMMENDATIONS = 5
print(f"RecBole processed data path (expected): {ML_100K_PROCESSED_PATH}")

Current working directory (os.getcwd()): /mnt/c/Users/tduricic/Development/workspace/conversational-reco/notebooks
PROJECT_ROOT set to: /mnt/c/Users/tduricic/Development/workspace/conversational-reco
.env file loaded from: /mnt/c/Users/tduricic/Development/workspace/conversational-reco/.env
GEMINI_API_KEY loaded.
Google Generative AI SDK configured successfully.
RecBole's processed item file 'ml-100k.item' found at: /mnt/c/Users/tduricic/Development/workspace/conversational-reco/recbole_data/ml-100k/ml-100k.item
Attempting to load specified model checkpoint: /mnt/c/Users/tduricic/Development/workspace/conversational-reco/recbole_saved_models/ml-100k/LightGCN_20250520_213658/LightGCN-May-20-2025_21-38-20.pth
CRITICAL: Specified model checkpoint file not found at '/mnt/c/Users/tduricic/Development/workspace/conversational-reco/recbole_saved_models/ml-100k/LightGCN_20250520_213658/LightGCN-May-20-2025_21-38-20.pth'. Auto-detection will be attempted if enabled.
Auto-detected model checkpoi

## 3. Load MovieLens Item Data (Titles and Genres) from RecBole's Processed File

We will load movie titles and genres from RecBole's processed `ml-100k.item` file.
This file contains item IDs (RecBole's internal integer IDs), movie titles (as token sequences),
and classes/genres (as token sequences).

In [3]:
def load_recbole_item_data(processed_item_file_path):
    """Loads movie titles and genres from RecBole's .item file."""
    if not os.path.exists(processed_item_file_path):
        print(f"Error: RecBole processed item file not found at {processed_item_file_path}")
        return None
    
    movie_info_map_internal_id = {}
    try:
        with open(processed_item_file_path, 'r', encoding='utf-8') as f:
            header_line = f.readline().strip()
            header_parts = header_line.split('\t')
            if len(header_parts) < 3: 
                 header_parts = [h.strip() for h in header_line.split(' ') if h.strip()]

            try:
                item_id_col_name = next(col for col in header_parts if 'item_id:token' in col)
                title_col_name = next(col for col in header_parts if 'movie_title:token_seq' in col)
                class_col_name = next(col for col in header_parts if 'class:token_seq' in col)
                
                item_id_idx = header_parts.index(item_id_col_name)
                title_idx = header_parts.index(title_col_name)
                class_idx = header_parts.index(class_col_name)
            except (StopIteration, ValueError) as ve:
                print(f"Header parsing error in {processed_item_file_path}. Expected specific column field names (e.g., 'item_id:token'). Header found: {header_parts}. Error: {ve}")
                return None

            for line_num, line in enumerate(f, 1):
                parts = line.strip().split('\t')
                if len(parts) < max(item_id_idx, title_idx, class_idx) + 1:
                    parts = [p.strip() for p in line.strip().split(' ') if p.strip()]
                    if len(parts) < max(item_id_idx, title_idx, class_idx) + 1:
                        continue
                
                try:
                    internal_item_id = int(parts[item_id_idx]) 
                    
                    year_token = "UNKNOWN_YEAR" 
                    title_tokens_list = []
                    class_tokens_list = []
                    potential_year_idx = -1
                    temp_title_parts = []
                    # Corrected logic: iterate from title_idx up to the end of parts to find year
                    # then class tokens are after year, title tokens are before year.
                    
                    # Find year first
                    for i in range(title_idx, len(parts)):
                        token = parts[i]
                        if len(token) == 4 and token.isdigit() and 1880 <= int(token) <= 2050:
                            potential_year_idx = i
                            year_token = token
                            break
                    
                    if potential_year_idx != -1: # Year was found
                        title_tokens_list = parts[title_idx:potential_year_idx]
                        class_tokens_list = parts[potential_year_idx + 1:]
                    else: # No year found, assume title is up to class_idx, and class is from class_idx
                          # This might happen if year is missing or class_idx is not reliable for separation
                        title_tokens_list = parts[title_idx:class_idx] # If class_idx is the start of class tokens
                        class_tokens_list = parts[class_idx:]


                    movie_title = " ".join(title_tokens_list).strip()
                    genres_str = " ".join(class_tokens_list).strip()
                    if not movie_title: movie_title = f"Unknown Title (ID: {internal_item_id})"
                    if not genres_str: genres_str = "N/A"
                    
                    movie_info_map_internal_id[internal_item_id] = {
                        'title': movie_title,
                        'genres': genres_str
                    }
                except (IndexError, ValueError) as e:
                    print(f"Skipping line {line_num} due to parsing error: '{line.strip()}'. Error: {e}")
                    continue
        
        print(f"Loaded information for {len(movie_info_map_internal_id)} movies from RecBole's .item file.")
        return movie_info_map_internal_id
    except Exception as e:
        print(f"Error loading or processing RecBole's .item file {processed_item_file_path}: {e}")
        import traceback
        traceback.print_exc()
        return None

movie_info_map = None
if PROCESSED_ITEM_FILE_FOUND:
    movie_info_map = load_recbole_item_data(RECDOBOLE_ITEM_FILE_PATH)
    if movie_info_map:
        if movie_info_map: 
            valid_keys = [k for k in movie_info_map.keys() if isinstance(k, int) and k > 0]
            if valid_keys:
                sample_internal_item_id = valid_keys[0]
                print(f"\nSample movie info (Internal ID: {sample_internal_item_id}): {movie_info_map[sample_internal_item_id]}")
            else:
                print("\nNo valid integer keys found in movie_info_map for sample display.")
        else:
            print("\nMovie info map is empty after loading RecBole's .item file.")
else:
    print("\nSkipping loading of RecBole's .item file because it was not found.")

Loaded information for 1682 movies from RecBole's .item file.

Sample movie info (Internal ID: 1): {'title': 'Toy Story', 'genres': "Animation Children's Comedy"}


## 4. Load Pre-trained RecBole Model and Dataset

This step loads the saved RecBole model checkpoint and its corresponding dataset object.
The dataset object is essential as it contains the mappings between original user/item IDs and RecBole's internal numerical IDs.


In [4]:
recbole_model = None
recbole_dataset = None
recbole_config = None # To store the config object from loading

if MODEL_CHECKPOINT_PATH and os.path.exists(MODEL_CHECKPOINT_PATH) and PROCESSED_ITEM_FILE_FOUND:
    try:
        print(f"Loading RecBole model and dataset from checkpoint: {MODEL_CHECKPOINT_PATH}...")
        loaded_config, model, dataset, train_data, valid_data, test_data = load_data_and_model(
            model_file=MODEL_CHECKPOINT_PATH,
        )
        recbole_model = model
        recbole_dataset = dataset
        recbole_config = loaded_config 
        
        print(f"Successfully loaded model: {recbole_config['model']}") 
        print(f"Dataset '{recbole_dataset.dataset_name}' loaded with {recbole_dataset.user_num} users and {recbole_dataset.item_num} items.")
        
        recbole_model.eval()
        recbole_model.to(torch.device('cpu')) 
        print(f"Model '{recbole_config['model']}' is on device: {next(recbole_model.parameters()).device}")

    except Exception as e:
        print(f"Error loading RecBole model or dataset: {e}")
        print("Please ensure the MODEL_CHECKPOINT_PATH is correct and all necessary files (.pth, .dataset) are present.")
        print("Also, ensure your RecBole environment and any manual patches are consistent with the training environment.")
        import traceback
        traceback.print_exc()
elif not PROCESSED_ITEM_FILE_FOUND:
    print("Skipping RecBole model loading because RecBole's processed .item file was not found.")
else:
    print("MODEL_CHECKPOINT_PATH is not set or file does not exist. Cannot load RecBole model.")

  checkpoint = torch.load(model_file)
22 May 00:02    INFO  


Loading RecBole model and dataset from checkpoint: /mnt/c/Users/tduricic/Development/workspace/conversational-reco/recbole_saved_models/ml-100k/LightGCN_20250521_203428/LightGCN-May-21-2025_20-36-30.pth...


General Hyper Parameters:
gpu_id = -1
use_gpu = True
seed = 2024
state = INFO
reproducibility = True
data_path = /home/tduricic/anaconda3/envs/llm-eval/lib/python3.10/site-packages/recbole/config/../dataset_example/ml-100k
checkpoint_dir = /mnt/c/Users/tduricic/Development/workspace/conversational-reco/recbole_saved_models/ml-100k/LightGCN_20250521_203428
show_progress = True
save_dataset = True
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 100
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = 

Successfully loaded model: LightGCN
Dataset 'ml-100k' loaded with 944 users and 1683 items.
Model 'LightGCN' is on device: cpu


## 5. Simulate User Profile & Generate Recommendations

Here, we'll:
1.  Define a sample user and a few movies they have "liked" (using their original MovieLens string IDs).
2.  Convert these original string IDs into RecBole's internal integer item IDs for filtering.
3.  Use the loaded RecBole model to generate top-N recommendations (internal integer IDs).
4.  Map the recommended internal integer item IDs and the liked internal integer item IDs back to movie titles and genres using our `movie_info_map` (which is keyed by internal integer IDs).


In [5]:
def get_user_history_details(dataset: 'Dataset', user_original_id_str: str, internal_id_movie_info_map: dict, rating_threshold:float=3.0):
    """Fetches and formats the interaction history for a given user."""
    if not dataset or not internal_id_movie_info_map:
        print("Dataset or movie_info_map not available for get_user_history_details.")
        return []
    
    history_details = []
    try:
        internal_uid_np = dataset.token2id(dataset.uid_field, [user_original_id_str])
        
        uid_padding_token = 0 
        if hasattr(dataset.config, 'padding_idx') and isinstance(dataset.config['padding_idx'], dict) and dataset.uid_field in dataset.config['padding_idx']:
             uid_padding_token = dataset.config['padding_idx'][dataset.uid_field]
        elif hasattr(dataset, 'padding_token') and isinstance(dataset.padding_token, dict) and dataset.uid_field in dataset.padding_token: 
             uid_padding_token = dataset.padding_token[dataset.uid_field]

        if internal_uid_np.size == 0 or internal_uid_np[0] == uid_padding_token: 
            print(f"User {user_original_id_str} not found in dataset or maps to padding token for history lookup.")
            return []
        internal_uid = internal_uid_np[0]

        # Convert Interaction object's data to a Pandas DataFrame
        # The .interaction attribute holds the dictionary of tensors
        interaction_data_dict = {
            field: tensor.cpu().numpy() 
            for field, tensor in dataset.inter_feat.interaction.items()
        }
        all_interactions_df = pd.DataFrame(interaction_data_dict)
        
        # Filter for the specific user
        user_interactions_df = all_interactions_df[all_interactions_df[dataset.uid_field] == internal_uid].copy()
        
        rating_field_name = dataset.config['RATING_FIELD']
        time_field_name = dataset.config['TIME_FIELD']
        item_id_field_name = dataset.config['ITEM_ID_FIELD']

        if rating_field_name and rating_field_name in user_interactions_df.columns:
            user_interactions_df[rating_field_name] = pd.to_numeric(user_interactions_df[rating_field_name], errors='coerce')
            user_interactions_df = user_interactions_df[user_interactions_df[rating_field_name] >= rating_threshold]
        
        if time_field_name and time_field_name in user_interactions_df.columns:
            user_interactions_df = user_interactions_df.sort_values(by=time_field_name, ascending=False)

        for _, interaction in user_interactions_df.iterrows():
            internal_item_id = int(interaction[item_id_field_name])
            movie_detail = internal_id_movie_info_map.get(internal_item_id)
            if movie_detail:
                rating_value = "N/A"
                if rating_field_name and rating_field_name in interaction:
                    rating_value = interaction[rating_field_name]
                history_details.append({
                    "title": movie_detail['title'],
                    "genres": movie_detail['genres'],
                    "rating": rating_value
                })
        return history_details
    except Exception as e:
        print(f"Error fetching user history for {user_original_id_str}: {e}")
        import traceback
        traceback.print_exc()
        return []


def get_recommendations_for_user(model, dataset: 'Dataset', user_original_id_str, user_liked_original_item_ids_str_list, top_k, internal_id_movie_info_map):
    if model is None or dataset is None:
        print("RecBole model or dataset not loaded. Cannot generate recommendations.")
        return [], []
    if not internal_id_movie_info_map:
        print("Movie info (from .item file) not loaded. Cannot provide full recommendation details.")
        return [], []

    try:
        internal_user_id_np_array = dataset.token2id(dataset.uid_field, [str(user_original_id_str)])
        
        uid_padding_token = 0 
        if hasattr(dataset.config, 'padding_idx') and isinstance(dataset.config['padding_idx'], dict) and dataset.uid_field in dataset.config['padding_idx']:
            uid_padding_token = dataset.config['padding_idx'][dataset.uid_field]
        elif hasattr(dataset, 'padding_token') and isinstance(dataset.padding_token, dict) and dataset.uid_field in dataset.padding_token: 
             uid_padding_token = dataset.padding_token[dataset.uid_field]


        if internal_user_id_np_array.size == 0 or internal_user_id_np_array[0] == uid_padding_token: 
             print(f"Warning: User ID '{user_original_id_str}' not found or maps to padding token ({uid_padding_token}). Using first valid user from dataset.")
             first_valid_original_uid_str = dataset.id2token(dataset.uid_field, [1])[0] 
             internal_user_id_np_array = dataset.token2id(dataset.uid_field, [first_valid_original_uid_str])
             print(f"Using fallback user: original ID '{first_valid_original_uid_str}', internal ID {internal_user_id_np_array[0]}")
        
        internal_user_id_tensor = torch.tensor(internal_user_id_np_array, dtype=torch.long, device=model.device)

        user_interaction = Interaction({dataset.uid_field: internal_user_id_tensor})
        user_interaction = dataset.join(user_interaction) 
        user_interaction = user_interaction.to(model.device)

        scores = model.full_sort_predict(user_interaction) 

        if scores.dim() == 1:
            scores = scores.unsqueeze(0) 

        internal_liked_ids_int_list = []
        if user_liked_original_item_ids_str_list:
            internal_liked_ids_np_array = dataset.token2id(dataset.iid_field, [str(iid) for iid in user_liked_original_item_ids_str_list])
            iid_padding_token = 0 
            if hasattr(dataset.config, 'padding_idx') and isinstance(dataset.config['padding_idx'], dict) and dataset.iid_field in dataset.config['padding_idx']:
                 iid_padding_token = dataset.config['padding_idx'][dataset.iid_field]
            elif hasattr(dataset, 'padding_token') and isinstance(dataset.padding_token, dict) and dataset.iid_field in dataset.padding_token:
                 iid_padding_token = dataset.padding_token[dataset.iid_field]
            internal_liked_ids_int_list = [int(iid) for iid in internal_liked_ids_np_array if iid != iid_padding_token and iid < dataset.item_num] 

        if internal_liked_ids_int_list:
            valid_internal_liked_ids = [iid for iid in internal_liked_ids_int_list if iid < scores.shape[1]]
            if valid_internal_liked_ids:
                 liked_ids_tensor = torch.tensor(valid_internal_liked_ids, dtype=torch.long, device=scores.device)
                 scores[0, liked_ids_tensor] = -torch.inf
        
        top_k_scores, top_k_indices = torch.topk(scores, k=top_k, dim=1)
        
        recommended_internal_ids_int_list = top_k_indices.squeeze().tolist()
        if not isinstance(recommended_internal_ids_int_list, list): 
            recommended_internal_ids_int_list = [recommended_internal_ids_int_list]
        
        recommended_movies_details = []
        for internal_id_int in recommended_internal_ids_int_list:
            info = internal_id_movie_info_map.get(internal_id_int)
            if info:
                recommended_movies_details.append({
                    "title": info['title'], "genres": info['genres'], 
                    "internal_id": internal_id_int 
                })
            else:
                 recommended_movies_details.append({
                    "title": f"Unknown Movie (Internal ID: {internal_id_int})", "genres": "N/A", 
                    "internal_id": internal_id_int
                })
        
        liked_movies_details = []
        if user_liked_original_item_ids_str_list:
            internal_liked_ids_for_prompt_np = dataset.token2id(dataset.iid_field, [str(iid) for iid in user_liked_original_item_ids_str_list])
            iid_padding_token = 0
            if hasattr(dataset.config, 'padding_idx') and isinstance(dataset.config['padding_idx'], dict) and dataset.iid_field in dataset.config['padding_idx']:
                 iid_padding_token = dataset.config['padding_idx'][dataset.iid_field]
            elif hasattr(dataset, 'padding_token') and isinstance(dataset.padding_token, dict) and dataset.iid_field in dataset.padding_token:
                 iid_padding_token = dataset.padding_token[dataset.iid_field]

            for internal_id_int_np in internal_liked_ids_for_prompt_np:
                internal_id_int = int(internal_id_int_np) 
                if internal_id_int == iid_padding_token or internal_id_int >= dataset.item_num:
                    continue 
                info = internal_id_movie_info_map.get(internal_id_int)
                if info:
                    liked_movies_details.append({
                        "title": info['title'], "genres": info['genres'], 
                        "internal_id": internal_id_int
                    })
                else:
                     liked_movies_details.append({
                        "title": f"Unknown Liked Movie (Internal ID: {internal_id_int})", "genres": "N/A",
                        "internal_id": internal_id_int
                    })
        
        return recommended_movies_details, liked_movies_details

    except Exception as e:
        print(f"Error getting recommendations: {e}")
        import traceback
        traceback.print_exc()
        return [], []

# --- Simulate a User and their History ---
SIMULATED_USER_ID_STR = "3" 
SIMULATED_LIKED_MOVIE_ORIGINAL_IDS_STR = ["50", "100", "181"] 

recommendations = [] 
liked_movie_details_for_prompt = [] 
user_full_history_details = []

if recbole_model and recbole_dataset and movie_info_map: 
    print(f"\n--- Full Positive Interaction History for User (Original ID: {SIMULATED_USER_ID_STR}) ---")
    user_full_history_details = get_user_history_details(recbole_dataset, SIMULATED_USER_ID_STR, movie_info_map)
    if user_full_history_details:
        for item in user_full_history_details[:10]: 
            print(f"- {item['title']} (Genres: {item['genres']}, Rating: {item.get('rating', 'N/A')})")
        if len(user_full_history_details) > 10:
            print(f"  ... and {len(user_full_history_details) - 10} more items.")
    else:
        print("No historical interactions found for this user or an error occurred.")

    recommendations, liked_movie_details_for_prompt = get_recommendations_for_user(
        recbole_model, 
        recbole_dataset, 
        SIMULATED_USER_ID_STR, 
        SIMULATED_LIKED_MOVIE_ORIGINAL_IDS_STR, 
        NUM_RECOMMENDATIONS,
        movie_info_map 
    )
    
    print(f"\n--- Simulated User (Original ID: {SIMULATED_USER_ID_STR}) ---")
    print("Liked Movies (specifically for LLM prompt context):") 
    if liked_movie_details_for_prompt:
        for movie in liked_movie_details_for_prompt:
            print(f"- {movie['title']} (Genres: {movie['genres']})")
    else:
        print("No specific 'liked movies for prompt' details to display.")

    print("\nTop Recommendations (after filtering liked movies for prompt):")
    if recommendations:
        for i, movie in enumerate(recommendations):
            print(f"{i+1}. {movie['title']} (Genres: {movie['genres']})")
    else:
        print("No recommendations generated or an error occurred.")
elif not movie_info_map:
    print("Movie info (from .item file) not loaded. Cannot proceed with user simulation and recommendations.")
else:
    print("RecBole model/dataset not loaded. Skipping recommendation generation.")


--- Full Positive Interaction History for User (Original ID: 3) ---
- Natural Born Killers (Genres: Action Thriller, Rating: N/A)
- Flipper (Genres: Adventure Children's, Rating: N/A)
- Space Jam (Genres: Adventure Animation Children's Comedy Fantasy, Rating: N/A)
- Evil Dead II (Genres: Action Adventure Comedy Horror, Rating: N/A)
- Lost World: Jurassic Park, The (Genres: Action Adventure Sci-Fi Thriller, Rating: N/A)
- Replacement Killers, The (Genres: Action Thriller, Rating: N/A)
- Naked (Genres: Drama, Rating: N/A)
- Father of the Bride (Genres: Comedy, Rating: N/A)
- One Fine Day (Genres: Drama Romance, Rating: N/A)
- Snow White and the Seven Dwarfs (Genres: Animation Children's Musical, Rating: N/A)
  ... and 44 more items.

--- Simulated User (Original ID: 3) ---
Liked Movies (specifically for LLM prompt context):
- Spawn (Genres: Action Adventure Sci-Fi Thriller)
- Star Wars (Genres: Action Adventure Romance Sci-Fi War)
- Natural Born Killers (Genres: Action Thriller)

Top Re

## 6. Conversational Agent with Gemini

This section defines a function to interact with the Gemini API. It will take the user's request, the generated movie recommendations (with titles and genres), and the user's liked movies (with titles and genres) to craft a conversational response that includes explanations.

In [6]:
async def get_conversational_recommendation_with_explanation(user_query, recommended_movies, liked_movies_history):
    if not SDK_CONFIGURED_SUCCESSFULLY:
        return "Sorry, I'm having trouble connecting to my brain right now. Please try again later."
    if not recommended_movies:
        return "I couldn't find any specific recommendations for you right now, but I'm always learning! Perhaps try a broader query?"

    liked_movies_str = ""
    if liked_movies_history:
        liked_movies_parts = [f"'{movie['title']}' (Genres: {movie['genres']})" for movie in liked_movies_history]
        if liked_movies_parts:
            liked_movies_str = "You previously liked: " + ", ".join(liked_movies_parts) + "."

    recs_str_parts = []
    for i, movie in enumerate(recommended_movies):
        recs_str_parts.append(f"{i+1}. '{movie['title']}' (Genres: {movie['genres']})")
    recs_str = "\n".join(recs_str_parts)

    prompt = f"""
    You are a friendly and helpful movie recommendation chatbot.
    A user has asked: "{user_query}"
    {liked_movies_str}

    Here are some movie recommendations for the user:
    {recs_str}

    Your task is to:
    1.  Present these recommendations in a conversational and engaging way.
    2.  For each recommended movie, try to provide a brief, plausible explanation for why the user might like it, ideally by connecting it to their liked movies (considering titles and genres). For example, if they liked a sci-fi movie, and you recommend another sci-fi movie, mention that. If they liked a comedy, and you recommend another, highlight that.
    3.  If you cannot find a strong direct link, provide a more general positive statement about the recommended movie.
    4.  Keep the tone light and friendly. Do not invent movies or facts not provided.
    5.  Structure your response as a single chat message.
    """

    safety_settings = {
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    }
    model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest', safety_settings=safety_settings)
    
    try:
        response = await model.generate_content_async(contents=[prompt])
        return response.text
    except Exception as e:
        print(f"Error interacting with Gemini API: {e}")
        return "I'm sorry, I encountered an issue while trying to generate your recommendations with explanations. Please try again."

## 7. Showcase Chatbot Interaction

Let's simulate a user asking for recommendations and see how our Gemini-powered chatbot responds, using the recommendations generated by the RecBole model.


In [7]:
async def run_chatbot_example():
    if not recommendations : 
        print("No recommendations available to run the chatbot example.")
        print("This might be due to errors in loading the RecBole model or generating recommendations.")
        return
    # No need to check liked_movie_details_for_prompt here, as the LLM prompt handles its absence

    user_chat_query = "Can you recommend some movies for me?"
    print(f"\nSimulated User Query: {user_chat_query}")
    
    chatbot_response = await get_conversational_recommendation_with_explanation(
        user_chat_query,
        recommendations,
        liked_movie_details_for_prompt 
    )
    
    print("\nChatbot Response:")
    print(chatbot_response)

if SDK_CONFIGURED_SUCCESSFULLY and recbole_model and recbole_dataset and movie_info_map and PROCESSED_ITEM_FILE_FOUND: # Changed U_ITEM_FOUND to PROCESSED_ITEM_FILE_FOUND
    if recommendations: 
        await run_chatbot_example() 
    else:
        print("\nSkipping chatbot example because no recommendations were generated (check previous steps).")
else:
    print("\nSkipping chatbot example because prerequisites (SDK, model, data, processed .item file, or initial recommendations) are missing.")


Simulated User Query: Can you recommend some movies for me?

Chatbot Response:
Hey there!  Looking for some movie magic, huh?  Based on your taste for action-packed adventures like *Spawn* and *Star Wars*, and the thrillers like *Natural Born Killers*, I've got a few suggestions that might tickle your fancy:

1. **GoldenEye:** This one's a classic action-adventure thriller, similar in vein to *Spawn* and *Star Wars*.  It's got plenty of thrilling action sequences and a compelling plot that will keep you on the edge of your seat!

2. **Touch of Evil:**  If you enjoy the darker, more suspenseful side of thrillers (like *Natural Born Killers*), you might really dig *Touch of Evil*. It's a classic film-noir thriller with a fantastically twisted plot and memorable performances.

3. **Weekend at Bernie's:**  Need a break from all the action? This is a hilarious comedy that's perfect for a lighthearted watch. It's a totally different vibe, but sometimes a good laugh is exactly what you need!

**Explanation Quality:**
- The quality of explanations generated by the LLM heavily depends on:
    - **Prompt Engineering:** How well the prompt guides the LLM to connect liked items with recommendations.
    - **Available Information:** Providing genres (as we did) is helpful. More detailed item metadata (e.g., keywords, plot summaries, actors, directors) for both liked and recommended items would allow for richer and more accurate explanations.
    - **LLM Capabilities:** The LLM's ability to infer relationships and articulate them naturally.
- Simple genre-matching is a good starting point. More sophisticated explanations might require the LLM to understand deeper thematic connections or stylistic similarities.

**Further Improvements:**
1.  **Richer Item Metadata:** Incorporate more detailed movie metadata (plot keywords, director, main actors) into the prompt for both liked and recommended items to enable more nuanced explanations.
2.  **User Feedback Loop:** In a real system, user feedback on recommendations and explanations could be used to fine-tune the LLM prompts or even the recommendation model.
3.  **Multi-Turn Conversation:** Extend the chatbot to handle follow-up questions like "Tell me more about 'Movie X'" or "Recommend something similar but less action-packed."
4.  **Negative Feedback:** Allow users to say they disliked a recommendation, and use that to refine future suggestions and explanations.
5.  **Diversity in Explanations:** Prompt the LLM to vary its explanation styles to avoid sounding repetitive.
6.  **Knowledge Cutoff:** Be mindful of the LLM's knowledge cutoff if asking for information about very recent movies not in its training data (though here we provide the movie details directly).
7.  **Fact-Checking (if LLM generates external info):** If the LLM were allowed to bring in external knowledge for explanations, a fact-checking layer might be needed. In our current setup, we are trying to constrain it to the provided data.


In [8]:
async def full_conversational_pipeline():
    global movie_info_map, recbole_model, recbole_dataset, recommendations, liked_movie_details_for_prompt, PROCESSED_ITEM_FILE_FOUND, user_full_history_details

    # Re-check PROCESSED_ITEM_FILE_FOUND at the start of the pipeline
    item_file_check_path = os.path.join(ML_100K_PROCESSED_PATH, RECDOBOLE_ITEM_FILE_NAME)
    PROCESSED_ITEM_FILE_FOUND = os.path.exists(item_file_check_path)
    if not PROCESSED_ITEM_FILE_FOUND:
        print(f"CRITICAL (Pipeline Start): RecBole's processed item file not found at {item_file_check_path}. Cannot proceed.")
        return

    # 1. Load Movie Info from RecBole's .item file
    movie_info_map = load_recbole_item_data(RECDOBOLE_ITEM_FILE_PATH)
    if not movie_info_map: return

    # 2. Load RecBole Model
    if MODEL_CHECKPOINT_PATH and os.path.exists(MODEL_CHECKPOINT_PATH):
        try:
            config, model, dataset, _, _, _ = load_data_and_model(model_file=MODEL_CHECKPOINT_PATH)
            recbole_model = model
            recbole_dataset = dataset
            recbole_config = config # Store the loaded config
            recbole_model.eval()
            recbole_model.to(torch.device('cpu'))
            print(f"Successfully loaded model: {recbole_config['model']} on device: {next(recbole_model.parameters()).device}")
        except Exception as e:
            print(f"Error loading RecBole model in full pipeline: {e}")
            recbole_model, recbole_dataset = None, None 
            return 
    else:
        print("Model checkpoint path not valid in full pipeline. Cannot proceed.")
        return

    # 3. Simulate User and Get Recommendations (also prints full history)
    if recbole_model and recbole_dataset and movie_info_map:
        # --- Fetch and print the user's actual historical positive interactions ---
        print(f"\n--- Full Positive Interaction History for User (Original ID: {SIMULATED_USER_ID_STR}) ---")
        user_full_history_details = get_user_history_details(recbole_dataset, SIMULATED_USER_ID_STR, movie_info_map)
        if user_full_history_details:
            for item in user_full_history_details[:10]: # Print up to 10 historical items
                print(f"- {item['title']} (Genres: {item['genres']}, Rating: {item.get('rating', 'N/A')})")
            if len(user_full_history_details) > 10:
                print(f"  ... and {len(user_full_history_details) - 10} more items.")
        else:
            print("No historical interactions found for this user or an error occurred.")

        # --- Get recommendations (this also prepares liked_movie_details_for_prompt) ---
        recommendations, liked_movie_details_for_prompt = get_recommendations_for_user(
            recbole_model, 
            recbole_dataset, 
            SIMULATED_USER_ID_STR, 
            SIMULATED_LIKED_MOVIE_ORIGINAL_IDS_STR, 
            NUM_RECOMMENDATIONS,
            movie_info_map
        )
        print(f"\n--- Simulated User (Original ID: {SIMULATED_USER_ID_STR}) (Full Pipeline) ---")
        print("Liked Movies (specifically for LLM prompt context):")
        if liked_movie_details_for_prompt: 
            for movie in liked_movie_details_for_prompt: print(f"- {movie['title']} (Genres: {movie['genres']})")
        else:
            print("No liked movie details to display for prompt context.")

        print("\nTop Recommendations (after filtering liked movies for prompt):")
        if recommendations:
            for i, movie in enumerate(recommendations): print(f"{i+1}. {movie['title']} (Genres: {movie['genres']})")
        else:
            print("No recommendations generated in full pipeline.")
    else:
        print("Skipping recommendation generation in full pipeline due to missing components.")
        recommendations = [] 
        liked_movie_details_for_prompt = []
        user_full_history_details = []


    # 4. Run Chatbot Example
    if SDK_CONFIGURED_SUCCESSFULLY and recommendations: 
        await run_chatbot_example()
    else:
        print("\nSkipping chatbot example in full pipeline due to missing SDK config or recommendations.")

In [9]:
await full_conversational_pipeline()

Loaded information for 1682 movies from RecBole's .item file.


  checkpoint = torch.load(model_file)
22 May 00:03    INFO  
General Hyper Parameters:
gpu_id = -1
use_gpu = True
seed = 2024
state = INFO
reproducibility = True
data_path = /home/tduricic/anaconda3/envs/llm-eval/lib/python3.10/site-packages/recbole/config/../dataset_example/ml-100k
checkpoint_dir = /mnt/c/Users/tduricic/Development/workspace/conversational-reco/recbole_saved_models/ml-100k/LightGCN_20250521_203428
show_progress = True
save_dataset = True
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 100
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'v

Successfully loaded model: LightGCN on device: cpu

--- Full Positive Interaction History for User (Original ID: 3) ---
- Natural Born Killers (Genres: Action Thriller, Rating: N/A)
- Flipper (Genres: Adventure Children's, Rating: N/A)
- Space Jam (Genres: Adventure Animation Children's Comedy Fantasy, Rating: N/A)
- Evil Dead II (Genres: Action Adventure Comedy Horror, Rating: N/A)
- Lost World: Jurassic Park, The (Genres: Action Adventure Sci-Fi Thriller, Rating: N/A)
- Replacement Killers, The (Genres: Action Thriller, Rating: N/A)
- Naked (Genres: Drama, Rating: N/A)
- Father of the Bride (Genres: Comedy, Rating: N/A)
- One Fine Day (Genres: Drama Romance, Rating: N/A)
- Snow White and the Seven Dwarfs (Genres: Animation Children's Musical, Rating: N/A)
  ... and 44 more items.

--- Simulated User (Original ID: 3) (Full Pipeline) ---
Liked Movies (specifically for LLM prompt context):
- Spawn (Genres: Action Adventure Sci-Fi Thriller)
- Star Wars (Genres: Action Adventure Romance S

## 9. Interactive Chat Widget

This section provides an interactive chat interface using `ipywidgets`. You can type in a movie recommendation request, and the chatbot (powered by Gemini and using the loaded RecBole model's recommendations) will respond.

**Note:**
- Ensure the previous cells (especially model loading and initial recommendation generation for the simulated user) have been run successfully in the current session. The chatbot uses the globally available `recommendations` and `liked_movie_details_for_prompt` variables for context.
- The first interaction with the widget will use the recommendations generated for `SIMULATED_USER_ID_STR`. Subsequent queries in the widget will reuse this initial recommendation context for the LLM.


In [10]:
# Global list to store chat messages for display
chat_history_display = []

# Output widget to display the chat
chat_output = widgets.Output(layout={'border': '1px solid black', 'height': '300px', 'overflow_y': 'auto', 'padding': '10px'})

# Text input for user
user_input_widget = widgets.Text(
    value='',
    placeholder='Ask for movie recommendations...',
    description='You:',
    disabled=False,
    layout=widgets.Layout(width='80%')
)

# Button to send message
send_button = widgets.Button(
    description='Send',
    button_style='success', 
    tooltip='Send your message',
    icon='paper-plane'
)

async def display_chat_message(sender, message_text):
    """Appends and displays a message in the chat_output widget, rendering Markdown."""
    chat_history_display.append(f"**{sender}:** {message_text}\n")
    with chat_output:
        clear_output(wait=True) # Clear previous output before re-rendering
        for msg in chat_history_display:
            display(Markdown(msg)) # Render each message as Markdown

async def on_send_button_clicked(b):
    """Handles the send button click event."""
    user_query = user_input_widget.value
    if not user_query.strip():
        return

    await display_chat_message("You", user_query)
    user_input_widget.value = '' # Clear input field

    # Show thinking indicator
    await display_chat_message("Chatbot", "_Thinking..._")

    # Use the globally available recommendations and liked_movies_for_prompt
    # These should have been populated by running earlier cells (e.g., CELL 10 or full_conversational_pipeline)
    if 'recommendations' not in globals() or 'liked_movie_details_for_prompt' not in globals():
        await display_chat_message("Chatbot", "Sorry, the recommendation context is not set up. Please run the data loading and recommendation cells first.")
        return

    # If recommendations list is empty, it means initial rec generation failed or user has no recs
    if not recommendations:
         await display_chat_message("Chatbot", "I don't have specific recommendations based on the initial context. Can you tell me about some movies you like?")
         return

    chatbot_response_text = await get_conversational_recommendation_with_explanation(
        user_query,
        recommendations, # Use the pre-generated recommendations for SIMULATED_USER_ID
        liked_movie_details_for_prompt # Use the liked movies of SIMULATED_USER_ID for context
    )
    
    # Remove the "Thinking..." message and add the actual response
    if chat_history_display and chat_history_display[-1].endswith("_Thinking..._\n"):
        chat_history_display.pop()
    
    await display_chat_message("Chatbot", chatbot_response_text)

send_button.on_click(lambda b: asyncio.ensure_future(on_send_button_clicked(b)))

# Display the chat interface
# Ensure previous cells (model loading, initial recommendation) have run to populate 'recommendations'
# and 'liked_movie_details_for_prompt'
if 'recommendations' in globals() and 'liked_movie_details_for_prompt' in globals():
    print("Interactive chat widget ready. Type your query below and press Send.")
    # Initial message from chatbot (optional)
    # asyncio.ensure_future(display_chat_message("Chatbot", "Hello! How can I help you with movie recommendations today? The current context is based on a simulated user."))
    display(widgets.VBox([chat_output, widgets.HBox([user_input_widget, send_button])]))
else:
    print("Please run the cells above to load the model and generate initial recommendations before using the chat widget.")
    print("(Specifically, ensure 'recommendations' and 'liked_movie_details_for_prompt' are populated).")

Interactive chat widget ready. Type your query below and press Send.


VBox(children=(Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_rig…