In [None]:
class HybridHallAgent4Rec(HallAgent4Rec):
    """Recommender that uses matrix factorization for users with enough history
    and an LLM-backed generative agent for cold-start users.

    Users with fewer than ``cold_start_threshold`` interactions are served by a
    per-user generative agent whose recommendations are validated against the
    retrieved candidate list (anything else counts as a hallucination). All
    other users are served directly from the matrix-factorization scores, which
    avoids LLM calls.
    """

    # Number of retrieved items shown to the LLM as the candidate list.
    _MAX_PROMPT_CANDIDATES = 10

    # Characters LLMs commonly wrap item names in (markdown emphasis, brackets,
    # quotes); stripped before comparing a parsed name against the candidates.
    _MARKUP_CHARS = ' \t[]*_"\'`'

    def __init__(self, agent_usage_threshold=0.8, cold_start_threshold=5, **kwargs):
        """
        Initialize hybrid approach that combines matrix factorization with selective agent usage.

        Args:
            agent_usage_threshold: Score threshold above which to use matrix factorization directly
                                  (avoiding agent API calls). NOTE: the value is stored on the
                                  instance but no method of this class reads it yet.
            cold_start_threshold: Number of interactions below which a user is considered "cold start"
                                 and will use an agent
            **kwargs: Other parameters for HallAgent4Rec
        """
        super().__init__(**kwargs)
        self.agent_usage_threshold = agent_usage_threshold
        self.cold_start_threshold = cold_start_threshold
        self.prototype_agent = None  # Single agent used for cold start users

    def train(self):
        """Train without initializing agents for all users.

        Runs item clustering and matrix factorization (both inherited), then
        builds the single prototype agent used by the cold-start path.
        """
        # Step 1: Cluster items
        self.cluster_items()

        # Step 2: Initialize matrix factorization
        self.matrix_factorization()

        # Step 3: Create one prototype agent for cold start users
        self._initialize_prototype_agent()

        print("HybridHallAgent4Rec training complete!")

    @staticmethod
    def _coerce_age(value, default=30):
        """Return ``value`` as an int, or ``default`` when it is missing/NaN/non-numeric."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            # int(None) -> TypeError, int(nan) / int("abc") -> ValueError,
            # int(inf) -> OverflowError.
            return default

    def _lookup_item_name(self, item_id):
        """Return the display name of ``item_id``, or None if it is not in ``item_data``."""
        item_row = self.item_data[self.item_data['item_id'] == item_id]
        if item_row.empty:
            return None
        return item_row.iloc[0].get('name', f"Item_{item_id}")

    def _item_record(self, item_id):
        """Return the ``item_data`` row for ``item_id`` as a dict, or None if absent."""
        item_row = self.item_data[self.item_data['item_id'] == item_id]
        if item_row.empty:
            return None
        first = item_row.iloc[0]
        return {col: first[col] for col in item_row.columns}

    @staticmethod
    def _item_display_name(item):
        """Return the name of a retrieved item, falling back to ``Item_<id>``.

        Mirrors the fallback used when building agent memories so a candidate
        without a ``name`` field does not raise ``KeyError``.
        """
        return str(item.get('name', f"Item_{item.get('item_id')}"))

    def _initialize_prototype_agent(self):
        """Initialize a single prototype agent for cold start cases.

        The agent's traits are the column-wise mean (numeric columns) and mode
        (other columns) of ``user_data``; its memory is seeded with the five
        most frequently interacted-with items.
        """
        from collections import Counter

        print("Initializing prototype agent for cold start users...")

        # Create a generic profile based on average user
        traits = {}
        for col in self.user_data.columns:
            if col == 'user_id':
                continue
            column = self.user_data[col]
            if pd.api.types.is_numeric_dtype(column):
                avg_value = column.mean()
                # An empty or all-NaN column has no meaningful average.
                if pd.notna(avg_value):
                    traits[col] = avg_value
            else:
                # mode() is empty for an empty or all-NaN column, so check the
                # result rather than the column before indexing into it.
                modes = column.mode()
                if not modes.empty:
                    traits[col] = modes.iloc[0]

        # Format traits as a string
        traits_str = ", ".join(f"{k}: {v}" for k, v in traits.items())

        # Create agent memory
        memory = GenerativeAgentMemory(
            llm=LLM,
            memory_retriever=create_new_memory_retriever(),
            verbose=False,
            reflection_threshold=30,
        )

        # Create prototype agent
        self.prototype_agent = GenerativeAgent(
            name="Prototype_Agent",
            age=self._coerce_age(traits.get('age')),
            traits=traits_str,
            status="helping users find recommendations",
            memory_retriever=create_new_memory_retriever(),
            llm=LLM,
            memory=memory,
        )

        # Add some generic memories about the top 5 most popular items.
        # Counting over the column (instead of iterrows) keeps the ids in their
        # own dtype and, like the stable sort it replaces, orders ties by first
        # appearance.
        popularity = Counter(self.interactions['item_id'].dropna())
        for item_id, _ in popularity.most_common(5):
            item_name = self._lookup_item_name(item_id)
            if item_name is not None:
                memory_content = f"Many users like {item_name} (ID: {item_id})"

                # Add to agent memory
                self.prototype_agent.memory.add_memory(memory_content)

        print("Prototype agent initialized")

    def _is_cold_start_user(self, user_id):
        """Determine if a user is a cold start case (few interactions)."""
        interaction_count = int((self.interactions['user_id'] == user_id).sum())
        return interaction_count < self.cold_start_threshold

    def _create_user_specific_agent(self, user_id):
        """Create an agent specific to a user (for cold start cases).

        The agent's traits come from the user's ``user_data`` row (empty if the
        user has no row) and its memory is seeded with one entry per recorded
        interaction whose item exists in ``item_data``.
        """
        # The user agent borrows the prototype's retriever, so make sure the
        # prototype exists even if train() was not run on this instance.
        if self.prototype_agent is None:
            self._initialize_prototype_agent()

        # Extract user traits; a user known to the interaction data may still
        # have no profile row, in which case the agent starts with no traits.
        traits = {}
        user_rows = self.user_data[self.user_data['user_id'] == user_id]
        if not user_rows.empty:
            user_row = user_rows.iloc[0]
            traits = {col: user_row[col] for col in self.user_data.columns if col != 'user_id'}

        # Format traits as a string
        traits_str = ", ".join(f"{k}: {v}" for k, v in traits.items())

        # NOTE(review): the retriever object is shared with the prototype agent
        # (and therefore with every other user agent created here), so memories
        # added below accumulate in one store and may surface in other users'
        # queries. Kept as-is because it also exposes the prototype's
        # popular-item memories to cold-start users - confirm this is intended.
        shared_retriever = self.prototype_agent.memory.memory_retriever

        # Create agent memory
        memory = GenerativeAgentMemory(
            llm=LLM,
            memory_retriever=shared_retriever,
            verbose=False,
            reflection_threshold=30,
        )

        # Create generative agent
        agent = GenerativeAgent(
            name=f"User_{user_id}",
            age=self._coerce_age(traits.get('age')),
            traits=traits_str,
            status="looking for recommendations",
            memory_retriever=shared_retriever,  # Reuse retriever
            llm=LLM,
            memory=memory,
        )

        # Add user interactions as memories to the agent
        user_item_ids = self.interactions.loc[self.interactions['user_id'] == user_id, 'item_id']
        for item_id in user_item_ids:
            item_name = self._lookup_item_name(item_id)
            if item_name is not None:
                memory_content = f"I interacted with {item_name} (ID: {item_id})"

                # Add to agent memory
                agent.memory.add_memory(memory_content)

        return agent

    def _get_mf_recommendations(self, user_id, num_recommendations=5):
        """Get recommendations using only matrix factorization.

        Args:
            user_id: Id of a user present in ``self.user_id_map``.
            num_recommendations: Maximum number of items to return.

        Returns:
            List of item dicts (one per ``item_data`` column), best score first.
            Items missing from ``item_data`` are skipped, so the list may be
            shorter than requested.
        """
        print(f"Using matrix factorization for user {user_id} (skipping agent)...")

        # Get user index
        user_idx = self.user_id_map[user_id]

        # Get prediction scores for all items
        predicted_scores = np.dot(self.user_embeddings[user_idx], self.item_embeddings.T)

        # Apply hallucination penalty from the hallucination scores
        if self.hallucination_scores is not None:
            hallucination_penalties = self.hallucination_scores[user_idx]
            predicted_scores -= self.lambda_h * hallucination_penalties * predicted_scores

        # Get top items
        top_item_indices = np.argsort(predicted_scores)[::-1][:num_recommendations]

        # Create recommendation list
        recommendations = []
        for item_idx in top_item_indices:
            record = self._item_record(self.idx_to_item_id[item_idx])
            if record is not None:
                recommendations.append(record)

        return recommendations

    def _parse_recommended_names(self, recommendations_text):
        """Extract the entries of a numbered list from the LLM response.

        Only lines that start with ``<number>.`` or ``<number>)`` (optionally
        preceded by markdown markers) are treated as recommendations. The text
        after the number is returned with leading markup removed; empty entries
        are dropped.
        """
        import re

        # (?!\d) keeps decimals such as "3.5 stars" from being read as item 3.
        numbered = re.compile(r'^[\s*_#>-]*\d+\s*[.)](?!\d)\s*(.*)$')
        entries = []
        for line in recommendations_text.splitlines():
            found = numbered.match(line)
            if not found:
                continue
            entry = found.group(1).strip().lstrip(self._MARKUP_CHARS)
            if entry:
                entries.append(entry)
        return entries

    @staticmethod
    def _find_candidate(text, head, labels):
        """Return the index in ``labels`` best matching a parsed entry, or None.

        ``text`` is the whole lower-cased entry, ``head`` the lower-cased part
        before the first colon. Preference order: exact name, entry starting
        with the full name (covers names that themselves contain a colon), then
        substring containment in either direction. Empty strings never match,
        because ``"" in s`` is always true.
        """
        for pos, label in enumerate(labels):
            if label and label == head:
                return pos

        prefix_hits = [pos for pos, label in enumerate(labels) if label and text.startswith(label)]
        if prefix_hits:
            return max(prefix_hits, key=lambda pos: len(labels[pos]))

        if head:
            for pos, label in enumerate(labels):
                if label and (head in label or label in head):
                    return pos
        return None

    def _match_recommendations(self, entries, candidates):
        """Split parsed entries into valid candidate items and hallucinated names.

        Returns:
            ``(matched, hallucinated)`` where ``matched`` holds each matching
            candidate at most once, in the order the LLM listed them, and
            ``hallucinated`` holds the names that matched no candidate.
        """
        labels = [self._item_display_name(item).strip().lower() for item in candidates]
        matched = []
        hallucinated = []
        used = set()
        for entry in entries:
            head = entry.split(':', 1)[0].strip(self._MARKUP_CHARS)
            pos = self._find_candidate(entry.lower(), head.lower(), labels)
            if pos is None:
                hallucinated.append(head or entry)
            elif pos not in used:
                used.add(pos)
                matched.append(candidates[pos])
        return matched, hallucinated

    def _fill_from_cluster(self, user_id, top_cluster, chosen, needed):
        """Return up to ``needed`` extra items from ``top_cluster`` ranked by predicted score.

        Items already present in ``chosen`` (by ``item_id``) or missing from
        ``item_data`` are skipped.
        """
        if needed <= 0:
            return []

        user_idx = self.user_id_map[user_id]
        cluster_items = self.items_by_cluster[top_cluster]

        # Get predicted scores
        item_scores = [(idx, np.dot(self.user_embeddings[user_idx], self.item_embeddings[idx]))
                       for idx in cluster_items]

        taken_ids = {item.get('item_id') for item in chosen}
        additional_items = []
        for item_idx, _ in sorted(item_scores, key=lambda pair: pair[1], reverse=True):
            item_id = self.idx_to_item_id[item_idx]
            if item_id in taken_ids:
                continue
            record = self._item_record(item_id)
            if record is None:
                continue
            taken_ids.add(item_id)
            additional_items.append(record)
            if len(additional_items) >= needed:
                break
        return additional_items

    def _get_agent_recommendations(self, user_id, num_recommendations):
        """Run the agent-based (cold-start) recommendation flow for one user.

        Builds a user agent, retrieves candidate items, asks the LLM to pick
        from them, discards anything that is not one of the presented
        candidates, and tops the list up from the user's top cluster.
        """
        # Create a user-specific agent for this cold start user
        agent = self._create_user_specific_agent(user_id)

        # Construct query using agent
        user_traits = agent.traits
        relevant_memories = agent.memory.memory_retriever.get_relevant_documents("What do I like?")
        memory_contents = " ".join(mem.page_content for mem in relevant_memories)
        query = f"User traits: {user_traits}. User memories: {memory_contents}"
        query_embedding = embeddings_model.embed_query(query)

        knowledge_base, top_cluster = self.construct_knowledge_base(user_id)
        retrieved_items = self.retrieve_items(user_id, query_embedding, knowledge_base)

        # Only these items are shown to the LLM, so only these can be valid
        # (non-hallucinated) answers.
        candidates = list(retrieved_items[:self._MAX_PROMPT_CANDIDATES])

        valid_recommendations = []
        if candidates:  # Nothing to choose from -> skip the LLM call entirely
            # Format retrieved items for prompt
            item_descriptions = "\n".join(f"- {self._item_display_name(item)}" for item in candidates)

            # Create prompt for LLM
            prompt = f"""
            You are a recommendation system for a user with the following traits:
            {agent.traits}
            
            Based on the user's profile and past behavior, you have retrieved the following relevant items:
            {item_descriptions}
            
            Please recommend {num_recommendations} items from the list above that would be most relevant for this user.
            For each recommendation, provide a brief explanation of why it matches the user's preferences.
            
            IMPORTANT: You must ONLY recommend items from the provided list. Do not suggest any items that are not in the list.
            
            Format your response as:
            1. [Item Name]: [Explanation]
            2. [Item Name]: [Explanation]
            ...
            """

            # Generate recommendations; chat models return a message object
            # with .content, plain LLMs return the text itself.
            response = LLM.invoke(prompt)
            recommendations_text = str(getattr(response, 'content', response))

            # Extract recommended items and check for hallucinations
            entries = self._parse_recommended_names(recommendations_text)
            valid_recommendations, hallucinations = self._match_recommendations(entries, candidates)
            if hallucinations:
                print(f"Detected {len(hallucinations)} hallucinated item(s) for user {user_id}: {hallucinations}")

        # Fill with top predicted items if needed
        missing = num_recommendations - len(valid_recommendations)
        if missing > 0:
            valid_recommendations.extend(
                self._fill_from_cluster(user_id, top_cluster, valid_recommendations, missing)
            )

        # Limit to requested number
        return valid_recommendations[:num_recommendations]

    def generate_recommendations(self, user_id, num_recommendations=5):
        """Generate recommendations using either matrix factorization or agent-based approach.

        Args:
            user_id: Id of the user to recommend for.
            num_recommendations: Maximum number of items to return.

        Returns:
            List of item dicts. Empty when the user is unknown or when
            ``num_recommendations`` is not positive.
        """
        # Check if user exists
        if user_id not in self.user_id_map:
            print(f"User {user_id} not found. Cannot generate recommendations.")
            return []

        # Nothing requested: avoid agent creation and LLM calls altogether.
        if num_recommendations <= 0:
            return []

        if self._is_cold_start_user(user_id):
            print(f"User {user_id} is a cold start case. Using agent-based approach.")
            recommendations = self._get_agent_recommendations(user_id, num_recommendations)
        else:
            # For non-cold-start users, use matrix factorization directly
            recommendations = self._get_mf_recommendations(user_id, num_recommendations)

        print(f"Generated {len(recommendations)} recommendations for user {user_id}")
        return recommendations