In [None]:
# @title Setup and Imports

import google.generativeai as genai
import numpy as np
import os
from pinecone import Pinecone, ServerlessSpec
import boto3
from botocore.exceptions import ClientError 
import json 

import time
import statistics
from typing import Dict, List, Tuple, Any 
from dataclasses import dataclass, asdict 
from datetime import datetime, timedelta 
import matplotlib.pyplot as plt
import seaborn as sns 



def get_secret(sec):
    """Fetch the Pinecone and Gemini API keys from AWS Secrets Manager.

    Args:
        sec: Name (SecretId) of the secret to read. Its SecretString must be a
            JSON object containing the keys "pinecone" and "gemini".

    Returns:
        A ``(pinecone_key, gemini_key)`` tuple.

    Raises:
        botocore.exceptions.ClientError: If Secrets Manager rejects the request.
        KeyError: If the secret payload lacks "SecretString", "pinecone" or "gemini".
    """
    secret_name = sec
    region_name = "us-east-2"
    # Create a Secrets Manager client. No credentials are passed explicitly, so
    # boto3 resolves them from its default provider chain (env vars, shared
    # config, instance/role credentials).
    # SECURITY: never paste access keys into source code or comments. A key pair
    # that was previously embedded here has been removed and must be rotated.
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    try:
        get_secret_value_response = client.get_secret_value(
            SecretId=secret_name
        )
    except ClientError:
        # For a list of exceptions thrown, see
        # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
        # Bare `raise` re-raises with the original traceback intact.
        raise
    secret = json.loads(get_secret_value_response['SecretString'])
    return secret['pinecone'], secret['gemini']

# Read both API keys from the "strongholdLabs" secret, then configure the Gemini client with its key.
pineconeAPIKEY, geminiAPIKEY = get_secret("strongholdLabs") 
genai.configure(api_key=geminiAPIKEY) 
# Module-level Gemini model shared by the helper functions defined in later cells.
model = genai.GenerativeModel('gemini-1.5-flash')
# Chat history and truncation settings
chat_history = []
# Number of recent turns to keep in active memory. Each turn is one user message plus one
# model message, so manage_chat_history() truncates once the history holds more than
# 2 * MAX_CHAT_HISTORY_LENGTH messages.
MAX_CHAT_HISTORY_LENGTH = 4

#----------------------------------------------------------------------------------------------------

# Initialize the Pinecone client and make sure the integrated-embedding index exists.
pc = Pinecone(api_key=pineconeAPIKEY)
PINECONE_INDEX_NAME = "chatbot-memory-integrated"
if not pc.has_index(PINECONE_INDEX_NAME):
    pc.create_index_for_model(
        name = PINECONE_INDEX_NAME,
        cloud="aws",
        region="us-east-1",
        embed = {
            # Integrated embedding: Pinecone embeds the mapped text field itself,
            # so no client-side embedding function is required.
            "model": "llama-text-embed-v2",
            # Records must carry their text in the "message_text" field.
            "field_map": {"text": "message_text"}
        }
    )
    print(f"Created new Pinecone index '{PINECONE_INDEX_NAME}' with integrated embedding model")
else:
    # An already-existing index is the normal case on every run after the first;
    # it is reused, not an error.
    print(f"Using existing Pinecone index '{PINECONE_INDEX_NAME}'")

index = pc.Index(PINECONE_INDEX_NAME)
print("Successfully connected to Pinecone index with integrated embeddings")

#----------------------------------------------------------------------------------------------------


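# NOTE: The legacy manual index setup below is disabled (it is wrapped in a string literal) and kept for reference only; the integrated-embedding index created above replaces it.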

'''
#define Pinecone index name and dimension
PINECONE_INDEX_NAME = "chatbot-memory"
EMBEDDING_DIMENSION = 768  # standard dimension for many embedding models

#create Pinecone index if it doesn't exist
try:
    # check if index already exists
    if PINECONE_INDEX_NAME not in pinecone.list_indexes():
        # create new index with proper schema
        pinecone.create_index(
            name=PINECONE_INDEX_NAME,
            dimension=EMBEDDING_DIMENSION,
            metric="cosine",  # use cosine similarity for text embeddings
            spec=pinecone.Spec(
                serverless=pinecone.ServerlessSpec(
                    cloud="aws",
                    region="us-east-1"
                )
            )
        )
        print(f"Created new Pinecone index '{PINECONE_INDEX_NAME}'")
    else:
        print(f"Using existing Pinecone index '{PINECONE_INDEX_NAME}'")
    
    # connect to the index
    index = pinecone.Index(PINECONE_INDEX_NAME)
    print(f"Successfully connected to Pinecone index")
    
except Exception as e:
    print(f"Error initializing Pinecone: {e}")
    # fallback to simulated VDB if Pinecone fails
    index = None  
'''

 :( thats not good. maybe use the basic vdb instead
Successfully connected to Pinecone index with integrated embeddings


'\n#define Pinecone index name and dimension\nPINECONE_INDEX_NAME = "chatbot-memory"\nEMBEDDING_DIMENSION = 768  # standard dimension for many embedding models\n\n#create Pinecone index if it doesn\'t exist\ntry:\n    # check if index already exists\n    if PINECONE_INDEX_NAME not in pinecone.list_indexes():\n        # create new index with proper schema\n        pinecone.create_index(\n            name=PINECONE_INDEX_NAME,\n            dimension=EMBEDDING_DIMENSION,\n            metric="cosine",  # use cosine similarity for text embeddings\n            spec=pinecone.Spec(\n                serverless=pinecone.ServerlessSpec(\n                    cloud="aws",\n                    region="us-east-1"\n                )\n            )\n        )\n        print(f"Created new Pinecone index \'{PINECONE_INDEX_NAME}\'")\n    else:\n        print(f"Using existing Pinecone index \'{PINECONE_INDEX_NAME}\'")\n\n    # connect to the index\n    index = pinecone.Index(PINECONE_INDEX_NAME)\n    print(

In [None]:
# Pinecone Vector Database Class (replacing SimulatedVectorDB) 
class PineconeVectorDB:
    """Long-term chat memory stored in a Pinecone index with integrated embeddings.

    Text is sent to Pinecone as records; the index embeds the ``message_text``
    field itself. A Gemini model is used for topic detection, query expansion
    and relevance scoring of retrieved chunks.

    Args:
        index: Pinecone index handle (must expose ``upsert_records``, ``search``
            and ``delete``).
        model: Object with ``generate_content(prompt)`` returning a response with
            a ``.text`` attribute. When omitted, the module-level ``model`` is
            used if one exists.
        namespace: Pinecone namespace all records are written to and read from.
    """

    # Minimum Gemini relevance score (0.0-1.0) a chunk needs to be returned by query().
    SEMANTIC_SCORE_THRESHOLD = 0.75
    # Record field the index embeds; the index is created with field_map {"text": "message_text"}.
    TEXT_FIELD = "message_text"

    def __init__(self, index, model=None, namespace: str = "chat-memory"):
        self.index = index  # Real Pinecone index connection
        self.item_counter = 0  # Track item IDs
        # The helper methods call self.model; without this attribute every Gemini
        # call failed and query() could never return a result.
        self.model = model if model is not None else globals().get("model")
        self.namespace = namespace

    # ------------------------------------------------------------------
    # Pinecone access helpers
    # ------------------------------------------------------------------
    def _upsert_record(self, record_id, text: str, metadata: dict) -> None:
        """Write one text record; Pinecone generates the embedding from TEXT_FIELD."""
        record = dict(metadata)
        record["_id"] = str(record_id)
        record[self.TEXT_FIELD] = text
        self.index.upsert_records(self.namespace, [record])

    def _search(self, text: str, top_k: int) -> list:
        """Run a text search and return the list of hits (each has _id, _score, fields)."""
        response = self.index.search(
            namespace=self.namespace,
            query={"inputs": {"text": text}, "top_k": top_k},
        )
        return list(response["result"]["hits"])

    # ------------------------------------------------------------------
    # Writing
    # ------------------------------------------------------------------
    def add_item(self, item_id: int, embedding: list, text_content: str):
        """Adds an item with text content - Pinecone handles embedding generation.

        Args:
            item_id: Identifier of the item (stored as a string id).
            embedding: Unused; kept for interface compatibility because the
                index embeds the text itself.
            text_content: Text to store and embed.
        """
        # Enhanced metadata for better retrieval
        metadata = {
            "item_id": str(item_id),
            "timestamp": str(np.datetime64('now')),
            "message_type": "chat_message",
            "text_content": text_content,
            "word_count": len(text_content.split()),  # For chunking optimization
            "keywords": self._extract_keywords(text_content)  # For hybrid search
        }

        self._upsert_record(item_id, text_content, metadata)

        print(f"Added item ID {item_id} to Pinecone VDB (Text: '{text_content[:30]}...')")

    def add_conversation_turn(self, user_message: str, assistant_response: str, turn_number: int):
        """Store one user/assistant exchange as a single chunk.

        Errors are reported on stdout and swallowed so a storage failure does not
        interrupt the chat loop.
        """
        try:
            # Create conversation chunk with enhanced metadata
            chunk_text, metadata = self._create_conversation_chunk(user_message, assistant_response, turn_number)

            self._upsert_record(metadata['chunk_id'], chunk_text, metadata)

            print(f"Added conversation turn {turn_number} (Topic: {metadata['topic']}) to VDB")

        except Exception as e:
            print(f"Error adding conversation turn: {e}")

    def _create_conversation_chunk(self, user_message: str, assistant_response: str, turn_number: int) -> tuple:
        """Build the chunk text and its metadata for one conversation turn.

        Returns:
            ``(full_conversation, metadata)`` where ``metadata['chunk_id']`` is
            the record id to store the chunk under.
        """
        # Combine user and assistant messages
        full_conversation = f"User: {user_message}\nAssistant: {assistant_response}"
        # Detect topic using Gemini
        topic = self._detect_topic(full_conversation)

        # The topic is model output; keep only ASCII letters/digits in the id so
        # stray quotes or punctuation cannot produce an invalid record id.
        topic_slug = "".join(
            ch if (ch.isascii() and ch.isalnum()) else "_" for ch in topic.lower()
        )

        metadata = {
            "conversation_turn": turn_number,
            "topic": topic,
            "user_message": user_message,
            "assistant_response": assistant_response,
            "full_conversation": full_conversation,
            "message_type": "conversation_chunk",
            "timestamp": str(np.datetime64('now')),
            "word_count": len(full_conversation.split()),
            "chunk_id": f"turn_{turn_number}_{topic_slug}"
        }

        return full_conversation, metadata

    # ------------------------------------------------------------------
    # Gemini helpers
    # ------------------------------------------------------------------
    def _detect_topic(self, conversation_text: str) -> str:
        """Ask Gemini for a 2-3 word topic; returns "general" on any failure."""
        if self.model is None:
            return "general"
        try:
            prompt = f"""
            Analyze this conversation text and identify the main topic in 2-3 words:
            "{conversation_text}"
            
            Return only the topic, no explanation. Examples: "AI programming", "Python debugging", "API integration"
            """

            response = self.model.generate_content(prompt)
            topic = response.text.strip()
            return topic if topic else "general"
        except Exception as e:
            print(f"Topic detection failed: {e}")
            return "general"

    def _expand_query_semantically(self, query_text: str, chat_history: list) -> list:
        """Return the original query plus up to three Gemini rephrasings.

        Falls back to ``[query_text]`` when no model is available or the call fails.
        """
        if self.model is None:
            return [query_text]
        try:
            # Get recent conversation context
            recent_context = ""
            if chat_history:
                recent_messages = [entry['parts'][0] for entry in chat_history[-4:]]
                recent_context = " ".join(recent_messages)

            prompt = f"""
            Given this user query: "{query_text}"
            And recent conversation context: "{recent_context}"
            
            Generate 2-3 semantically equivalent ways to express the same intent.
            Focus on different phrasings that capture the same meaning.
            
            Return each expansion on a new line, no numbering or explanation.
            """

            response = self.model.generate_content(prompt)
            expansions = [line.strip() for line in response.text.split('\n') if line.strip()]

            # Always include original query
            all_queries = [query_text] + expansions
            return all_queries[:4]

        except Exception as e:
            print(f"Semantic query expansion failed: {e}")
            return [query_text]

    def _score_semantic_relevance(self, query: str, chunk_text: str) -> float:
        """Ask Gemini to rate query/chunk relevance, clamped to [0.0, 1.0].

        Returns 0.5 when the model is unavailable, fails, or replies with
        something that is not a number. Note that 0.5 is below
        SEMANTIC_SCORE_THRESHOLD, so such chunks are dropped by query().
        """
        if self.model is None:
            return 0.5
        try:
            prompt = f"""
            Rate the semantic relevance between this query and content on a scale of 0.0 to 1.0.
            
            Query: "{query}"
            Content: "{chunk_text}"
            
            Consider:
            - Does the content directly answer the query?
            - Is the content contextually relevant?
            - Does it provide useful information for the query?
            
            Return only the score (0.0 to 1.0), no explanation.
            """
            response = self.model.generate_content(prompt)
            try:
                score = float(response.text.strip())
                return max(0.0, min(1.0, score))  # Clamp between 0.0 and 1.0
            except ValueError:
                return 0.5  # Default score if parsing fails

        except Exception as e:
            print(f"Semantic scoring failed: {e}")
            return 0.5

    # ------------------------------------------------------------------
    # Reading
    # ------------------------------------------------------------------
    def query(self, query_text: str, k: int = 1, chat_history: list = None) -> list:
        """Retrieve up to ``k`` stored chunks relevant to ``query_text``.

        The query is expanded with Gemini, each expansion is searched in Pinecone
        (``k * 3`` candidates), every candidate is scored by Gemini and kept only
        if it reaches SEMANTIC_SCORE_THRESHOLD. Survivors are de-duplicated and
        ranked by semantic score (70%) + topic match (20%) + recency (10%).

        Args:
            query_text: The user's query.
            k: Maximum number of results to return.
            chat_history: Recent chat entries (``{"role", "parts"}`` dicts) used
                as context for query expansion. When omitted, the module-level
                ``chat_history`` is used if one exists.

        Returns:
            A list of dicts with keys ``id``, ``content``, ``relevance_score``,
            ``topic``, ``conversation_turn`` and ``method``; ``[]`` on any error.
        """
        if chat_history is None:
            chat_history = globals().get("chat_history") or []
        try:
            expanded_queries = self._expand_query_semantically(query_text, chat_history)

            all_results = []

            for expanded_query in expanded_queries:
                # Get more candidates than needed so semantic filtering has room to work
                for hit in self._search(expanded_query, k * 3):
                    fields = hit["fields"] or {}
                    # Conversation chunks store 'full_conversation'; items written
                    # by add_item() only have 'text_content' / the embedded text field.
                    chunk_text = (
                        fields.get('full_conversation')
                        or fields.get('text_content')
                        or fields.get(self.TEXT_FIELD)
                        or ''
                    )
                    if not chunk_text:
                        continue

                    semantic_score = self._score_semantic_relevance(expanded_query, chunk_text)
                    if semantic_score >= self.SEMANTIC_SCORE_THRESHOLD:
                        all_results.append({
                            'id': hit["_id"],
                            'text_content': chunk_text,
                            'semantic_score': semantic_score,
                            'pinecone_score': hit["_score"],
                            'topic': fields.get('topic', 'unknown'),
                            'turn_number': fields.get('conversation_turn', 0),
                            'method': 'semantic_search'
                        })

            # Remove duplicates, keeping the best-scoring copy of each chunk
            unique_results = self._deduplicate_and_select_top(all_results, k * 2)

            # Final ranking: semantic score (70%) + topic relevance (20%) + recency (10%)
            lowered_query = query_text.lower()
            newest_turn = max((r['turn_number'] for r in unique_results), default=0)
            for result in unique_results:
                topic = str(result['topic'] or '')
                # Compare case-insensitively on both sides
                topic_boost = 0.2 if topic and topic.lower() in lowered_query else 0.0
                # Scale by the newest candidate turn so newer turns get the larger boost
                if newest_turn > 0:
                    recency_boost = 0.1 * (result['turn_number'] / newest_turn)
                else:
                    recency_boost = 0.0

                result['final_score'] = (
                    result['semantic_score'] * 0.7 +
                    topic_boost +
                    recency_boost
                )

            # Sort by final score and return top k
            unique_results.sort(key=lambda x: x['final_score'], reverse=True)

            return [
                {
                    'id': result['id'],
                    'content': result['text_content'],
                    'relevance_score': result['final_score'],
                    'topic': result['topic'],
                    'conversation_turn': result['turn_number'],
                    'method': result['method']
                }
                for result in unique_results[:k]
            ]

        except Exception as e:
            print(f"Error in semantic search: {e}")
            return []

    # Disabled work in progress, kept for reference. It cannot be enabled as-is:
    # it calls _calculate_recency_boost and _calculate_length_boost, which are
    # not defined anywhere in this class.
    '''
    # Re-ranking Method. NEED HELP CUZ IDK IF ITS GOOD 
    def _rerank_results(self, matches, original_query: str) -> list:
        #Re-rank results based on multiple relevance factors. 
        
        reranked = []
        
        for match in matches:
            score = match.score
            text_content = match.metadata.get('text_content', '')
            
            # Boost score based on keyword overlap
            keyword_boost = self._calculate_keyword_overlap(original_query, text_content)
            
            # Boost score based on recency (newer messages slightly preferred)
            recency_boost = self._calculate_recency_boost(match.metadata.get('timestamp', ''))
            
            # Boost score based on content length (prefer meaningful responses)
            length_boost = self._calculate_length_boost(text_content)
            
            # Combined score with weights
            final_score = (score * 0.6 + keyword_boost * 0.2 + recency_boost * 0.1 + length_boost * 0.1)
            
            reranked.append({
                'id': match.id,
                'text_content': text_content,
                'score': final_score,
                'original_score': score,
                'method': 'hybrid_reranked'
            })
        
        # Sort in decreasing order by final score    
        reranked.sort(key = lambda x: x['score'], reverse = True)  
        return reranked
    '''

    def _extract_keywords(self, text: str) -> list:
        """Return up to 10 lowercase words that are not stop words and longer than 2 chars.

        Words are split on whitespace only, so attached punctuation is kept.
        """
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them'}
        words = text.lower().split()
        keywords = [word for word in words if word not in stop_words and len(word) > 2]
        return keywords[:10]  # Limit to the first 10 keywords

    def _calculate_keyword_overlap(self, query: str, content: str) -> float:
        """Fraction of the query's keywords that also appear in the content (0.0 if none)."""
        query_keywords = set(self._extract_keywords(query))
        content_keywords = set(self._extract_keywords(content))

        if not query_keywords:
            return 0.0

        overlap = len(query_keywords.intersection(content_keywords))
        return overlap / len(query_keywords)

    def _deduplicate_and_select_top(self, results: list, k: int) -> list:
        """Keep the best-scoring entry per id and return the top ``k`` by semantic score.

        Entries without a 'semantic_score' count as 0.0; ties keep input order.
        """
        best_by_id = {}
        for result in results:
            current = best_by_id.get(result['id'])
            if current is None or result.get('semantic_score', 0.0) > current.get('semantic_score', 0.0):
                best_by_id[result['id']] = result

        ranked = sorted(
            best_by_id.values(),
            key=lambda r: r.get('semantic_score', 0.0),
            reverse=True,
        )
        return ranked[:k]

    def reset(self):
        """Delete every record in this instance's namespace and reset the item counter."""
        try:
            self.index.delete(delete_all=True, namespace=self.namespace)
            self.item_counter = 0
            print("Pinecone index has been reset (all vectors deleted).")
        except Exception as e:
            print(f"Error resetting Pinecone index: {e}. Please try again.")


def start_chat():
    """Create the Pinecone-backed vector DB wrapper for a new chat session.

    Returns:
        A ``(vdb, counter)`` tuple: a PineconeVectorDB built on the module-level
        ``index``, and the starting value (0) of the VDB item-id counter.
    """
    database = PineconeVectorDB(index)
    print("Using real Pinecone Vector Database with integrated embeddings")
    # The item-id counter always starts from zero for a fresh session.
    return database, 0

# Initialize the Pinecone VDB: binds the module-level `vdb` wrapper and its
# item-id counter (which starts at 0), both declared global by manage_chat_history().
vdb, vdb_index_counter = start_chat()  

Using real Pinecone Vector Database with integrated embeddings
Pinecone index is already built and optimized for similarity search.


In [None]:
# Chat History Management and Truncation

def manage_chat_history(user_message, system_response):
    """Append a completed exchange to the chat history and archive overflow in the VDB.

    The newest exchange is appended to the module-level ``chat_history``. When the
    history then holds more than ``MAX_CHAT_HISTORY_LENGTH * 2`` messages, the
    oldest messages are removed and stored in ``vdb`` as user/assistant turns.

    Turn numbers come from the module-level ``vdb_index_counter`` so they keep
    increasing across truncations. (Restarting at 1 on every truncation made
    successive turns share the same turn number and chunk id.)

    Args:
        user_message: Text the user sent.
        system_response: Text the model replied with.

    Returns:
        The (possibly truncated) module-level ``chat_history`` list.
    """
    global chat_history, vdb_index_counter, vdb

    chat_history.append({"role": "user", "parts": [user_message]})
    chat_history.append({"role": "model", "parts": [system_response]})

    print(f"\n--- After adding new messages ---")
    print(f"Current chat_history length: {len(chat_history)}")

    max_messages = MAX_CHAT_HISTORY_LENGTH * 2
    if len(chat_history) > max_messages:
        num_to_remove = len(chat_history) - max_messages
        # Always remove whole user/assistant pairs so no exchange is split
        # between active memory and the VDB.
        num_to_remove += num_to_remove % 2
        messages_to_store = chat_history[:num_to_remove]
        chat_history = chat_history[num_to_remove:]

        print(f"\n--- Truncation initiated ---")
        print(f"Number of messages to truncate: {num_to_remove}")

        # Store conversation turns (user + assistant pairs)
        stored_turns = 0
        for i in range(0, len(messages_to_store) - 1, 2):
            user_msg = messages_to_store[i]["parts"][0]
            assistant_msg = messages_to_store[i + 1]["parts"][0]

            # Session-wide, monotonically increasing turn number
            vdb_index_counter += 1
            vdb.add_conversation_turn(user_msg, assistant_msg, vdb_index_counter)
            stored_turns += 1

        print(f"Truncated {num_to_remove} messages and stored {stored_turns} conversation turns in VDB.")
        print(f"New chat_history length after truncation: {len(chat_history)}")

    return chat_history

# --- Test the functionality ---
'''
print("--- Initializing for test ---")

# Simulate some conversation
manage_chat_history("Hello, how are you?", "I'm doing well, thank you!")
manage_chat_history("What is the capital of France?", "Paris is the capital of France.")

# This should trigger truncation and VDB storage
manage_chat_history("Can you tell me more about AI?", "AI is a rapidly evolving field.")

# Simulate retrieval from VDB
print(f"\n--- Retrieval from VDB ---")

# In a real RAG, you'd query with an embedding of the current user input.
'''

DEBUG: VDB has been reset.
--- Initializing for test ---

--- After adding new messages ---
Current chat_history length: 2

--- After adding new messages ---
Current chat_history length: 4

--- Truncation initiated ---
Number of messages to truncate: 2
DEBUG: Added item ID 0 to VDB (Text: 'Hello, how are you?...')
DEBUG: Added item ID 1 to VDB (Text: 'I'm doing well, thank you!...')
DEBUG: Simulated VDB building with 10 trees. Index is now ready for efficient search.
Truncated 2 messages and stored in VDB.
New chat_history length after truncation: 2
Total items in VDB: 2

--- After adding new messages ---
Current chat_history length: 4

--- Truncation initiated ---
Number of messages to truncate: 2
DEBUG: Added item ID 2 to VDB (Text: 'What is the capital of France?...')
DEBUG: Added item ID 3 to VDB (Text: 'Paris is the capital of France...')
Truncated 2 messages and stored in VDB.
New chat_history length after truncation: 2
Total items in VDB: 4

--- Retrieval from VDB ---
Retrieved 

In [None]:
# evaluation metrics - step 8 

@dataclass
class EvaluationMetrics:
    """Container for one evaluation record of the RAG chatbot.

    Every score defaults to 0.0 (counters to 0, the query to an empty string).
    ``timestamp`` is filled with the creation time when it is left as None.
    """

    # --- Retrieval quality ---
    retrieval_relevance_score: float = 0.0
    retrieval_diversity_score: float = 0.0
    retrieval_coverage_score: float = 0.0

    # --- Response quality ---
    factual_accuracy_score: float = 0.0
    coherence_score: float = 0.0
    contextual_appropriateness_score: float = 0.0
    hallucination_detection_score: float = 0.0

    # --- Memory performance ---
    memory_retention_score: float = 0.0
    context_consistency_score: float = 0.0
    long_term_memory_effectiveness: float = 0.0

    # --- System performance ---
    response_latency: float = 0.0
    retrieval_latency: float = 0.0
    total_tokens_used: int = 0

    # --- Record metadata ---
    timestamp: datetime = None  # None means "stamp with the current time"
    conversation_turn: int = 0
    user_query: str = ""

    def __post_init__(self):
        # An explicitly supplied timestamp is kept untouched.
        if self.timestamp is not None:
            return
        self.timestamp = datetime.now()


In [None]:
# Conflict Resolution - Step 5 
def resolve_context_conflicts(chat_history: list, retrieved_context: list) -> list:
    """Drop retrieved chunks that merely repeat the current chat history.

    Args:
        chat_history: Active chat entries, forwarded to the redundancy check.
        retrieved_context: Chunks returned by the vector DB.

    Returns:
        A new list with the chunks that is_redundant_with_chat_history() did not
        flag, in their original order; ``[]`` when nothing was retrieved.
    """
    if not retrieved_context:
        return []

    return [
        candidate
        for candidate in retrieved_context
        if not is_redundant_with_chat_history(candidate, chat_history)
    ]

def is_redundant_with_chat_history(chunk: dict, chat_history: list) -> bool:
    """Ask Gemini whether a retrieved chunk adds nothing beyond the recent history.

    Args:
        chunk: Retrieved chunk; its 'content' entry is evaluated.
        chat_history: Chat entries whose ``parts[0]`` hold the message text.

    Returns:
        True only when the model's reply is exactly "REDUNDANT" (after stripping
        and upper-casing). False for an empty history or when any error occurs,
        so a chunk is kept whenever it cannot be evaluated.
    """
    if not chat_history:
        return False
    try:
        # Only the last four messages are sent, to keep the prompt small.
        latest_entries = chat_history[-4:]
        history_text = " ".join(entry['parts'][0] for entry in latest_entries)

        # Create prompt for Gemini to analyze redundancy
        prompt = f"""
        Analyze if this conversation chunk provides NEW information compared to recent chat history.
        
        RECENT CHAT HISTORY:
        {history_text}
        
        CONVERSATION CHUNK TO EVALUATE:
        {chunk['content']}
        
        Consider:
        1. Does the chunk introduce new topics not discussed recently?
        2. Does it provide additional details on existing topics?
        3. Does it offer different perspectives or solutions?
        4. Is it a follow-up or expansion of recent discussions?
        
        Return ONLY: "REDUNDANT" if the chunk adds no new value, or "NEW_INFO" if it provides new information.
        """
        # The module-level Gemini model performs the evaluation.
        verdict = model.generate_content(prompt).text.strip().upper()

        return verdict == "REDUNDANT"

    except Exception as e:
        print(f"Error in redundancy check: {e}")
        # Fail open: keep the chunk when it cannot be evaluated.
        return False

def resolve_semantic_conflicts(retrieved_context: list) -> list:
    """Remove chunks that contradict a more relevant chunk.

    Chunks are visited from highest to lowest 'relevance_score'. A chunk is
    accepted unless has_semantic_conflict() reports a conflict with one already
    accepted, so the more relevant side of every conflict wins.

    Because candidates arrive in descending score order, a later chunk can never
    outrank an accepted one; the old "replace the accepted chunk" branch was
    unreachable and has been removed.

    Args:
        retrieved_context: Chunks with a 'relevance_score' entry (None allowed).

    Returns:
        ``[]`` for empty or None input, the input list itself when it holds a
        single chunk, otherwise a new list of accepted chunks ordered by
        descending relevance.
    """
    if not retrieved_context:
        return []
    if len(retrieved_context) == 1:
        return retrieved_context

    # Sort by relevance score, most relevant first
    ranked_chunks = sorted(retrieved_context, key=lambda x: x['relevance_score'], reverse=True)

    accepted_chunks = []
    for candidate in ranked_chunks:
        # any() stops at the first conflict, as the original inner loop did.
        conflicts = any(has_semantic_conflict(candidate, kept) for kept in accepted_chunks)
        if not conflicts:
            accepted_chunks.append(candidate)

    return accepted_chunks

def has_semantic_conflict(chunk1: dict, chunk2: dict) -> bool:
    """Ask Gemini whether two chunks contradict each other.

    Args:
        chunk1: First chunk; its 'content' entry is compared.
        chunk2: Second chunk; its 'content' entry is compared.

    Returns:
        True only when the model's reply is exactly "CONFLICT" (after stripping
        and upper-casing). False otherwise, including when any error occurs.
    """
    try:
        prompt = f"""
        Analyze if these two conversation chunks contain CONFLICTING information.
        
        CHUNK 1:
        {chunk1['content']}
        
        CHUNK 2:
        {chunk2['content']}
        
        Consider:
        1. Do they contradict each other on facts, opinions, or solutions?
        2. Do they provide different answers to the same question?
        3. Do they have opposing viewpoints on the same topic?
        4. Are they discussing the same subject but with conflicting information?
        
        Return ONLY: "CONFLICT" if there are contradictions, or "NO_CONFLICT" if they're compatible.
        """

        verdict = model.generate_content(prompt).text.strip().upper()
        return verdict == "CONFLICT"

    except Exception as e:
        print(f"Error in conflict detection: {e}")
        # Treat the pair as compatible when it cannot be evaluated.
        return False


In [None]:
# @title Main Chat Functionality
#----------------------------------------------------------------------------------------------------

def construct_enhanced_context_prompt_with_metadata(results: list, user_message: str, chat_history: list = None) -> str:
    """Build the retrieved-context section of the prompt.

    Retrieved chunks are filtered for redundancy against the chat history,
    de-conflicted against each other, trimmed to the context window, and then
    rendered together with the current query and response guidelines.

    Args:
        results: Chunks returned by the vector DB; each needs 'content',
            'relevance_score', 'topic' and 'conversation_turn'.
        user_message: The user's current message.
        chat_history: Active chat entries used for the redundancy check.
            Optional so two-argument callers keep working; None is treated as
            an empty history.

    Returns:
        The context prompt, or "" when there is nothing to include (no results,
        or every result was filtered out).
    """
    if not results:
        return ""
    if chat_history is None:
        chat_history = []

    # Resolve conflicts and redundancies using semantic understanding
    clean_context = resolve_context_conflicts(chat_history, results)

    # Resolve conflicts between retrieved chunks
    resolved_context = resolve_semantic_conflicts(clean_context)

    # Manage context window with semantic optimization
    optimized_context = manage_context_window(chat_history, resolved_context, user_message)

    # With nothing left after filtering, emitting the frame and guidelines would
    # only tell the model to use context that is not there.
    if not optimized_context:
        return ""

    # Sort by relevance score for better context ordering
    sorted_results = sorted(optimized_context, key=lambda x: x['relevance_score'], reverse=True)

    lines = [
        "=== PREVIOUS RELEVANT CONVERSATIONS ===\n",
        "Use the following context to provide informed, contextual responses:\n\n",
    ]

    for i, result in enumerate(sorted_results, 1):
        lines.append(f"--- Conversation {i} (Relevance: {result['relevance_score']:.2f}) ---\n")
        lines.append(f"Topic: {result['topic']}\n")
        lines.append(f"Turn: {result['conversation_turn']}\n")
        lines.append(f"Content: {result['content']}\n\n")

    # Add current query analysis
    lines.append("=== CURRENT QUERY ANALYSIS ===\n")
    lines.append(f"Query: {user_message}\n")

    # dict.fromkeys de-duplicates while keeping first-seen order, so the prompt
    # is deterministic (a set would list the topics in arbitrary order).
    topics = list(dict.fromkeys(r['topic'] for r in optimized_context))
    lines.append(f"Related Topics: {', '.join(topics)}\n\n")

    # Add specific instructions
    lines.append("=== RESPONSE GUIDELINES ===\n")
    lines.append("1. If the current query relates to previous topics, acknowledge the connection\n")
    lines.append("2. Build upon previous discussions when relevant\n")
    lines.append("3. Maintain consistency with previously provided information\n")
    lines.append("4. Reference specific previous conversations when helpful\n")
    lines.append("5. If you notice any conflicting information, clarify the discrepancy\n\n")

    return "".join(lines)

def manage_context_window(chat_history: list, retrieved_context: list, user_message: str, max_tokens: int = 4000) -> list:
    """Select the retrieved chunks that fit in the prompt's token budget.

    Chunks are ordered by descending 'relevance_score'. With more than three
    chunks, optimize_context_for_tokens() first narrows the selection. The
    budget is then enforced: chunks are taken in order and any chunk whose
    estimated size would exceed the remaining budget is skipped. (Previously
    ``max_tokens`` was computed but never applied.)

    Args:
        chat_history: Unused; kept for interface compatibility.
        retrieved_context: Chunks with 'relevance_score' and 'content'.
        user_message: Current user message; its estimated size is reserved.
        max_tokens: Total token budget for the message plus context.

    Returns:
        The chunks that fit, most relevant first.
    """
    # Rough estimate: about 1.3 tokens per whitespace-separated word.
    def estimate_tokens(text: str) -> int:
        return int(len(text.split()) * 1.3)

    # Calculate available tokens
    current_prompt = f"User: {user_message}\nAssistant:"
    available_tokens = max_tokens - estimate_tokens(current_prompt)

    # Start with most relevant context
    sorted_context = sorted(retrieved_context, key=lambda x: x['relevance_score'], reverse=True)

    # Use Gemini to prioritize content only when there are many chunks
    if len(sorted_context) > 3:
        sorted_context = optimize_context_for_tokens(sorted_context, available_tokens)

    # Enforce the budget greedily; an oversized chunk is skipped so smaller,
    # lower-ranked chunks can still be included.
    within_budget = []
    used_tokens = 0
    for chunk in sorted_context:
        cost = estimate_tokens(str(chunk.get('content', '')))
        if used_tokens + cost > available_tokens:
            continue
        within_budget.append(chunk)
        used_tokens += cost

    return within_budget

def optimize_context_for_tokens(context_chunks: list, available_tokens: int) -> list:
    """Ask Gemini to pick the three most valuable chunks.

    The model sees one summary line per chunk (topic and score) and replies with
    comma-separated 1-based chunk numbers. Numbers that are out of range or
    repeated are ignored.

    Args:
        context_chunks: Chunks with 'topic' and 'relevance_score', ordered by
            the caller (most relevant first).
        available_tokens: Unused; kept for interface compatibility.

    Returns:
        At most three chunks in the model's priority order. When the call fails
        or the reply contains no usable number, the first three chunks are
        returned instead.
    """
    try:
        # One summary line per chunk for Gemini to evaluate
        chunk_summaries = [
            f"Chunk {number}: {chunk['topic']} (Score: {chunk['relevance_score']:.2f})"
            for number, chunk in enumerate(context_chunks, 1)
        ]

        prompt = f"""
        Given these conversation chunks, select the TOP 3 most valuable ones for context.
        
        AVAILABLE CHUNKS:
        {chr(10).join(chunk_summaries)}
        
        Selection criteria:
        1. Highest relevance to current conversation
        2. Most unique and non-overlapping information
        3. Best coverage of different topics/aspects
        
        Return ONLY the chunk numbers (e.g., "1,3,5") in order of priority.
        """

        response = model.generate_content(prompt)

        selected_indices = []
        for token in response.text.split(','):
            token = token.strip()
            if not token.isdigit():
                continue
            chunk_index = int(token) - 1
            # Skip out-of-range numbers and repeats (a reply like "1,1,2"
            # would otherwise include the same chunk twice).
            if 0 <= chunk_index < len(context_chunks) and chunk_index not in selected_indices:
                selected_indices.append(chunk_index)

        if not selected_indices:
            # An unusable reply must not wipe out the context.
            return context_chunks[:3]

        return [context_chunks[i] for i in selected_indices[:3]]

    except Exception as e:
        print(f"Error in context optimization: {e}")
        # Fallback to simple relevance-based selection
        return context_chunks[:3]

def chat_with_gemini_with_memory():
    """Run the interactive chat loop with VDB-backed long-term memory.

    For every user message: relevant past turns are retrieved from ``vdb``, a
    context prompt is built from them, the active ``chat_history`` and the new
    message are appended, and the result is sent to the Gemini ``model``. The
    exchange is then recorded through manage_chat_history(). Typing 'EXIT' ends
    the loop; blank input is ignored.
    """
    print("Welcome to the Pseudo-infinite Chatbot! Type 'EXIT' to end the conversation.")

    while True:
        user_message = input("You: ")
        if user_message.strip() == 'EXIT':
            print("Chat ended.")
            break
        if not user_message.strip():
            # Nothing to send; avoid a pointless retrieval and model call.
            continue

        # Get full results with metadata. query() is called with (text, k) only:
        # it falls back to the module-level chat_history for query expansion.
        results = vdb.query(user_message, k=3)

        # Construct enhanced context prompt; the active history is needed for
        # the redundancy check.
        context_prompt = construct_enhanced_context_prompt_with_metadata(results, user_message, chat_history)

        # Build the full prompt
        full_prompt = f"{context_prompt}=== CURRENT CONVERSATION ===\n"
        for entry in chat_history:
            role = "User" if entry["role"] == "user" else "Assistant"
            full_prompt += f"{role}: {entry['parts'][0]}\n"
        full_prompt += f"User: {user_message}\nAssistant:"

        try:
            response = model.generate_content(
                contents=[{"role": "user", "parts": [full_prompt]}]
            )
            gemini_response = response.candidates[0].content.parts[0].text
            print(f"Gemini: {gemini_response}")

            manage_chat_history(user_message, gemini_response)

        except Exception as e:
            print(f"An error occurred: {e}")
            print("Please check your API key and ensure the model is accessible.")

# Note: Before running chat_with_gemini_with_memory(), ensure you have:
# - Initialized the Gemini 'model' object (setup cell: model = genai.GenerativeModel('gemini-1.5-flash'))
# - Set your Google API key (genai.configure(api_key=...))
# - Run the PineconeVectorDB class definition and the global setup (vdb, vdb_index_counter, chat_history)
# - Run the manage_chat_history function definition.

# Example of how you would set up the globals and start the chat:
# import google.generativeai as genai
# import os
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY")) # Or your actual key
# model = genai.GenerativeModel('gemini-pro')

'''
chat_history = []
vdb = SimulatedVectorDB() # Re-initialize vdb if needed for a fresh chat session
vdb_built_flag = False
vdb_index_counter = 0 
'''

# Start the interactive chat loop; it blocks on input() until the user types 'EXIT'.
chat_with_gemini_with_memory()