In [6]:
# @title Setup and Imports

import google.generativeai as genai
import numpy as np
import os
from pinecone import Pinecone, ServerlessSpec

# Configure your API key 
APIKEY = "AIzaSyA6bnQK33HYRspkrOi8-Q54bq4E4RgcHj4" 
# It's recommended to store your API key securely, e.g., in environment variables
# For this example, we'll assume it's directly set.
# Replace "YOUR_API_KEY" with your actual Google API Key
# os.environ["GOOGLE_API_KEY"] = "YOUR_API_KEY"
genai.configure(api_key=APIKEY) 

# Initialize the Generative Model
model = genai.GenerativeModel('gemini-1.5-flash')

# Chat history and truncation settings
chat_history = []
MAX_CHAT_HISTORY_LENGTH = 4 # Number of recent turns to keep in active memory, so # of messages by user + chatbot = 2 * chat_history_length (I think) 


#----------------------------------------------------------------------------------------------------

# Configure the Pinecone API key 
pineconeAPIKEY = "pcsk_4xPCuD_6WLyNse1TcP3qmHKhMPKNdRCSQwU8g9MvVVVorLvLjWVboM3CwA76YnNzu8yd4V" 

#initialize pinecone client
pc = Pinecone(api_key=pineconeAPIKEY)  
PINECONE_INDEX_NAME = "chatbot-memory-integrated" 
if not pc.has_index(PINECONE_INDEX_NAME):
    pc.create_index_for_model(
        name = PINECONE_INDEX_NAME,      
        cloud="aws",
        region="us-east-1",
        embed = { 
            "model": "llama-text-embed-v2", #Does this allow for auto embedding without needing an embedding function?? I think so.  
            "field_map": {"text": "message_text"}
        }
    )
    print(f"Created new Pinecone index '{PINECONE_INDEX_NAME}' with integrated embedding model")
else:
    print(f" :( thats not good. maybe use the basic vdb instead")  

index = pc.Index(PINECONE_INDEX_NAME)
print(f"Successfully connected to Pinecone index with integrated embeddings")

#----------------------------------------------------------------------------------------------------


#i dont think we need this part below. 

'''
#define Pinecone index name and dimension
PINECONE_INDEX_NAME = "chatbot-memory"
EMBEDDING_DIMENSION = 768  # standard dimension for many embedding models

#create Pinecone index if it doesn't exist
try:
    # check if index already exists
    if PINECONE_INDEX_NAME not in pinecone.list_indexes():
        # create new index with proper schema
        pinecone.create_index(
            name=PINECONE_INDEX_NAME,
            dimension=EMBEDDING_DIMENSION,
            metric="cosine",  # use cosine similarity for text embeddings
            spec=pinecone.Spec(
                serverless=pinecone.ServerlessSpec(
                    cloud="aws",
                    region="us-east-1"
                )
            )
        )
        print(f"Created new Pinecone index '{PINECONE_INDEX_NAME}'")
    else:
        print(f"Using existing Pinecone index '{PINECONE_INDEX_NAME}'")
    
    # connect to the index
    index = pinecone.Index(PINECONE_INDEX_NAME)
    print(f"Successfully connected to Pinecone index")
    
except Exception as e:
    print(f"Error initializing Pinecone: {e}")
    # fallback to simulated VDB if Pinecone fails
    index = None  
'''

 :( thats not good. maybe use the basic vdb instead
Successfully connected to Pinecone index with integrated embeddings


'\n#define Pinecone index name and dimension\nPINECONE_INDEX_NAME = "chatbot-memory"\nEMBEDDING_DIMENSION = 768  # standard dimension for many embedding models\n\n#create Pinecone index if it doesn\'t exist\ntry:\n    # check if index already exists\n    if PINECONE_INDEX_NAME not in pinecone.list_indexes():\n        # create new index with proper schema\n        pinecone.create_index(\n            name=PINECONE_INDEX_NAME,\n            dimension=EMBEDDING_DIMENSION,\n            metric="cosine",  # use cosine similarity for text embeddings\n            spec=pinecone.Spec(\n                serverless=pinecone.ServerlessSpec(\n                    cloud="aws",\n                    region="us-east-1"\n                )\n            )\n        )\n        print(f"Created new Pinecone index \'{PINECONE_INDEX_NAME}\'")\n    else:\n        print(f"Using existing Pinecone index \'{PINECONE_INDEX_NAME}\'")\n\n    # connect to the index\n    index = pinecone.Index(PINECONE_INDEX_NAME)\n    print(

In [9]:
# Pinecone Vector Database Class (replacing SimulatedVectorDB) 
class PineconeVectorDB:
    def __init__(self, index): 
        self.index = index  # Real Pinecone index connection
        self._is_built = True  # Pinecone indexes are always built
        self.item_counter = 0  # Track item IDs

    def add_item(self, item_id: int, embedding: list, text_content: str):
        """Adds an item with text content - Pinecone handles embedding generation."""
        # with integrated models, we send text directly: no need for embeddings
        # Enhanced metadata for better retrieval 
        metadata = {
            "item_id": str(item_id),
            "timestamp": str(np.datetime64('now')), 
            "message_type": "chat_message",
            "text_content": text_content,
            "word_count": len(text_content.split()),  # For chunking optimization 
            "keywords": self._extract_keywords(text_content)  # For hybrid search  
        }  
        
        # Upsert with text: Pinecone generates embeddings automatically
        self.index.upsert(
            vectors=[(str(item_id), {"message_text": text_content}, metadata)]
        )   
        
        print(f"Added item ID {item_id} to Pinecone VDB (Text: '{text_content[:30]}...')")

    def build(self, n_trees: int = 10): #I think a default value for n_trees is ok 
        #Pinecone indexes are automatically built: no manual build needed.
        print(f"Pinecone index is already built and optimized for similarity search.")
        self._is_built = True # We probably don't need the function I think.

    def query(self, query_text: str, k: int = 1) -> list:  
        #Real advanced semantic search using Pinecone's integrated model.
        #Returns the k most semantically similar items hybrid search techniques. 
        
        if not self._is_built:
            print("Pinecone index is always ready for querying. We continue as needed.")

        try:  
            # Query with text directly: Pinecone handles embedding generation; Query expansion 
            expanded_queries = self._expand_query(query_text)   
            
            # Hybrid search combining semantic and keyword matching
            all_results = []
            
            for expanded_query in expanded_queries:
                # Semantic search with expanded query
                query_results = self.index.query(
                    vector={"message_text": expanded_query},
                    top_k=k * 2,  # Get more results for re-ranking
                    include_metadata=True
                )
                
                # Re-rank results based on multiple factors   
                reranked_results = self._rerank_results(query_results.matches, query_text)
                all_results.extend(reranked_results)
            
            # Deduplicate and select top k results
            final_results = self._deduplicate_and_select_top(all_results, k)
            
            # Format results
            retrieved_results = []
            for result in final_results:
                item_id = result['id']
                text_content = result['text_content']
                similarity_score = result['score']
                retrieval_method = result.get('method', 'semantic')
                retrieved_results.append(f"Retrieved content (ID: {item_id}, Score: {similarity_score:.3f}, Method: {retrieval_method}): '{text_content}'")
            
            return retrieved_results
            
        except Exception as e:
            print(f"Error querying Pinecone: {e}. Please try again.")
            return [] 

    # Query Expansion 
    def _expand_query(self, query_text: str) -> list: 
        #Expand query with synonyms and related terms for better retrieval.
        expanded_queries = [query_text]  # Original query 
        
        # Simple synonym expansion (when making this more advanced, use a proper thesaurus API which idk)   
        synonyms = {   
            "what": ["tell me about", "explain", "describe"],
            "how": ["explain how", "describe the process", "what is the method"],
            "why": ["explain why", "what is the reason", "what causes"],
            "when": ["at what time", "during what period", "what date"],
            "where": ["in what location", "at what place", "which place"],
            "who": ["which person", "what person", "tell me about"]
        }
        
        # Expand query with synonyms
        words = query_text.lower().split()
        for word in words:
            if word in synonyms:
                for synonym in synonyms[word]:
                    expanded_query = query_text.lower().replace(word, synonym)
                    if expanded_query not in expanded_queries:
                        expanded_queries.append(expanded_query)
        
        # Add question variations 
        if query_text.endswith('?'):
            # Remove question mark and add as statement
            statement_query = query_text[:-1].strip()
            if statement_query not in expanded_queries:
                expanded_queries.append(statement_query)
        
        return expanded_queries[:3]  # Limit to 3 expanded queries

    # Re-ranking Method. NEED HELP CUZ IDK IF ITS GOOD 
    def _rerank_results(self, matches, original_query: str) -> list:
        #Re-rank results based on multiple relevance factors. 
        
        reranked = []
        
        for match in matches:
            score = match.score
            text_content = match.metadata.get('text_content', '')
            
            # Boost score based on keyword overlap
            keyword_boost = self._calculate_keyword_overlap(original_query, text_content)
            
            # Boost score based on recency (newer messages slightly preferred)
            recency_boost = self._calculate_recency_boost(match.metadata.get('timestamp', ''))
            
            # Boost score based on content length (prefer meaningful responses)
            length_boost = self._calculate_length_boost(text_content)
            
            # Combined score with weights
            final_score = (score * 0.6 + keyword_boost * 0.2 + recency_boost * 0.1 + length_boost * 0.1)
            
            reranked.append({
                'id': match.id,
                'text_content': text_content,
                'score': final_score,
                'original_score': score,
                'method': 'hybrid_reranked'
            })
        
        # Sort in decreasing order by final score    
        reranked.sort(key = lambda x: x['score'], reverse = True)  
        return reranked

    # Helper methods for re-ranking 
    def _extract_keywords(self, text: str) -> list: 
        # Extract important keywords from text. 
        # Simple keyword extraction (when making this more advanced, use NLP libraries but idk)
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them'}
        words = text.lower().split()   
        keywords = [word for word in words if word not in stop_words and len(word) > 2]
        return keywords[:10]  # Limit to top 10 keywords   

    def _calculate_keyword_overlap(self, query: str, content: str) -> float:
        # Calculate keyword overlap between the query and the content. 
        query_keywords = set(self._extract_keywords(query))
        content_keywords = set(self._extract_keywords(content))
        
        if not query_keywords: # Size = 0 
            return 0.0
        
        overlap = len(query_keywords.intersection(content_keywords))
        return overlap / len(query_keywords) 

    def _calculate_recency_boost(self, timestamp: str) -> float:
        # Calculate recency boost for newer messages.
        try:
            # Simple recency boost (newer messages get slight preference)
            # I don't know if we need time calculations to be accounted for in this function 
            return 0.10  # Small boost for recency
        except:       
            return 0.0

    def _calculate_length_boost(self, text: str) -> float:
        # Calculate boost based on content length. 
        word_count = len(text.split())  
        # Prefer medium-length responses (not too short, not too long)  
        if 5 <= word_count <= 50: # Adjust accordingly (I think)  
            return 0.2
        elif word_count > 50:
            return 0.1
        else:
            return 0.0

    def _deduplicate_and_select_top(self, results: list, k: int) -> list:
        # Remove duplicates and select top k results. 
        seen_ids = set()
        unique_results = []
        
        for result in results:
            if result['id'] not in seen_ids:
                seen_ids.add(result['id'])
                unique_results.append(result)
        
        return unique_results[:k]

    #Deleting the current model 
    def reset(self):
        #Deletes all vectors from pinecone index.
        try:
            # Delete all vectors from the index
            self.index.delete(delete_all=True)
            self.item_counter = 0
            print("Pinecone index has been reset (all vectors deleted).")
        except Exception as e:
            print(f"Error resetting Pinecone index: {e}. Please try again.")


def start_chat(): 
    # Global instance of a real Pinecone VDB   
    vdb1 = PineconeVectorDB(index) 
    print("Using real Pinecone Vector Database with integrated embeddings")
    vdb1.build() 
    # Global counter for VDB item IDs 
    vdb_index_counter = 0 

    # No need for get_embedding function - Pinecone handles it automatically
    def get_embedding(text: str) -> list:
        """With integrated models, Pinecone handles embedding generation automatically."""
        # Return None since we don't need to generate embeddings manually
        return None  
    
    return vdb1, vdb_index_counter, get_embedding

# Initialize the Pinecone VDB
vdb, vdb_index_counter, get_embedding = start_chat() 

Using real Pinecone Vector Database with integrated embeddings
Pinecone index is already built and optimized for similarity search.


In [None]:
# Simulated Vector Database Class   
'''
class SimulatedVectorDB:
    def __init__(self):
        self.items = []  # Stores (item_id, embedding, text_content) tuples
        self._is_built = False # Internal flag to simulate index building state
        self._embedding_dim = None # To store embedding dimension from first added item

    def add_item(self, item_id: int, embedding: list, text_content: str):
        """Adds an item with its embedding and original text content to the VDB."""
        if not isinstance(embedding, list):
            embedding = embedding.tolist() # Ensure numpy arrays are converted to list
        self.items.append((item_id, embedding, text_content))
        print(f"DEBUG: Added item ID {item_id} to VDB (Text: '{text_content[:30]}...')")


    def build(self, n_trees: int):
        """Simulates building the VDB index. For this simple model, it just sets a flag."""
        if self._is_built:
            raise RuntimeError("You can't build a built index. Call reset() first if you want to rebuild.")

        if not self.items:
            print("WARNING: Building VDB on an empty set of items. Add items first.")

        # In a real VDB, this would optimize the search structure
        print(f"DEBUG: Simulated VDB building with {n_trees} trees. Index is now ready for efficient search.")
        self._is_built = True

    def query(self, query_embedding: list, k: int = 1) -> list:
        """
        Simulates querying the VDB. For simplicity, returns the k most recent items
        or tries to find a specific ID if the query 'embedding' (here, we'll use a placeholder for actual query)
        contains specific instructions (like 'ID: X').

        In a real scenario, this would perform a similarity search.
        Here, we'll implement a very basic "retrieval by ID" or "latest items" for demonstration.
        """
        if not self._is_built:
            # In a real VDB, query might fail or be inefficient if not built
            print("WARNING: Querying VDB before it's built. Performance will be poor in a real system.")

        if not self.items: 
            return []

        # Simple simulation: return the latest k items if no specific ID is requested
        # For a true RAG, you'd calculate cosine similarity between query_embedding
        # and all stored embeddings, then return the top-k most similar.

        # To simulate finding by "ID: X" in user's example, we'll look for a string in query_embedding
        # This is a hack for the 'Retrieved memory based on query (ID: 3)' prompt.
        # A real query_embedding would be a list of floats.

        # Let's just return the last k items added for now as a general "retrieval".
        # A true "query for ID X" would be handled differently if the user wants specific ID retrieval.
        retrieved_results = []
        for i in range(1, min(k + 1, len(self.items) + 1)):
            item_id, _, text_content = self.items[-i]
            retrieved_results.append(f"Retrieved content (ID: {item_id}): '{text_content}'")

        return retrieved_results

    def reset(self):
        """Resets the VDB, allowing it to be built again."""
        self.items = []
        self._is_built = False
        self._embedding_dim = None
        print("DEBUG: VDB has been reset.")


def start_chat(): 
    # Global instance of our simulated VDB
    vdb = SimulatedVectorDB()
    # Global flag to ensure vdb.build() is called only once
    vdb_built_flag = False
    # Global counter for VDB item IDs
    vdb_index_counter = 0

    # (Assume get_embedding function is defined elsewhere, e.g., from a model)
    # Placeholder for get_embedding if it's not defined in the scope of execution
    def get_embedding(text: str) -> list:
        """Placeholder for an actual embedding generation function."""
        # In a real scenario, this would call a model to get a vector embedding
        # For simulation, just return a dummy embedding based on text length or a hash
        return [float(ord(c)) / 100 for c in text[:10]] # A dummy, simple embedding
start_chat() 
'''

In [None]:
# @title Chat History Management and Truncation
# Initialize global chat_history (if not already done)
chat_history = [] 


def manage_chat_history(user_message, system_response):
    global chat_history, vdb_index_counter, vdb_built_flag, vdb # Ensure vdb is global here

    chat_history.append({"role": "user", "parts": [user_message]})
    chat_history.append({"role": "model", "parts": [system_response]})

    print(f"\n--- After adding new messages ---")
    print(f"Current chat_history length: {len(chat_history)}")

    if len(chat_history) > MAX_CHAT_HISTORY_LENGTH * 2:
        num_to_remove = len(chat_history) - MAX_CHAT_HISTORY_LENGTH * 2
        messages_to_store = chat_history[:num_to_remove]
        chat_history = chat_history[num_to_remove:]

        print(f"\n--- Truncation initiated ---")
        print(f"Number of messages to truncate: {num_to_remove}")

        # Add items to VDB
        for message in messages_to_store:
            text_content = message["parts"][0]
            embedding = get_embedding(text_content) # Assuming get_embedding is available
            vdb.add_item(vdb_index_counter, embedding, text_content) # Pass text_content
            vdb_index_counter += 1

        # Only build the index ONCE, after the first batch of items is added
        if not vdb_built_flag and vdb_index_counter > 0:
            vdb.build(10) # Build the index with 10 trees (parameter is simulated)
            vdb_built_flag = True # Set flag to True so it's not built again

        print(f"Truncated {num_to_remove} messages and stored in VDB.")
        print(f"New chat_history length after truncation: {len(chat_history)}")
        print(f"Total items in VDB: {vdb_index_counter}")

# --- Test the functionality ---
# Reset VDB for a clean test run
vdb.reset()
vdb_built_flag = False
vdb_index_counter = 0
chat_history = [] # Reset chat history too for a clean start

print("--- Initializing for test ---")

# Simulate some conversation
manage_chat_history("Hello, how are you?", "I'm doing well, thank you!")
manage_chat_history("What is the capital of France?", "Paris is the capital of France.")

# This should trigger truncation and VDB storage
manage_chat_history("Can you tell me more about AI?", "AI is a rapidly evolving field.")

# Simulate retrieval from VDB
print(f"\n--- Retrieval from VDB ---")
# In a real RAG, you'd query with an embedding of the current user input.
# Here, we'll just demonstrate retrieving the latest few items from the VDB for completeness.
query_results = vdb.query(get_embedding("dummy query for retrieval"), k=2) # k=2 to get more than one
if query_results:
    for result in query_results:
        print(result)
else:
    print("VDB is empty. No retrieval.")

DEBUG: VDB has been reset.
--- Initializing for test ---

--- After adding new messages ---
Current chat_history length: 2

--- After adding new messages ---
Current chat_history length: 4

--- Truncation initiated ---
Number of messages to truncate: 2
DEBUG: Added item ID 0 to VDB (Text: 'Hello, how are you?...')
DEBUG: Added item ID 1 to VDB (Text: 'I'm doing well, thank you!...')
DEBUG: Simulated VDB building with 10 trees. Index is now ready for efficient search.
Truncated 2 messages and stored in VDB.
New chat_history length after truncation: 2
Total items in VDB: 2

--- After adding new messages ---
Current chat_history length: 4

--- Truncation initiated ---
Number of messages to truncate: 2
DEBUG: Added item ID 2 to VDB (Text: 'What is the capital of France?...')
DEBUG: Added item ID 3 to VDB (Text: 'Paris is the capital of France...')
Truncated 2 messages and stored in VDB.
New chat_history length after truncation: 2
Total items in VDB: 4

--- Retrieval from VDB ---
Retrieved 

In [None]:
# @title Main Chat Functionality
'''
Given to me 


# Define retrieve_from_vdb function
def retrieve_from_vdb(query_text: str) -> list:
    """
    Retrieves relevant context from the VDB based on the query text.
    Returns a list of strings, where each string is the content of a retrieved memory.
    """
    global vdb # Ensure vdb is accessible

    if not vdb._is_built and vdb.items:
        print("WARNING: Querying VDB before it's built. Results may not be optimal.")

    if not vdb.items:
        print("--- VDB is empty. No retrieval. ---")
        return []

    query_embedding = get_embedding(query_text)
    # The vdb.query method now returns formatted strings, so we need to parse them.
    raw_retrieved_items = vdb.query(query_embedding, k=2) # Retrieve top 2 items
    
    extracted_contexts = []
    if raw_retrieved_items:
        print("--- Retrieval from VDB ---")
        print(f"Query for retrieval: '{query_text}'")
        for item_str in raw_retrieved_items:
            # Example format from vdb.query: "Retrieved content (ID: 1): 'Hello, how are you?'"
            # We want to extract just 'Hello, how are you?'
            parts = item_str.split("': '")
            if len(parts) > 1:
                text_content = parts[1].rstrip("'") # Get the text part and remove trailing single quote
                extracted_contexts.append(text_content)
            else:
                extracted_contexts.append(item_str) # Fallback if format is unexpected

    # Print the actual content retrieved
    if extracted_contexts:
        print("Retrieved content:")
        for context in extracted_contexts:
            print(f"- '{context}'")

    return extracted_contexts
'''

#----------------------------------------------------------------------------------------------------

def retrieve_from_vdb(query_text: str) -> list:       
    # Better retrieval with multiple strategies and intelligent context selection.
    # Returns a list of strings with the most relevant past conversations.
    
    global vdb

    if not vdb._is_built:   
        print("Pinecone index is always ready for querying: No worries")   

    # Contextual query enhancement
    enhanced_query = _enhance_query_with_context(query_text, chat_history)
    
    # Use advanced semantic search
    raw_retrieved_items = vdb.query(enhanced_query, k=3)  # get more results for better selection 
    
    extracted_contexts = []
    if raw_retrieved_items:
        print("Advanced Retrieval from Pinecone VDB")
        print(f"Original query: '{query_text}'")
        print(f"Enhanced query: '{enhanced_query}'")
        
        for item_str in raw_retrieved_items:   
            # Parse results with method information
            parts = item_str.split("': '")
            if len(parts) > 1:
                text_content = parts[1].rstrip("'")
                extracted_contexts.append(text_content)
            else:
                extracted_contexts.append(item_str)

    # Intelligent context selection
    if extracted_contexts: 
        print("Retrieved content (with better ranking):")
        for i, context in enumerate(extracted_contexts, 1):
            print(f"{i}. '{context}'")
        
        # Select most relevant contexts based on current conversation
        selected_contexts = _select_most_relevant_contexts(extracted_contexts, query_text, chat_history)
        return selected_contexts

    return []    

# Query enhancement with conversation context
def _enhance_query_with_context(query_text: str, chat_history: list) -> str:
    # Enhance query using recent conversation context. 
    if not chat_history:
        return query_text
    
    # Extract recent conversation topics
    recent_messages = []
    for entry in chat_history[-4:]:  # Last 4 messages
        recent_messages.append(entry['parts'][0])
    
    # Add context keywords to query
    context_keywords = _extract_context_keywords(recent_messages)
    if context_keywords:
        enhanced_query = f"{query_text} {context_keywords}"
        return enhanced_query
    
    return query_text

# Context keyword extraction
def _extract_context_keywords(messages: list) -> str:
    # Extract important keywords from recent conversation.    
    all_text = " ".join(messages)
    words = all_text.lower().split()
    
    # Simple keyword frequency analysis
    word_freq = {}
    for word in words:
        if len(word) > 3 and word.isalpha():
            word_freq[word] = word_freq.get(word, 0) + 1
    
    # Get most frequent words
    keywords = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:3]
    return " ".join([word for word, freq in keywords])

# Intelligent context selection
def _select_most_relevant_contexts(contexts: list, query_text: str, chat_history: list) -> list:
    #Select the most relevant contexts based on current conversation. 
    if len(contexts) <= 2: 
        return contexts
    
    # Score contexts based on relevance to current conversation
    scored_contexts = []
    for context in contexts:
        score = _calculate_context_relevance(context, query_text, chat_history)
        scored_contexts.append((context, score))
    
    # Sort by relevance score and return top 2
    scored_contexts.sort(key=lambda x: x[1], reverse=True)
    return [context for context, score in scored_contexts[:2]]

# Context relevance calculation
def _calculate_context_relevance(context: str, query_text: str, chat_history: list) -> float:
    # Calculate how relevant a context is to the current conversation.  
    score = 0.0
    
    # Query similarity
    query_words = set(query_text.lower().split())
    context_words = set(context.lower().split())
    query_overlap = len(query_words.intersection(context_words)) / max(len(query_words), 1)
    score += query_overlap * 0.5
    
    # Recent conversation similarity
    if chat_history:
        recent_text = " ".join([entry['parts'][0] for entry in chat_history[-2:]])
        recent_words = set(recent_text.lower().split())
        recent_overlap = len(recent_words.intersection(context_words)) / max(len(recent_words), 1)
        score += recent_overlap * 0.3
    
    # Content quality (prefer longer, more informative responses)
    if len(context.split()) > 10: #idk if it should be > 10 
        score += 0.2
    
    return score

#----------------------------------------------------------------------------------------------------

def chat_with_gemini_with_memory():
    print("Welcome to the Pseudo-infinite Chatbot! Type 'exit' to end the conversation.")
    # Assuming 'model' is defined and initialized elsewhere (e.g., gemini-pro)
    # Assuming 'chat_history' is globally initialized as an empty list

    while True:
        user_message = input("You: ")
        if user_message.lower() == 'exit':
            print("Chat ended.")
            break

        # Retrieve relevant context from VDB
        # This function will now correctly return a list of text strings
        retrieved_context = retrieve_from_vdb(user_message)
        context_prompt = ""
        if retrieved_context:
            context_prompt = "The user has previously discussed the following:\n" + "\n".join(retrieved_context) + "\n"

        # Construct the full prompt for the LLM
        # Prepend the retrieved context to the system prompt
        full_prompt = f"{context_prompt}Current conversation:\n"
        for entry in chat_history:
            role = "User" if entry["role"] == "user" else "Model"
            full_prompt += f"{role}: {entry['parts'][0]}\n"
        full_prompt += f"User: {user_message}\nModel:"


        try:
            # Generate Gemini's response
            # Ensure 'model' is properly initialized (e.g., genai.GenerativeModel('gemini-pro'))
            response = model.generate_content(
                contents=[{"role": "user", "parts": [full_prompt]}]
            )
            gemini_response = response.candidates[0].content.parts[0].text
            print(f"Gemini: {gemini_response}")

            # Manage chat history (truncate and store in VDB if needed)
            manage_chat_history(user_message, gemini_response)

        except Exception as e:
            print(f"An error occurred: {e}")
            print("Please check your API key and ensure the model is accessible.")

# Note: Before running chat_with_gemini_with_memory(), ensure you have:
# - Initialized your Gemini 'model' object (e.g., import google.generativeai as genai; model = genai.GenerativeModel('gemini-pro'))
# - Set your Google API Key (genai.configure(api_key="YOUR_API_KEY"))
# - Run the SimulatedVectorDB class definition and the initial global variable setup (vdb, vdb_built_flag, vdb_index_counter, chat_history)
# - Run the manage_chat_history function definition.

# Example of how you would set up the globals and start the chat:
# import google.generativeai as genai
# import os
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY")) # Or your actual key
# model = genai.GenerativeModel('gemini-pro')

'''
chat_history = []
vdb = SimulatedVectorDB() # Re-initialize vdb if needed for a fresh chat session
vdb_built_flag = False
vdb_index_counter = 0 
'''

chat_with_gemini_with_memory()