In [None]:
# @title Setup and Imports

import google.generativeai as genai
import numpy as np
import os
import pinecone

# API keys are read from the environment so that no secret is stored in the
# notebook (or leaked through version control / shared copies).
# Set them before starting the kernel, e.g.:
#   export PINECONE_API_KEY="..."
#   export GOOGLE_API_KEY="..."
# SECURITY: any key that was previously pasted into this cell must be treated
# as compromised and revoked/rotated in the provider console.
pineconeAPIKEY = os.environ.get("PINECONE_API_KEY")  # None when not configured

APIKEY = os.environ.get("GOOGLE_API_KEY")
if not APIKEY:
    # Fail fast with an actionable message instead of a late, opaque API error.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment before running this notebook."
    )
genai.configure(api_key=APIKEY)

# Initialize the Generative Model
model = genai.GenerativeModel('gemini-1.5-flash')

# Chat history and truncation settings
chat_history = []
MAX_CHAT_HISTORY_LENGTH = 2  # Number of recent turns to keep in active memory

Collecting pinecone
  Downloading pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0 (from pinecone)
  Downloading pinecone_plugin_assistant-1.7.0-py3-none-any.whl.metadata (28 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting packaging<25.0,>=24.2 (from pinecone-plugin-assistant<2.0.0,>=1.6.0->pinecone)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Downloading pinecone-7.3.0-py3-none-any.whl (587 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.6/587.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_assistant-1.7.0-py3-none-any.whl (239 kB)
Downloading packaging-24.2-py3-none-any.whl (65 kB)
Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, packaging, pinecone-plugin-assistant, 

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Simulated Vector Database Class
class SimulatedVectorDB:
    """In-memory stand-in for a vector database.

    Items are kept in insertion order as ``(item_id, embedding, text_content)``
    tuples. No similarity search is performed: ``query`` ignores the query
    embedding and simply returns the most recently added items.
    """

    def __init__(self):
        self.items = []  # Stores (item_id, embedding, text_content) tuples
        self._is_built = False  # Internal flag to simulate index building state
        self._embedding_dim = None  # Length of the first embedding added (informational only)

    def add_item(self, item_id: int, embedding: list, text_content: str):
        """Adds an item with its embedding and original text content to the VDB.

        Args:
            item_id: Identifier stored alongside the item.
            embedding: A list, an object exposing ``tolist()`` (e.g. a numpy
                array), or any other iterable of numbers such as a tuple.
            text_content: The original text the embedding was computed from.
        """
        if not isinstance(embedding, list):
            # Array-likes convert themselves; other iterables (tuples,
            # generators) have no tolist() and are materialized with list().
            to_list = getattr(embedding, "tolist", None)
            embedding = to_list() if callable(to_list) else list(embedding)

        # Remember the dimension of the first embedding. It is not enforced,
        # because the placeholder embedder produces variable-length vectors.
        if self._embedding_dim is None and isinstance(embedding, list):
            self._embedding_dim = len(embedding)

        self.items.append((item_id, embedding, text_content))
        print(f"DEBUG: Added item ID {item_id} to VDB (Text: '{text_content[:30]}...')")

    def build(self, n_trees: int):
        """Simulates building the VDB index. For this simple model, it just sets a flag.

        Args:
            n_trees: Only echoed in the debug message; it has no other effect.

        Raises:
            RuntimeError: If the index has already been built and ``reset()``
                has not been called since.
        """
        if self._is_built:
            raise RuntimeError("You can't build a built index. Call reset() first if you want to rebuild.")

        if not self.items:
            print("WARNING: Building VDB on an empty set of items. Add items first.")

        # In a real VDB, this would optimize the search structure
        print(f"DEBUG: Simulated VDB building with {n_trees} trees. Index is now ready for efficient search.")
        self._is_built = True

    def query(self, query_embedding: list, k: int = 1) -> list:
        """
        Simulates querying the VDB by returning the ``k`` most recently added items.

        ``query_embedding`` is accepted for interface compatibility but is not
        used: a real implementation would rank stored embeddings by similarity
        to it and return the top-k matches.

        Args:
            query_embedding: Ignored by this simulation.
            k: Maximum number of items to return; values <= 0 yield an empty list.

        Returns:
            A list of strings, newest item first, each formatted as
            ``Retrieved content (ID: <item_id>): '<text_content>'``.
        """
        if not self._is_built:
            # In a real VDB, query might fail or be inefficient if not built
            print("WARNING: Querying VDB before it's built. Performance will be poor in a real system.")

        if not self.items:
            return []

        # Clamp k to [0, len(items)]; slicing from an explicit start index
        # avoids the items[-0:] pitfall, which would return everything.
        count = max(0, min(k, len(self.items)))
        newest_first = self.items[len(self.items) - count:][::-1]
        return [
            f"Retrieved content (ID: {item_id}): '{text_content}'"
            for item_id, _, text_content in newest_first
        ]

    def reset(self):
        """Resets the VDB, allowing it to be built again."""
        self.items = []
        self._is_built = False
        self._embedding_dim = None
        print("DEBUG: VDB has been reset.")


# Global instance of our simulated VDB, shared by manage_chat_history() and
# retrieve_from_vdb().
vdb = SimulatedVectorDB()
# Global flag to ensure vdb.build() is called only once; build() raises
# RuntimeError when invoked on an index that is already built.
vdb_built_flag = False
# Global counter for VDB item IDs: manage_chat_history() uses the current value
# as the ID of the next stored message and increments it after each add_item().
vdb_index_counter = 0

# (Assume get_embedding function is defined elsewhere, e.g., from a model)
# Placeholder for get_embedding if it's not defined in the scope of execution
def get_embedding(text: str) -> list:
    """Return a dummy embedding for ``text`` (stand-in for a real model call).

    Only the first 10 characters are considered; each contributes its Unicode
    code point divided by 100. The result therefore has at most 10 entries and
    is empty for an empty string.
    """
    vector = []
    for ch in text[:10]:
        code_point = float(ord(ch))
        vector.append(code_point / 100)
    return vector

In [None]:
# @title Chat History Management and Truncation
# Overrides the MAX_CHAT_HISTORY_LENGTH assigned in the setup cell: with a value
# of 1 only one turn (one user message + one model message) stays in
# chat_history, so truncation can be observed after very few messages.
MAX_CHAT_HISTORY_LENGTH = 1 # For quick truncation testing

# Re-initialize the global chat_history so this cell starts from an empty conversation
chat_history = []


def manage_chat_history(user_message, system_response):
    """Record one conversation turn and spill the oldest messages into the VDB.

    The user message and the model response are appended to the global
    ``chat_history``. When the history then holds more than
    ``MAX_CHAT_HISTORY_LENGTH * 2`` messages, the oldest surplus messages are
    removed from it, embedded with ``get_embedding`` and stored in the global
    ``vdb`` under consecutive IDs taken from ``vdb_index_counter``. The VDB
    index is built the first time messages are stored, tracked through
    ``vdb_built_flag``.

    Args:
        user_message: Text the user sent in this turn.
        system_response: Text the model answered with.
    """
    global chat_history, vdb_index_counter, vdb_built_flag, vdb

    chat_history.extend([
        {"role": "user", "parts": [user_message]},
        {"role": "model", "parts": [system_response]},
    ])

    print("\n--- After adding new messages ---")
    print(f"Current chat_history length: {len(chat_history)}")

    # Each retained turn consists of two messages (user + model).
    capacity = MAX_CHAT_HISTORY_LENGTH * 2
    if len(chat_history) <= capacity:
        return

    overflow = len(chat_history) - capacity
    evicted, chat_history = chat_history[:overflow], chat_history[overflow:]

    print("\n--- Truncation initiated ---")
    print(f"Number of messages to truncate: {overflow}")

    # Move every evicted message into the VDB, one ID per message.
    for old_message in evicted:
        body = old_message["parts"][0]
        vector = get_embedding(body)
        vdb.add_item(vdb_index_counter, vector, body)
        vdb_index_counter += 1

    # The index is built a single time, right after the first batch is stored.
    if vdb_index_counter > 0 and not vdb_built_flag:
        vdb.build(10)  # tree count is only a simulated parameter
        vdb_built_flag = True

    print(f"Truncated {overflow} messages and stored in VDB.")
    print(f"New chat_history length after truncation: {len(chat_history)}")
    print(f"Total items in VDB: {vdb_index_counter}")

# --- Test the functionality ---
# Reset VDB for a clean test run (the flag and counter are module globals that
# mirror the VDB state, so they are reset together with it)
vdb.reset()
vdb_built_flag = False
vdb_index_counter = 0
chat_history = [] # Reset chat history too for a clean start

print("--- Initializing for test ---")

# Simulate some conversation.
# With MAX_CHAT_HISTORY_LENGTH = 1 (set earlier in this cell) chat_history may
# hold 2 messages: the first call fits, and the second call already triggers
# truncation, moving the first turn into the VDB and building the index.
manage_chat_history("Hello, how are you?", "I'm doing well, thank you!")
manage_chat_history("What is the capital of France?", "Paris is the capital of France.")

# This triggers truncation again: the France question/answer is moved to the VDB
manage_chat_history("Can you tell me more about AI?", "AI is a rapidly evolving field.")

# Simulate retrieval from VDB
print(f"\n--- Retrieval from VDB ---")
# In a real RAG, you'd query with an embedding of the current user input.
# SimulatedVectorDB.query ignores the embedding and returns the most recently
# added items (newest first), so k=2 prints the two latest stored messages.
query_results = vdb.query(get_embedding("dummy query for retrieval"), k=2) # k=2 to get more than one
if query_results:
    for result in query_results:
        print(result)
else:
    print("VDB is empty. No retrieval.")

In [None]:
# @title Main Chat Functionality

# Define retrieve_from_vdb function
def retrieve_from_vdb(query_text: str, k: int = 2) -> list:
    """
    Retrieves stored memories from the global VDB for the given query text.

    The query text is embedded with ``get_embedding`` and passed to
    ``vdb.query``. Each result string has the form
    ``Retrieved content (ID: <id>): '<text>'``; the wrapper is stripped so that
    only ``<text>`` is returned. A result that does not match this format is
    returned unchanged.

    Args:
        query_text: The text to retrieve context for.
        k: Maximum number of memories to request from the VDB (default 2).

    Returns:
        A list of strings, where each string is the content of a retrieved
        memory. Empty when the VDB holds no items.
    """
    if not vdb._is_built and vdb.items:
        print("WARNING: Querying VDB before it's built. Results may not be optimal.")

    if not vdb.items:
        print("--- VDB is empty. No retrieval. ---")
        return []

    query_embedding = get_embedding(query_text)
    raw_retrieved_items = vdb.query(query_embedding, k=k)

    extracted_contexts = []
    if raw_retrieved_items:
        print("--- Retrieval from VDB ---")
        print(f"Query for retrieval: '{query_text}'")
        for item_str in raw_retrieved_items:
            # The wrapper ends with "): '" (closing paren of the ID, then the
            # opening quote of the text). Partition on its first occurrence so
            # quotes or colons inside the text itself are left untouched.
            _, separator, quoted_text = item_str.partition("): '")
            well_formed = (
                item_str.startswith("Retrieved content (ID: ")
                and separator
                and quoted_text.endswith("'")
            )
            if well_formed:
                # Drop exactly one closing quote; rstrip("'") would also eat
                # quotes that belong to the stored text.
                extracted_contexts.append(quoted_text[:-1])
            else:
                extracted_contexts.append(item_str)  # Fallback if format is unexpected

    # Print the actual content retrieved
    if extracted_contexts:
        print("Retrieved content:")
        for context in extracted_contexts:
            print(f"- '{context}'")

    return extracted_contexts

def chat_with_gemini_with_memory():
    """Run an interactive console chat with the global Gemini ``model``.

    For every line read from ``input`` (until the user types 'exit', in any
    letter case) the function:

    1. fetches stored memories via ``retrieve_from_vdb``,
    2. assembles one text prompt from those memories, the global
       ``chat_history`` and the new user message,
    3. sends it to ``model.generate_content`` and prints the reply,
    4. hands the turn to ``manage_chat_history``.

    Any exception raised while generating or recording the reply is printed
    and the loop continues with the next input.
    """
    print("Welcome to the Pseudo-infinite Chatbot! Type 'exit' to end the conversation.")

    user_message = input("You: ")
    while user_message.lower() != 'exit':
        # Memories recalled from the VDB come first in the prompt, if any.
        prompt_pieces = []
        recalled = retrieve_from_vdb(user_message)
        if recalled:
            prompt_pieces.append(
                "The user has previously discussed the following:\n" + "\n".join(recalled) + "\n"
            )

        # Then the in-memory conversation, one "Role: text" line per message.
        prompt_pieces.append("Current conversation:\n")
        for entry in chat_history:
            speaker = "User" if entry["role"] == "user" else "Model"
            prompt_pieces.append(f"{speaker}: {entry['parts'][0]}\n")

        # Finally the new message, leaving "Model:" open for the completion.
        prompt_pieces.append(f"User: {user_message}\nModel:")
        full_prompt = "".join(prompt_pieces)

        try:
            request = [{"role": "user", "parts": [full_prompt]}]
            response = model.generate_content(contents=request)
            first_candidate = response.candidates[0]
            gemini_response = first_candidate.content.parts[0].text
            print(f"Gemini: {gemini_response}")

            # Record the turn; older messages may be moved into the VDB.
            manage_chat_history(user_message, gemini_response)
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Please check your API key and ensure the model is accessible.")

        user_message = input("You: ")

    print("Chat ended.")

# Note: Before running chat_with_gemini_with_memory(), ensure you have:
# - Initialized your Gemini 'model' object (the setup cell uses genai.GenerativeModel('gemini-1.5-flash'))
# - Set your Google API Key (genai.configure(api_key=...)), preferably read from the environment
# - Run the SimulatedVectorDB class definition and the initial global variable setup (vdb, vdb_built_flag, vdb_index_counter, chat_history)
# - Run the get_embedding, manage_chat_history and retrieve_from_vdb function definitions.

# Example of how you would set up the globals and start the chat:
# import google.generativeai as genai
# import os
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
# model = genai.GenerativeModel('gemini-1.5-flash')

# Start from a fresh session: an empty history and a new, unbuilt VDB, with the
# companion flag and ID counter reset so they agree with the new VDB instance.
chat_history = []
vdb = SimulatedVectorDB() # Re-initialize vdb if needed for a fresh chat session
vdb_built_flag = False
vdb_index_counter = 0 

# Starts the interactive loop; it reads from input() until the user types 'exit'.
chat_with_gemini_with_memory()