In [None]:

!pip install faiss-cpu sentence-transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [7]:
!pip install -q google-generativeai faiss-cpu numpy

import google.generativeai as genai
import numpy as np
import faiss
from collections import deque
import hashlib
import time
import os
import pickle
import random

genai.configure(api_key="AIzaSyDtd6NQkJ6a4pJY1TC0ha_0oGDBLQrPJmM")
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

class MemoryBot:
    def __init__(self, short_capacity=6):
        self.short_term = deque(maxlen=short_capacity)
        self.mid_term = []
        self.long_term_texts = []
        self.long_term_vectors = None
        self.index = None
        self.query_history = {}
        self._load_memory()

    def _hash(self, text):
        return hashlib.sha256(text.encode()).hexdigest()

    def _get_embedding(self, text):
        result = model.embed_content(text, task_type="retrieval_document")
        return np.array(result["embedding"]).astype("float32")

    def _store_to_long_term(self, text):
        if text in self.long_term_texts:
            return
        self.long_term_texts.append(text)
        embedding = self._get_embedding(text)
        if self.index is None:
            self.index = faiss.IndexFlatL2(len(embedding))
            self.long_term_vectors = np.array([embedding])
            self.index.add(self.long_term_vectors)
        else:
            self.long_term_vectors = np.vstack([self.long_term_vectors, embedding])
            self.index.add(np.array([embedding]))
        self._save_memory()

    def add_memory(self, user_input, bot_reply):
        msg = {"user": user_input, "bot": bot_reply, "time": time.time()}
        segment = f"User: {user_input}\nBot: {bot_reply}"
        user_input_lower = user_input.lower()
        if "remember for long term" in user_input_lower:
            self._store_to_long_term(segment)
        elif "remember" in user_input_lower:
            self.mid_term.append(msg)
        else:
            key = self._hash(user_input)
            self.query_history[key] = self.query_history.get(key, 0) + 1
            if self.query_history[key] >= 2:
                self._store_to_long_term(segment)
            else:
                self.short_term.append(msg)
        self._save_memory()

    def retrieve_long_term(self, query, top_k=2):
        if self.index is None:
            return []
        query_vec = self._get_embedding(query)
        D, I = self.index.search(np.array([query_vec]), top_k)
        return [self.long_term_texts[i] for i in I[0] if i < len(self.long_term_texts)]

    def generate_reply(self, user_input, max_retries=3):
        short_context = "\n".join([f"User: {m['user']}\nBot: {m['bot']}" for m in self.short_term])
        mid_context = "\n".join([f"User: {m['user']}\nBot: {m['bot']}" for m in self.mid_term])
        long_context = "\n".join(self.retrieve_long_term(user_input))
        prompt = f"""
You are a helpful assistant. Use prior memory and general world knowledge to answer the question.

Short-term memory:
{short_context}

Mid-term memory:
{mid_context}

Long-term memory:
{long_context}

Now answer the current question:
User: {user_input}
"""
        for attempt in range(max_retries):
            try:
                response = model.generate_content(prompt)
                bot_reply = response.text.strip()
                self.add_memory(user_input, bot_reply)
                return bot_reply
            except Exception as e:
                print(f"[Retry {attempt + 1}/{max_retries}] Gemini API failed: {e}")
                time.sleep(random.uniform(2, 5))
        return "Sorry, I couldn't connect to the model right now."

    def _save_memory(self):
        memory = {
            "short_term": list(self.short_term),
            "mid_term": self.mid_term,
            "long_term_texts": self.long_term_texts,
            "long_term_vectors": self.long_term_vectors,
            "query_history": self.query_history,
        }
        with open("memory_data.pkl", "wb") as f:
            pickle.dump(memory, f)

    def _load_memory(self):
        if os.path.exists("memory_data.pkl"):
            with open("memory_data.pkl", "rb") as f:
                data = pickle.load(f)
                self.short_term = deque(data.get("short_term", []), maxlen=6)
                self.mid_term = data.get("mid_term", [])
                self.long_term_texts = data.get("long_term_texts", [])
                self.long_term_vectors = data.get("long_term_vectors", None)
                self.query_history = data.get("query_history", {})
                if self.long_term_vectors is not None:
                    self.index = faiss.IndexFlatL2(self.long_term_vectors.shape[1])
                    self.index.add(self.long_term_vectors)

bot = MemoryBot()
print("Gemini MemoryBot (type 'exit' to quit)\n")

while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Bot: Goodbye!")
        break
    try:
        reply = bot.generate_reply(user_input)
        print("Bot:", reply)
    except Exception as e:
        print("Bot: Sorry, something went wrong.\n", e)


Gemini MemoryBot (type 'exit' to quit)

You: hello
[Retry 1/3] Gemini API failed: 'GenerativeModel' object has no attribute 'embed_content'
Bot: Hello!
You: what is 1+1
[Retry 1/3] Gemini API failed: 'GenerativeModel' object has no attribute 'embed_content'
Bot: 2
You: how many oceans are there
Bot: There are five oceans: the Arctic, Atlantic, Indian, Pacific, and Southern.
You: remember that i am 12 yrs old
Bot: Okay, I'll remember that you've told me you are 12 years old.  Is there anything else I can help you with?
You: exit
Bot: Goodbye!


In [11]:


def show_all_memory(bot):
    print("\n🧠 Short-Term Memory (STM):")
    for i, m in enumerate(bot.short_term):
        print(f"{i+1}. User: {m['user']}\n   Bot: {m['bot']}\n")

    print("📌 Mid-Term Memory (MTM):")
    for i, m in enumerate(bot.mid_term):
        print(f"{i+1}. User: {m['user']}\n   Bot: {m['bot']}\n")

    print("📚 Long-Term Memory (LTM):")
    for i, text in enumerate(bot.long_term_texts):
        print(f"{i+1}. {text}\n")


show_all_memory(bot)



🧠 Short-Term Memory (STM):
1. User: what is 1+1
   Bot: 2

2. User: i am 23 yrs old 
   Bot: Okay, I understand you've told me you are 23 years old.  Is there anything else I can help you with?

3. User: i am 23 yrs old
   Bot: Okay, I understand you've told me you are 23 years old. Is there anything else I can help you with?

4. User: how old is donald trump
   Bot: Donald Trump was born on June 14, 1946.  Therefore, as of today, his age would need to be calculated using the current date.  I do not have access to real-time information, including the current date.

5. User: what is capital of india
   Bot: The capital of India is New Delhi.

6. User: how many oceans are there
   Bot: There are five oceans: the Arctic, Atlantic, Indian, Pacific, and Southern.

📌 Mid-Term Memory (MTM):
1. User: remember that i am 12 yrs old
   Bot: Okay, I'll remember that you've told me you are 12 years old.  Is there anything else I can help you with?

📚 Long-Term Memory (LTM):
1. User: hello
Bot: Hel