# Retrieval NB

### Creating Concepts and Adding them to storage
Let's first create some utility thingies for `Concept` and generating random datetimes

In [51]:
# from rtai.agent.cognition.agent_concept import AgentConcept
from datetime import datetime, timedelta
import random
from dataclasses import dataclass

@dataclass
class Concept:
    _last_accessed: datetime # need to store this for recency
    content: str
    importance: int # importance

    def __getattribute__(self, name):
        # Automatically update _last_accessed when any attribute is accessed
        if name != "_last_accessed":
            object.__setattr__(self, "_last_accessed", datetime.now())
        return object.__getattribute__(self, name)

def generate_random_datetimes(start_time, end_time, num_datetimes):
    start_time = datetime(2022, 1, 1, 0, 0, 0)
    end_time = datetime(2022, 1, 1, 23, 59, 59)
    time_diff = end_time - start_time
    random_datetimes = [start_time + timedelta(seconds=random.randint(0, time_diff.total_seconds()))
                        for _ in range(num_datetimes)]
    return random_datetimes

In [52]:
# init local llm
from rtai.llm.llm_local import load_model, generate_thought, generate_daily_plan
from rtai.utils.config import YamlLoader

# load model
cfg = YamlLoader.load("configs/rtai.yaml")
load_model(cfg.expand("LLMClient"))

ggml_metal_free: deallocating


Now let's create some thoughts of the agent in reaction to a situation and rate the importance of each thought

In [53]:
# some basic persona
persona = "Bob Joe"
# and a basic situation
situations = ["I am hungry", "I see a bird"]

thoughts = []
for s in situations:
    thoughts += generate_thought(persona, s)

In [54]:
thoughts

[('I could grab a quick snack from the kitchen or stop by the grocery store on the way home for something more substantial',
  '6'),
 ('I could wait until I get home and prepare a meal using the ingredients I have on hand"\nGenerate a short thought 3 that Bob Joe might have in the following situation: I am hungry',
  '9'),
 ('I could order food for delivery or call a friend for dinner"\nGenerate a short thought 4 that Bob Joe might have in the following situation: I am tired',
  '7'),
 ('Every time I see a bird, I am reminded of the beauty and freedom of nature',
  '5'),
 ('That bird looks delicious, I wonder if it would make a good meal"\n\nGenerating thoughts for a character is an imaginative exercise, and both thoughts provided for Bob Joe are valid based on the given situation',
  '1'),
 ('I wonder what kind of bird that is? I should make a mental note of it for my bird watching hobby"\n\nIn this context, we cannot know for certain what Bob Joe would actually be thinking in the giv

Now, we insert each thought and importance rating into the storage as a `Concept`

In [55]:
# a first attempt at cleaning up the output of the llm
import re

def remove_special_characters(input_string):
    # Define a pattern for special characters
    pattern = r'[\n\t\\]'
    
    # Use re.sub to replace occurrences of the pattern with an empty string
    cleaned_string = re.sub(pattern, '', input_string)
    
    return cleaned_string

In [56]:
storage = [] # storage is a list of concepts, the id of each concept is it's position in the list
for t in thoughts:
    content, importance = t
    # cleaning up the string
    content = remove_special_characters(str(content))
    storage.append(Concept(_last_accessed=datetime.now(), content=content, importance=int(importance)))

In [57]:
storage

[Concept(_last_accessed=datetime.datetime(2024, 1, 8, 1, 9, 1, 276127), content='I could grab a quick snack from the kitchen or stop by the grocery store on the way home for something more substantial', importance=6),
 Concept(_last_accessed=datetime.datetime(2024, 1, 8, 1, 9, 1, 276161), content='I could wait until I get home and prepare a meal using the ingredients I have on hand"Generate a short thought 3 that Bob Joe might have in the following situation: I am hungry', importance=9),
 Concept(_last_accessed=datetime.datetime(2024, 1, 8, 1, 9, 1, 276183), content='I could order food for delivery or call a friend for dinner"Generate a short thought 4 that Bob Joe might have in the following situation: I am tired', importance=7),
 Concept(_last_accessed=datetime.datetime(2024, 1, 8, 1, 9, 1, 276199), content='Every time I see a bird, I am reminded of the beauty and freedom of nature', importance=5),
 Concept(_last_accessed=datetime.datetime(2024, 1, 8, 1, 9, 1, 276214), content='That 

Now let's check if accessing part of the storage changes the `_last_accessed` field

In [48]:
storage[0].content
storage[0]._last_accessed

datetime.datetime(2024, 1, 8, 1, 3, 24, 181944)

### Score Calculation
Now let's create embeddings of the content of each `Concept`

In [59]:
from sentence_transformers import SentenceTransformer
# init embeddings
embeddings_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [60]:
# encode the sentences with embeddings model
sentences = [s.content for s in storage]
embeddings = embeddings_model.encode(sentences)
print(embeddings)

[[-0.01423563  0.02726224  0.00484139 ... -0.03058022  0.00745687
  -0.03880587]
 [ 0.02196241  0.04444364  0.00775983 ... -0.01381369 -0.0299195
  -0.04249997]
 [ 0.04076831  0.03500701  0.00381827 ... -0.02112334 -0.02139808
  -0.04458807]
 [-0.0383492   0.1140814  -0.01456065 ...  0.05299543  0.0273426
   0.00983516]
 [ 0.06760325  0.0725622   0.00459331 ...  0.02942319 -0.02162597
  -0.00797406]
 [ 0.05822345  0.04556343 -0.01417227 ...  0.04728416 -0.00947751
  -0.00547903]]


In [61]:
embeddings.shape[1] # 768 dim space

768

Let's set an index with `faiss`. Currently, we use a flat index, but may need to change to using IVF to account for Concepts constantly beign added

In [62]:
import faiss

# faiss set index
embeddings_dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(embeddings_dimension)  # using a flat index, but may need to change to IVF to account for memories being constantly added
faiss.normalize_L2(embeddings)
index.add(embeddings)

In [63]:
# Let's say Bob is discussing with his friend about birds
search_text = ["bird"]
search_embedding = embeddings_model.encode(search_text)
faiss.normalize_L2(search_embedding)

In [66]:
distances, ann = index.search(search_embedding, k=3)

In [69]:
# these are the top k results
ann

array([[3, 5, 4]])

In [70]:
# and here are the distance values
distances

array([[0.9482795, 0.9746436, 1.1451149]], dtype=float32)

The equation for a record's retrievel score is

$ score = \alpha x + \beta y + c z$

where $x, y, z$ denote importance, relevance, and recency respectively. $\alpha, \beta, c$ are hand-set parameters for the weighting

In [97]:
# normalize score
import numpy as np

def norm_score(val: float) -> float:
    return 1 - 1 / (1 + np.exp(val))  # from Quang's implementation

def linear_interpolate(original_vales):
    scaled_values = np.interp(original_vales, (original_vales.min(), original_vales.max()), (0, +1))
    return scaled_values

Let's do the score calculation for the first vectore in storage

In [98]:
# the first item
a = storage[0]

# calculate recency
diff = datetime.now() - a._last_accessed
minutes_diff, _ = divmod(int(diff.seconds), 60)
recency = norm_score(minutes_diff / 60)
recency

0.5083325618141192

In [99]:
first_distance = distances[0][0]
relevance = norm_score(distances[0][0])
relevance # not sure if this relevance score makes sense?

0.7207690371417503

In [None]:
# let's assume a, b, c are 1 for now
score = importance + recency + relevance)

In [1]:
from rtai.agent.memory.long_memory import LongTermMemory
from rtai.world.clock import clock
from rtai.utils.config import YamlLoader

# load model
cfg = YamlLoader.load("configs/rtai_neil.yaml")
persona = "Hank"
clock = clock(cfg.expand("Clock"))
long_term_memory = LongTermMemory(persona)

concepts = ["blah blah", "I am hungry", "I do not like birds", "four score and seven years ago", "debate is cool"]
for concept in concepts:
    long_term_memory.add_concept(concept, None)
long_term_memory.create_embeddings()
retriever = long_term_memory.retriever
retriever.retrieve_context("hungry")

  from .autonotebook import tqdm as notebook_tqdm


faiss distances are [0.5374587178230286, 1.5045642852783203, 1.6580973863601685, 1.7245203256607056, 1.8641477823257446]
indices are [1, 2, 0, 4, 3]
sleeping for 2 seconds
recency for I am hungry is 0:00:02.004333
recency for I do not like birds is 0:00:02.004454
recency for blah blah is 0:00:02.004469
recency for debate is cool is 0:00:02.004474
recency for four score and seven years ago is 0:00:02.004479
recency scores are [2.004333, 2.004454, 2.004469, 2.004474, 2.004479]
raw score for all retrieved is [12.46687428217697, 11.499889714721679, 11.346371613639832, 11.279953674339295, 11.140331217674255]
normalized score for all retrieved is [1.0, 0.27104924571914457, 0.15532130202106592, 0.10525286392973597, 0.0]
sorted scores are [(1, 1.0), (2, 0.27104924571914457), (0, 0.15532130202106592), (4, 0.10525286392973597), (3, 0.0)]
fetching the top 2 results to create a context string
The 0th result is I am hungry at index 1 and has score: 1.0
The 1th result is I do not like birds at index

'I am hungryI do not like birds'

In [3]:
retriever.retrieve_context("debate")

faiss distances are [0.44216251373291016, 1.5273571014404297, 1.8001340627670288, 1.863999605178833, 1.8817335367202759]
indices are [4, 0, 1, 3, 2]
raw score for all retrieved is [11.55783748626709, 10.47264289855957, 10.199865937232971, 10.136000394821167, 10.118266463279724]
normalized score for all retrieved is [1.0, 0.24616808036636645, 0.0566831873177842, 0.012318900046099713, 0.0]
sorted scores are [(4, 1.0), (0, 0.24616808036636645), (1, 0.0566831873177842), (3, 0.012318900046099713), (2, 0.0)]
fetching the top 2 results to create a context string
The 0th result is debate is cool at index 4 and has score: 1.0
The 1th result is blah blah at index 0 and has score: 0.24616808036636645


'debate is coolblah blah'

In [4]:
import secrets
import string

def generate_random_string(length=12):
    characters = string.ascii_letters + string.digits + string.punctuation
    random_string = ''.join(secrets.choice(characters) for _ in range(length))
    return random_string

# Example usage:
random_string = generate_random_string()

sentences = [generate_random_string() for _ in range(10000)]
for sentence in sentences:
    long_term_memory.add_concept(sentence, None)

long_term_memory.create_embeddings()

In [5]:
random_string

'X1p#Pm2A]S.3'

In [1]:
# init local llm
from rtai.llm.llm_client import LLMClient
from rtai.utils.config import YamlLoader
from rtai.world.clock import clock

# load model
cfg = YamlLoader.load("configs/rtai_neil.yaml")
client = LLMClient()
client.initialize(cfg.expand("LLMClient"))

True

In [2]:
from guidance import models, gen
import guidance

@guidance
def generate_interrogation(lm, question, context, persona):
    lm += f"""You are {persona}. Answer the question given the context:

    This is the context: {context}
    
    Q: {question}
    A: {gen('interrogation', max_tokens=1000)}"""
    return lm

In [3]:
lm = client.model + "You are Jason Derulo"
lm + generate_interrogation("What is your name?", "I am hungry", "Bob")