# Building Semantic Memory with Embeddings

So far, we've mostly been treating the kernel as a stateless orchestration engine.
We send text into a model API and receive text out. 

In a [previous notebook](04-context-variables-chat.ipynb), we used `context variables` to pass additional
text into prompts and enrich them with more context. This allowed us to create a basic chat experience. 

However, if you relied solely on context variables, you would quickly find that your prompt
grows so large that you run into the model's token limit. What we need is a way to persist state
and build both short-term and long-term memory to empower even more intelligent applications. 

To do this, we dive into the key concept of `Semantic Memory` in the Semantic Kernel. 

In [None]:
!python -m pip install semantic-kernel==0.2.7.dev0

In [None]:
from typing import Tuple

import semantic_kernel as sk
#### NOT USING OPENAI RIGHT NOW
# from semantic_kernel.connectors.ai.open_ai import OpenAITextCompletion, OpenAITextEmbedding

In [16]:
import asyncio
from semantic_kernel.connectors.ai.open_ai import AzureTextCompletion, AzureTextEmbedding, AzureChatCompletion

# Load the Azure OpenAI settings before anything uses them: the service
# registrations below need `endpoint` and `api_key`, and they previously ran
# ahead of this assignment (a NameError in a fresh Jupyter session).
deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env()
print("deployment: ", deployment)

# Kernel with one text-completion service ("dv") and one embedding service ("ada2").
kernel = sk.Kernel()
kernel.add_text_completion_service("dv", AzureTextCompletion("text-davinci-003", endpoint, api_key))
kernel.add_text_embedding_generation_service("ada2", AzureTextEmbedding("text-embedding-ada-002", endpoint, api_key))

# Create an instance of the AzureTextEmbedding class
# (stand-alone generator built from the deployment name read from the .env settings).
embedding_generator = AzureTextEmbedding(deployment, endpoint, api_key)

# List of text samples you want to generate embeddings for
texts = ['This is a test of the emergency broadcast system. This is only a test. If this had been an actual emergency Id be running for my life']

# Define the async function to get embeddings


async def get_embeddings():
    """Embed the module-level ``texts`` with ``embedding_generator`` and print the result."""
    vectors = await embedding_generator.generate_embeddings_async(texts)
    print(vectors)

# Run the asynchronous function
# asyncio.run(get_embeddings())
await get_embeddings()


deployment:  text-embedding-ada-002
[[ 0.00294538 -0.00443515  0.01772353 ...  0.00323992 -0.00081158
  -0.00480226]]


In [None]:
from semantic_kernel.connectors.ai.open_ai import AzureTextCompletion, AzureTextEmbedding, AzureChatCompletion

# Start from a fresh kernel and (re)load the Azure OpenAI settings from .env.
kernel = sk.Kernel()
deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env()

# Alternative service registrations, kept for reference:
# # kernel.add_chat_service("chat-gpt", AzureChatCompletion(deployment, endpoint, api_key))
# kernel.add_text_completion_service("dv", AzureTextCompletion("gpt-35-turbo", endpoint, api_key))
# kernel.add_text_embedding_generation_service("ada2", AzureTextEmbedding("text-embedding-ada-002", endpoint, api_key))

# Completion service "dv" and embedding service "ada2".
kernel.add_text_completion_service(
    "dv", AzureTextCompletion("text-davinci-003", endpoint, api_key)
)
kernel.add_text_embedding_generation_service(
    "ada2", AzureTextEmbedding("text-embedding-ada-002", endpoint, api_key)
)

# In-memory (volatile) store for the embedded READMEs, then the core skills,
# imported in the same order as before.
kernel.register_memory_store(memory_store=sk.memory.VolatileMemoryStore())
for skill_class in (
    sk.core_skills.TextMemorySkill,
    sk.core_skills.TextSkill,
    sk.core_skills.TimeSkill,
    sk.core_skills.FileIOSkill,
):
    kernel.import_skill(skill_class())


This is done by using the `TextMemorySkill` which exposes the `recall` native function.

`recall` takes an input ask and performs a similarity search on the contents that have
been embedded in the Memory Store and returns the most relevant memory. 

In [None]:
# Completion settings for the chat prompt template built below.
prompt_config = sk.PromptTemplateConfig.from_completion_parameters(
    max_tokens=2000, temperature=0.7, top_p=0.8
)

# Chat-style template whose only templated content is the user's input.
prompt_template = sk.ChatPromptTemplate(
    "{{$user_input}}", kernel.prompt_template_engine, prompt_config
)

# System instructions that define the AZSDK_Bot persona and its URL-rewriting rule.
system_message = """
You are AZSDK_Bot, an expert on Azure SDKs. 
You can answer questions about Azure SDKs and provide links to relevant repositories.
You should provide a concise description of what the SDK does and how it relates to the user's question.
Prioritize the content in the prompt over your own memory when answering questions. The link to the repo should be taken from the prompt.
Whenever you see '/master/' in a prompt, replace it with '/main/'. Do not modify the URL in any other way.
""" 
prompt_template.add_system_message(system_message)
# Seed the conversation with one example user/assistant exchange.
# NOTE(review): the assistant turn repeats the system instructions in second
# person ("You are AZSDK_Bot...") rather than answering as the bot -- confirm
# this is intended.
prompt_template.add_user_message("Hi there, who are you?")
prompt_template.add_assistant_message(
    "You are AZSDK_Bot, an expert on Azure SDKs. You can answer questions about Azure SDKs and provide links to relevant repositories.Provide a concise description of what the SDK does and how it relates to the user's question. Prioritize the content in the prompt over your own memory when answering questions. The link to the repo should be taken from the prompt. Whenever you see '/master/' in a prompt, replace it with '/main/'. Do not modify the URL in any other way."
)

# Bundle the settings and template into a semantic-function config.
# NOTE(review): no visible cell consumes function_config; the only
# register_semantic_function call that would take it is commented out.
function_config = sk.SemanticFunctionConfig(prompt_config, prompt_template)

async def setup_chat_with_memory(
    kernel: sk.Kernel,
) -> Tuple[sk.SKFunctionBase, sk.ContextVariables]:
    """Create the AZSDK_Bot semantic function and its starting context.

    Args:
        kernel: Kernel on which the semantic function is created.

    Returns:
        A ``(chat function, context variables)`` pair. The context is seeded
        with three background facts, the memory collection name, a relevance
        value, an empty chat history and an empty ``sample_memories`` list.
    """
    sk_prompt = """
    AZSDK_Bot is an expert on Azure SDKs. It can answer questions about Azure SDKs and provide links to relevant repositories.
    Answer the follow question as accurately as you can based strictly on the info in this prompt. Your answer should be use a friendly and conversational tone.
    Provide the name of the repository and the link to the repository in your answer. Provide a concise and accurate one-paragraph summary of the project stored
    in the recommended repository based on the README. Do not embellish your answer and do not print anything else.
    Whenever you see '/master/' in a URL link, replace it with '/main/'. Do not modify the URL in any other way.

    Chat:
    {{$history}}
    User: {{$user_input}}
    ChatBot: """.strip()

    bot_function = kernel.create_semantic_function(
        sk_prompt, "AZSDK_Bot", max_tokens=2000, temperature=0.75, top_p=0.5
    )

    variables = sk.ContextVariables()
    # Insertion order matches the order in which the values are written below.
    seed_values = {
        "fact1": "Azure SDKs, or Software Development Kits, are collections of libraries, tools, and documentation provided by Microsoft to simplify the development of applications and services that interact with Azure cloud services.",
        "fact2": "The Azure SDKs are open-source projects, hosted on GitHub, that allow you to build applications for Azure.",
        "fact3": "The Azure SDKs are available for multiple languages, including .NET, Java, JavaScript, Python, and Go.",
        sk.core_skills.TextMemorySkill.COLLECTION_PARAM: "AzureSDKs",
        # Written under the literal key "relevance" rather than a
        # TextMemorySkill constant.
        "relevance": 0.8,
        "history": "",
        "sample_memories": [],
    }
    for name, value in seed_values.items():
        variables[name] = value

    return bot_function, variables

The `RelevanceParam` is used in memory search and is a measure of the relevance score from 0.0 to 1.0, where 1.0 means a perfect match. We encourage users to experiment with different values.

Now that we've included our memories, let's chat!

In [None]:
#### 
import requests 

### Add in data from local README files

In [None]:
import os, json

In [None]:
def load_json_from_file(file_path):
    """Parse the JSON document stored at ``file_path`` and return the result.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    default encoding, so README text containing non-ASCII characters loads
    the same way on every operating system.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as infile:
        return json.load(infile)



In [None]:
path_to_top = "./"
path_to_READMEs = "data/READMEs/"

# One JSON file of README records per SDK language, loaded in this order.
readme_list_files = (
    "azure_sdk_readme_net_list.json",
    "azure_sdk_readme_rust_list.json",
    "azure_sdk_readme_java_list.json",
    "azure_sdk_readme_python_list.json",
    "azure_sdk_readme_javascript_list.json",
)

# Concatenate every file's records into one list, echoing each path as it is read.
data = []
for filename in readme_list_files:
    README_path = os.path.join(path_to_top, path_to_READMEs, filename)
    print(README_path)
    data = data + load_json_from_file(README_path)



In [None]:
len(data)

In [None]:
data[0]

In [None]:
path_to_top = "./"
path_to_READMEs = "data/READMEs/"

# One JSON file of sample-repository records per SDK language, loaded in this order.
sample_list_files = (
    "azure_sdk_samples_net_list.json",
    "azure_sdk_samples_python_list.json",
    "azure_sdk_samples_java_list.json",
    "azure_sdk_samples_javascript_list.json",
)

# Concatenate every file's records into one list, echoing each path as it is read.
samples = []
for filename in sample_list_files:
    README_path = os.path.join(path_to_top, path_to_READMEs, filename)
    print(README_path)
    samples = samples + load_json_from_file(README_path)





In [None]:
len(samples)


In [None]:
samples[0]

In [None]:
 def makeObjectFromJSONList(data):
     """Index a list of repository records by their ``"repo_name"``.

     Args:
         data: Iterable of repository records; each usable record is a mapping
             with a ``"repo_name"`` key.

     Returns:
         A dict of the form ``{repo_name: record}``, where ``record`` is the
         whole original entry. When several records share a name, the last
         one wins.

     Every record is echoed to stdout as it is processed. A record that has no
     ``"repo_name"`` key, or that cannot be indexed by it, is printed again and
     skipped instead of aborting the whole run.
     """
     github_files = {}
     for repo in data:
         print("repo", repo)
         try:
             github_files[repo["repo_name"]] = repo
         except (KeyError, TypeError):
             # KeyError: mapping without "repo_name". TypeError: the record is
             # not subscriptable by a string key, or the name is unhashable.
             # Other exceptions (including Ctrl-C) are no longer swallowed.
             print(repo)
     return github_files

In [None]:

# Index the README records and the sample records by repository name
# (makeObjectFromJSONList keys each record on its "repo_name").
readmeObj = makeObjectFromJSONList(data)
sampleObj = makeObjectFromJSONList(samples)

In [None]:
len(readmeObj.keys())

In [None]:
len(sampleObj.keys())

In [None]:
import asyncio
memory_collection_name = "AzureSDKs"
print("Adding some GitHub file URLs and their descriptions to a volatile Semantic Memory.")
print("This will take a few minutes.")

i = 0
for entry, value in readmeObj.items():
    try:
        if len(value["README_text"]) < 100:
            pass
        else:
            await kernel.memory.save_information_async(
                collection=memory_collection_name,
                text=value["README_text"][:2000],
                id=entry,
                description=value["link_to_repo"]
            )
            print("  URL {} saved".format(i))
    except:
        print("failed to embed")
        print("failed with repo ",value["repo_name"])
        print("length of README = ",value["README_text"])
    i += 1
    if i % 200 == 0: # rate-limiting hack to get around Windows error: "ValueError: too many file descriptors in select()" 
        print("Pausing for 5 seconds...")
        await asyncio.sleep(5)


In [None]:
import asyncio
memory_collection_name = "SDKsamples"
print("Adding some GitHub file URLs and their descriptions to a volatile Semantic Memory.")
print("This will take a few minutes.")

i = 0
for entry, value in sampleObj.items():
    try:
        if len(value["README_text"]) < 100:
            pass
        else:
            await kernel.memory.save_information_async(
                collection=memory_collection_name,
                text=value["README_text"][:2000],
                id=entry,
                description=value["link_to_repo"]
            )
            print("  URL {} saved".format(i))
    except:
        print("failed to embed")
        print("failed with repo ",value["repo_name"])
        print("length of README = ",value["README_text"])
    i += 1
    if i % 200 == 0: # rate-limiting hack to get around Windows error: "ValueError: too many file descriptors in select()" 
        print("Pausing for 5 seconds...")
        await asyncio.sleep(5)

In [None]:
async def createMemories(ask, collection_name):
    """Search ``collection_name`` in the kernel's memory for entries matching ``ask``.

    Despite the name, nothing is stored here: this is a lookup that forwards
    to ``kernel.memory.search_async`` with ``limit=1`` and
    ``min_relevance_score=0.75`` and returns its result unchanged.

    Args:
        ask: Query text to match against the stored memories.
        collection_name: Name of the memory collection to search.

    Returns:
        Whatever ``search_async`` returns. Callers in this notebook treat it
        as a sequence whose items expose ``id``, ``description`` and ``text``.
    """
    return await kernel.memory.search_async(
        collection_name, ask, limit=1, min_relevance_score=0.75
    )

In [None]:
# Prompt for the summarising semantic function: the text to summarise is
# substituted for {{$input}}, followed by the instruction below.
summary_prompt = """{{$input}} 
Provide a consice description of the repository described in the content above. What is it for and who would want to use it? Do not include information that is not strictly factual.
"""
# `summarize` is called by combineMemoryWithPrompt with (truncated) README text.
summarize = kernel.create_semantic_function(
    summary_prompt, max_tokens=2000, temperature=0.5, top_p=0.5)


In [None]:
def combineMemoryWithPrompt(memories,user_input):
    """Build the augmented prompt from the first retrieved memory and the user's text.

    Args:
        memories: Non-empty sequence of memory results; only ``memories[0]`` is
            used (its ``text``, ``id`` and ``description``).
        user_input: The text the user typed.

    Returns:
        One string made of, in order: a fixed instruction, a model-generated
        summary of the README, the repo name, the repo link, the first 2000
        characters of the README, and the user's input.
    """
    catalyst = "Assume a user is asking for information about the following GitHub repository. Provide an informative description, using a friendly and helpful tone. Retain the repo name and repo link to include in your answer to the user.\n"
    readme_excerpt = memories[0].text[:2000]
    summary = summarize(readme_excerpt)

    # Remove the service name from the repo name: for an id with "/" in it,
    # keep the second segment; otherwise keep the id as it is.
    id_segments = memories[0].id.split('/')
    if len(id_segments) > 1:
        repo_name = id_segments[1]
    else:
        repo_name = memories[0].id

    pieces = [
        catalyst,
        str(summary),
        "  Repo name: ",
        repo_name,
        "  Repo link: ",
        memories[0].description,
        "  ",
        readme_excerpt,
        user_input,
    ]
    return "".join(pieces)

In [None]:
import re

# Patterns used by chat() to classify the user's text. Each one is anchored at
# the start of the string, so it matches only input that begins with "/sdk",
# "/sample" or "/example" respectively (chat() applies them with re.IGNORECASE).
findsdk = r'^\/sdk.*$'
findsample = r'^\/sample.*$'
findexample = r'^\/example.*$'

async def chat(
    kernel: sk.Kernel, chat_func: sk.SKFunctionBase, context: sk.ContextVariables
) -> bool:
    """Run one turn of the interactive chat.

    Reads a line from the user, optionally augments it with a retrieved memory,
    sends it to ``chat_func`` and appends the exchange to ``context["history"]``.

    Input that starts with ``/sample`` or ``/example`` is augmented from the
    "SDKsamples" collection, and input that starts with ``/sdk`` from the
    "AzureSDKs" collection. Anything else, or a request with no matching
    memory, is sent to the model unchanged.

    Args:
        kernel: Kernel used to run ``chat_func``.
        chat_func: Semantic function that produces the bot's answer.
        context: Context variables; ``"user_input"`` and ``"history"`` are updated.

    Returns:
        ``False`` when the user typed ``exit`` or input was interrupted
        (``KeyboardInterrupt`` / ``EOFError``), ``True`` otherwise.
    """
    try:
        user_input = input("User:> ")
        print(f"User:> {user_input}")
        if user_input == "exit":
            # Leave before spending any embedding or completion calls on the
            # exit keyword.
            print("\n\nExiting chat...")
            return False

        # mega_prompt will store the constructed prompt that will be sent to the chatbot
        mega_prompt = user_input
        if user_input != "":
            # re.search returns a Match or None (never False), so the flags are
            # tested by truthiness.
            needsdk = re.search(findsdk, user_input, flags=re.IGNORECASE)
            needsample = re.search(
                findsample, user_input, flags=re.IGNORECASE
            ) or re.search(findexample, user_input, flags=re.IGNORECASE)

            # Only search a collection whose result can actually be used; a
            # sample match takes precedence over an SDK match.
            memories = []
            if needsample:
                memories = await createMemories(user_input, "SDKsamples")
            if not memories and needsdk:
                memories = await createMemories(user_input, "AzureSDKs")

            # if there are memories for that request, combine the memory with the user input
            if memories:
                mega_prompt = combineMemoryWithPrompt(memories, user_input)
        context["user_input"] = mega_prompt
    except (KeyboardInterrupt, EOFError):
        print("\n\nExiting chat...")
        return False

    print("Thinking...")
    answer = await kernel.run_async(chat_func, input_vars=context)
    # History records what the user typed, not the augmented prompt.
    context["history"] += f"User:> {user_input} AZSDK_Bot:> {answer} "
    print(f"AZSDK_Bot:> {answer}")
    return True

In [None]:
print("Setting up a chat about Azure SDKs...")
chat_func, context = await setup_chat_with_memory(kernel)

print("Begin chatting (type 'exit' to exit):\n")
chatting = True
while chatting:
    chatting = await chat(kernel, chat_func, context)