## 04.03 Setting up the Milvus Cache

In [1]:
#Setup database & collection
from pymilvus import connections
from pymilvus import db,Collection
from pymilvus import MilvusException

from pymilvus import utility

#Names for the connection alias, database and collection
conn_name = "cache_conn"
db_name="cache_db"
collection_name="llm_cache"

#Register the Milvus connection settings under the alias held in conn_name.
#The alias is passed through ** so it always matches the name used by
#connect() and the db.* calls below, even if conn_name is changed.
#NOTE(review): host, port and credentials are hard-coded - consider loading
#them from the environment. Also confirm that pymilvus honours the
#"username" key (its documentation uses "user").
connections.add_connection(
    **{conn_name: {
        "host": "localhost",
        "port": "19530",
        "username" : "username",
        "password" : "password"
    }})


#Open the connection registered above
connections.connect(conn_name)

#Create the cache database only if it is not already present,
#so the cell can be re-run safely
current_dbs=db.list_database(using=conn_name)

if db_name not in current_dbs:
    print("Creating database :", db_name)
    db.create_database(db_name, using=conn_name) #default db is "default"
else:
    print(db_name, ": Database already exists")

#Switch the connection to the cache database
db.using_database(db_name, using=conn_name)

Creating database : cache_db


In [2]:
#Create a Collection for cache
from pymilvus import CollectionSchema, FieldSchema, DataType, Collection
import json

#Define fields in the cache
#Autogenerated INT64 primary key for each entity.
#No max_length here: it is a VARCHAR setting and does not show up in the
#resulting schema for this field.
cache_id = FieldSchema(
    name="cache_id",
    dtype=DataType.INT64,
    auto_id=True,
    is_primary=True)

#Text for the input prompt
prompt_text= FieldSchema(
    name="prompt_text",
    dtype=DataType.VARCHAR,
    max_length=2048)

#Text for the LLM response
response_text= FieldSchema(
    name="response_text",
    dtype=DataType.VARCHAR,
    max_length=2048)

#Embedding for the input prompt
prompt_embedding = FieldSchema(
    name="prompt_embedding",
    dtype=DataType.FLOAT_VECTOR,
    dim=1536 #Define based on embedding used
)

#Define the schema for the cache collection.
#Field order matters: rows are later inserted as column lists in the order
#prompt_text, response_text, prompt_embedding (cache_id is auto-generated).
cache_schema=CollectionSchema(
    fields=[cache_id, prompt_text, response_text, prompt_embedding],
    description="Cache for LLM",
    enable_dynamic_field=True
)

#Create the collection on the cache connection
cache_collection=Collection(
    name=collection_name,
    schema=cache_schema,
    using=conn_name,
    shard_num=2
)

print("Schema : ", cache_collection.schema, "\n")

#Build an index for the prompt embedding field.
#The metric type (L2) is the same one the cache search uses later on.
index_params = {
    "metric_type":"L2",
    "index_type":"IVF_FLAT",
    "params" :{"nlist":1024}
}

cache_collection.create_index(
    field_name="prompt_embedding",
    index_params=index_params
)

#Flush the collection to persist
cache_collection.flush()
#Load the collection in memory so it can be searched
cache_collection.load()

Schema :  {'auto_id': True, 'description': 'Cache for LLM', 'fields': [{'name': 'cache_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'prompt_text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2048}}, {'name': 'response_text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 2048}}, {'name': 'prompt_embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 1536}}], 'enable_dynamic_field': True} 



## 04.04 Inference Process with Caching

In [3]:
from transformers import AutoTokenizer
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
import os
import time

#Setup OpenAI API key to use OpenAI's LLM.
#The key is read from the environment instead of being hard-coded, so a key
#already configured outside the notebook is no longer overwritten by a blank.
openai_api_key=os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = openai_api_key

#Create an LLM object with temperature 0
#NOTE(review): text-davinci-003 may no longer be served by OpenAI - confirm
#and switch to a currently supported completion model if needed.
llm= OpenAI(temperature=0., model="text-davinci-003")

#Setup embedding model for creating embeddings
embeddings_model = OpenAIEmbeddings()

#Threshold for treating a cached prompt as a match.
#Despite the name this is an L2 *distance* bound (smaller = closer match);
#it is passed to the search below as the "radius" parameter.
similarity_threshold=0.3

search_params = {
    "metric_type": "L2", 
    "offset": 0, 
    "ignore_growing": False, 
    "params": {"nprobe": 20, "radius":similarity_threshold}
}

#create a function to run the inference loop
def get_response(prompt):
    """Return the response for `prompt`, served from the cache when possible.

    The prompt is embedded with `embeddings_model`, and `cache_collection` is
    searched (using `search_params`) for the single closest stored prompt.
    If the search returns a hit, its stored `response_text` is returned.
    Otherwise `llm` is called and the prompt, response and embedding are
    inserted into the cache. The elapsed time is printed in both cases.

    Args:
        prompt: Prompt text to answer.

    Returns:
        The cached or newly generated response text.
    """
    start_time=time.perf_counter()

    #create embedding for incoming prompt
    prompt_embed=embeddings_model.embed_query(prompt)

    #Check cache if result exists
    cache_results=cache_collection.search(
        data=[prompt_embed],
        anns_field="prompt_embedding",
        param=search_params,
        limit=1, #Look for the top result only
        expr=None,
        output_fields=["prompt_text", "response_text"],
        consistency_level="Strong"
    )

    #One query vector was sent, so only the first result set is relevant
    hits=cache_results[0]

    if len(hits) > 0:
        #Cache hit
        print(prompt, " :\n Cache hit : ",hits)
        returned_response = hits[0].entity.get("response_text")

    else:
        #Find answer with LLM
        returned_response=llm(prompt)
        print(prompt, ":\n LLM returned :", returned_response)

        #save prompt/response to cache; columns follow the schema order
        #(prompt_text, response_text, prompt_embedding)
        try:
            cache_collection.insert(
                [[prompt], [returned_response], [prompt_embed]]
            )
        except MilvusException as cache_error:
            #A failed cache write (e.g. text longer than the VARCHAR limit)
            #must not discard the answer the LLM already produced
            print("Could not cache response :", cache_error)

    end_time = time.perf_counter()
    print("Time elapsed :",  end_time - start_time, "\n")
    return returned_response
    

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [4]:
#Build up the cache: send each seed prompt through the inference function
#so that its response gets stored
seed_prompts = (
    "In which year was Abraham Lincoln born?",
    "What is distance between the sun and the moon?",
    "How many years have Lebron James played in the NBA?",
    "What are the advantages of the python language?",
    "What is the typical height of an elephant",
)

for seed_prompt in seed_prompts:
    response=get_response(seed_prompt)


In which year was Abraham Lincoln born? :
 LLM returned : 

Abraham Lincoln was born in 1809.
Time elapsed : 1.182729959487915 

What is distance between the sun and the moon? :
 LLM returned : 

The average distance between the Sun and the Moon is 238,855 miles (384,400 kilometers).
Time elapsed : 1.1267571449279785 

How many years have Lebron James played in the NBA? :
 LLM returned : 

Lebron James has played in the NBA for 17 years.
Time elapsed : 0.8115711212158203 

What are the advantages of the python language? :
 LLM returned : 

1. Easy to Learn: Python has a very simple and straightforward syntax which makes it very easy to learn and understand. It is also a high-level language, so it abstracts away many of the complex details of the computer.

2. Readability: Python code is highly readable and uses English keywords, which makes it easier to understand and debug.

3. Versatility: Python can be used for a wide variety of tasks, from web development to data science. It is als

In [5]:
#Rephrased prompts, run against the cache built in the previous cell
for rephrased_prompt in (
    "List some advantages of the python language",
    "How tall is an elephant?",
):
    response=get_response(rephrased_prompt)

List some advantages of the python language  :
 Cache hit :  ["id: 446817698860241201, distance: 0.04885578155517578, entity: {'response_text': '\\n\\n1. Easy to Learn: Python has a very simple and straightforward syntax which makes it very easy to learn and understand. It is also a high-level language, so it abstracts away many of the complex details of the computer.\\n\\n2. Readability: Python code is highly readable and uses English keywords, which makes it easier to understand and debug.\\n\\n3. Versatility: Python can be used for a wide variety of tasks, from web development to data science. It is also highly extensible, so you can add new features and libraries as needed.\\n\\n4. Open Source: Python is an open source language, so it is free to use and modify. This makes it a great choice for anyone who wants to get started with programming.\\n\\n5. Libraries and Frameworks: Python has a large number of libraries and frameworks available, which makes it easy to get started with an