In [1]:
from elasticsearch import Elasticsearch
from elasticsearch_llm_cache import ElasticsearchLLMCache
from elasticsearch.exceptions import NotFoundError

# common libraries
from dotenv import load_dotenv
import os
from os import environ
import openai
from icecream import ic

# Pull the settings from the local .env file into the process environment.
load_dotenv()

# Elasticsearch connection URL with the basic-auth credentials embedded.
# Each lookup raises KeyError if the variable is not set.
es_url = "https://{user}:{pw}@{host}:{port}".format(
    user=os.environ['elasticsearch_user'],
    pw=os.environ['elasticsearch_pw'],
    host=os.environ['elasticsearch_host'],
    port=os.environ['elasticsearch_port'],
)

# Read the OpenAI key once, then expose it under the upper-case variable
# name as well as keeping it in a notebook variable.
open_api_key = os.environ['openai_api_key']
os.environ['OPENAI_API_KEY'] = open_api_key

# Give the same key to the openai module.
openai.api_key = open_api_key


In [3]:
# Client for the cluster described by es_url (credentials are embedded in the URL).
es = Elasticsearch([es_url])

index_name = 'llm_cache_test'
# model_id = '.elser_model_1'
model_id = 'sentence-transformers__msmarco-minilm-l-12-v3'

# Requesting a specific trained-model id that is not present raises
# NotFoundError (HTTP 404) instead of returning a falsy response, so the
# "does not exist" case has to be handled as an exception.
try:
    es.ml.get_trained_models(model_id=model_id)
except NotFoundError:
    ic(f'{model_id} does not exist.')
else:
    ic(f'{model_id} exists.')

# Report only. The index is deliberately NOT created here: an index created
# without mappings would make the later cache.create_index(...) call see it
# as "already exists" and skip installing the cache's own mapping.
if es.indices.exists(index=index_name):
    ic(f'{index_name} exists.')
else:
    ic(f'{index_name} does not exist.')

INFO:elastic_transport.transport:GET https://demo-defc18.es.us-central1.gcp.cloud.es.io:9243/_ml/trained_models/sentence-transformers__msmarco-minilm-l-12-v3 [status:200 duration:0.135s]
ic| f'{model_id} exists.': 'sentence-transformers__msmarco-minilm-l-12-v3 exists.'
INFO:elastic_transport.transport:HEAD https://demo-defc18.es.us-central1.gcp.cloud.es.io:9243/llm_cache_test [status:200 duration:0.035s]
ic| f'{index_name} exists.': 'llm_cache_test exists.'


In [7]:
import os
import time
#print(os.environ['ELASTIC_CLOUD_ID'])
#time.sleep(10)
from elasticsearch import Elasticsearch

from elasticsearch_llm_cache import (
    ElasticsearchLLMCache,  # Import the class from the file
)

from pprint import pprint
import time

# Reuse the Elasticsearch client created earlier in the notebook.
es_client = es

# Output size of the embedding model referenced by model_id
# (msmarco-MiniLM-L-12-v3 produces 384-dimensional vectors); the index
# mapping has to declare the same number of dimensions.
EMBEDDING_DIMS = 384

# Report whether the cache index is already present. It is not created here
# with a bare indices.create(): that would produce an index without mappings
# and cache.create_index() below would then skip it as "already exists".
if es_client.indices.exists(index=index_name):
    # NOTE(review): an existing index keeps whatever mapping it was created
    # with; delete it first if it was created with different dims.
    ic(f'{index_name} exists.')
else:
    ic(f'{index_name} does not exist.')


# Initialize your caching class; index creation is done explicitly below so
# the vector dimensionality can be specified.
cache = ElasticsearchLLMCache(es_client=es_client, index_name=index_name, es_model_id=model_id, create_index=False)
cache.create_index(dims=EMBEDDING_DIMS)
cache


INFO:elastic_transport.transport:HEAD https://demo-defc18.es.us-central1.gcp.cloud.es.io:9243/llm_cache_test [status:200 duration:0.035s]
ic| f'{index_name} exists.': 'llm_cache_test exists.'
INFO:elastic_transport.transport:HEAD https://demo-defc18.es.us-central1.gcp.cloud.es.io:9243/llm_cache_test [status:200 duration:0.034s]
INFO:elasticsearch_llm_cache:Index llm_cache_test already exists.


<elasticsearch_llm_cache.ElasticsearchLLMCache at 0x10795ac90>

In [18]:
prompt = "Hello, how can I help?"

# Ask the cache for this prompt before doing anything else.
llm_response = cache.query(prompt_text=prompt)

if not llm_response:
    # Nothing came back from the cache: use a stand-in answer (a real run
    # would call the LLM here) and store it for next time.
    llm_response = "I'm here to assist you!"  # Assume this response is fetched from LLM
    cache.add(prompt=prompt, response=llm_response)
    ic(f"Cache add: {prompt}")
else:
    # The cache returned an entry; show the stored response.
    ic(f"Cache hit: {llm_response['response']}")

INFO:elastic_transport.transport:POST https://demo-defc18.es.us-central1.gcp.cloud.es.io:9243/llm_cache_test/_search [status:200 duration:0.050s]
INFO:elastic_transport.transport:POST https://demo-defc18.es.us-central1.gcp.cloud.es.io:9243/llm_cache_test/_update/1TtWtYsBIWOTmRMzt2aa [status:200 duration:0.039s]
ic| f"Cache hit: {llm_response['response']}": 'Cache hit: ["I\'m here to assist you!"]'


In [10]:
# Shell out to the eland CLI to import the Hugging Face model
# "sentence-transformers/msmarco-MiniLM-L-12-v3" into the cluster at es_url,
# registered with the "text_embedding" task type.
# NOTE(review): es_url embeds the Elasticsearch username and password, so they
# are passed on the command line here — confirm that is acceptable.
# NOTE(review): presumably this import is what provides the model referenced by
# model_id in the cells above; on a fresh kernel this cell likely has to run
# before them — confirm the intended cell order.
!eland_import_hub_model --url "$es_url" \
      --hub-model-id "sentence-transformers/msmarco-MiniLM-L-12-v3" \
      --task-type "text_embedding"

2023-11-08 21:17:00,295 INFO : Establishing connection to Elasticsearch
2023-11-08 21:17:00,432 INFO : Connected to cluster named '4dadf200942c4f3fb6113618e49a559c' (version: 8.11.0)
2023-11-08 21:17:00,432 INFO : Loading HuggingFace transformer tokenizer and model 'sentence-transformers/msmarco-MiniLM-L-12-v3'
Downloading (…)okenizer_config.json: 100%|█████| 432/432 [00:00<00:00, 1.02MB/s]
Downloading (…)lve/main/config.json: 100%|█████| 629/629 [00:00<00:00, 9.52MB/s]
Downloading (…)solve/main/vocab.txt: 100%|███| 232k/232k [00:00<00:00, 5.64MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████| 112/112 [00:00<00:00, 424kB/s]
Downloading (…)/main/tokenizer.json: 100%|███| 466k/466k [00:00<00:00, 6.47MB/s]
Downloading pytorch_model.bin: 100%|█████████| 134M/134M [00:08<00:00, 15.1MB/s]
Downloading (…)abaf1/.gitattributes: 100%|█████| 736/736 [00:00<00:00, 5.49MB/s]
Downloading (…)_Pooling/config.json: 100%|█████| 190/190 [00:00<00:00, 1.46MB/s]
Downloading (…)2f517abaf1/README.md: 10