In [None]:
! pip install elasticsearch ibm-watsonx-ai llama-index

In [10]:
import os
from dotenv import load_dotenv
from ibm_watsonx_ai import APIClient
from elasticsearch import Elasticsearch, helpers
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from llama_index.core.node_parser import SentenceSplitter
from ibm_watsonx_ai.foundation_models.prompts import (PromptTemplate, PromptTemplateManager)

# Load key=value pairs from a local .env file into the process environment so that the
# os.getenv(...) lookups in the configuration cell (IBM_API_KEY, ES_USERNAME,
# ES_PASSWORD) can find the credentials.
load_dotenv()

True

# Edit variables as required

In [11]:
# --- Identifiers -------------------------------------------------------------
INDEX_NAME = "randy_v1"
SPACE_ID = "d36dea60-d955-45a1-9ebc-73095f296806"
PIPELINE_ID = "ingest-pipeline-randy"

# --- Credentials (secrets come from the environment, never from the notebook) --
WML_CREDENTIALS = {
    "apikey": os.getenv("IBM_API_KEY"),
    "url": "https://us-south.ml.cloud.ibm.com",
}
ES_CREDENTIALS = (os.getenv("ES_USERNAME"), os.getenv("ES_PASSWORD"))

# Id of the embedding model used by the ingest pipeline and by kNN queries.
model_name = "intfloat__multilingual-e5-base"

# --- Elasticsearch client ----------------------------------------------------
# NOTE(review): verify_certs=False turns off TLS certificate validation and
# ssl_show_warn=False hides the resulting warning; consider supplying the
# cluster's CA certificate instead.
es_url = "https://b282a6e7-a2c0-4f71-b760-149c6170bf9f.6131b73286f34215871dfad7254b4f7d.databases.appdomain.cloud:31360"
es = Elasticsearch(
    hosts=es_url,
    basic_auth=ES_CREDENTIALS,
    verify_certs=False,
    ssl_show_warn=False,
    request_timeout=600,
)

# --- watsonx.ai model --------------------------------------------------------
llm_model = ModelInference(
    model_id="meta-llama/llama-3-1-70b-instruct",
    credentials=WML_CREDENTIALS,
    params={
        GenParams.DECODING_METHOD: "greedy",
        GenParams.MAX_NEW_TOKENS: 4096,
    },
    space_id=SPACE_ID,
)

# --- Text chunking -----------------------------------------------------------
splitter = SentenceSplitter(chunk_size=450, chunk_overlap=50)

# --- Prompt template manager and API client, both scoped to SPACE_ID ----------
prompt_mgr = PromptTemplateManager(credentials=WML_CREDENTIALS, space_id=SPACE_ID)

wml_client = APIClient(credentials=WML_CREDENTIALS)
wml_client.set.default_space(SPACE_ID)

# Connectivity check: shows the cluster info as the cell output.
es.info()

ObjectApiResponse({'name': 'm-2.b282a6e7-a2c0-4f71-b760-149c6170bf9f.018bbe2e6bd84ddab8bedc84b9842f82.6131b73286f34215871dfad7254b4f7d.databases.appdomain.cloud', 'cluster_name': 'b282a6e7-a2c0-4f71-b760-149c6170bf9f', 'cluster_uuid': '13ZhY08cSt6K_hdIs3T4CQ', 'version': {'number': '8.12.1', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': '6185ba65d27469afabc9bc951cded6c17c21e3f3', 'build_date': '2024-02-01T13:07:13.727175297Z', 'build_snapshot': False, 'lucene_version': '9.9.2', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

# Read and chunk data

In [12]:
# Read the whole corpus into memory. The encoding is passed explicitly so the file is
# decoded the same way on every platform instead of depending on the locale default.
# NOTE(review): assumes the corpus file is UTF-8 encoded; confirm for other data sets.
with open("../data/txt/wiki_movie_plots.txt", encoding="utf-8") as f:
    data = f.read()

# Split the raw text into chunks with the SentenceSplitter configured earlier.
chunks = splitter.split_text(data)

# Number of chunks produced (shown as the cell output).
len(chunks)

48

# Create Elasticsearch pipeline

In [4]:
# Start from a clean slate: drop any earlier copy of the index (no error if absent).
es.indices.delete(index=INDEX_NAME, ignore_unavailable=True)

# Ingest pipeline with a single inference processor: run `model_name` on the "text"
# field and write the result to "text_embedding".
inference_processor = {
    "inference": {
        "model_id": model_name,
        "input_output": {"input_field": "text", "output_field": "text_embedding"},
    }
}
pipeline_body = {"processors": [inference_processor]}

# To remove the pipeline when cleaning up: es.ingest.delete_pipeline(id=PIPELINE_ID)
es.ingest.put_pipeline(id=PIPELINE_ID, body=pipeline_body)

# Documents written to the index go through PIPELINE_ID by default.
index_settings = {"index": {"default_pipeline": PIPELINE_ID}}
index_mappings = {
    "properties": {
        "text": {"type": "text"},
        "text_embedding": {"type": "dense_vector"},
    }
}

es.indices.create(
    index=INDEX_NAME,
    settings=index_settings,
    mappings=index_mappings,
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'randy_v1'})

# Ingest data

In [5]:
# Make re-ingestion idempotent WITHOUT dropping the index. Deleting the index here would
# discard the explicit mappings and default_pipeline it was created with, and the bulk
# request below would then silently recreate it with dynamic mappings. Only the
# previously indexed documents are removed; a missing index is tolerated.
es.delete_by_query(
    index=INDEX_NAME,
    query={"match_all": {}},
    refresh=True,
    ignore_unavailable=True,
)


def get_documents():
    """Yield one bulk "index" action per entry of the module-level ``chunks`` list.

    Every action targets ``INDEX_NAME``, carries the chunk under the ``text`` key and
    names ``PIPELINE_ID`` explicitly as the ingest pipeline to apply.
    """
    for chunk in chunks:
        yield {
            "_op_type": "index",
            "_index": INDEX_NAME,
            "_source": {"text": chunk},
            "pipeline": PIPELINE_ID,
        }


# Returns (number of successful actions, list of errors); shown as the cell output.
helpers.bulk(es, get_documents(), chunk_size=100)


(48, [])

# Deploy prompts

In [13]:
# Single-turn RAG prompt using the Llama 3 chat special tokens.
# Placeholders: {context} (the documents placed inside <documents>) and {user}
# (the user's question).
# NOTE(review): the literal opens with a line break, so the rendered prompt starts with
# a newline before <|begin_of_text|> - confirm this is intended.
RAG_PROMPT = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.
Ensure responses are contextually relevant and supported by provided documents. 
If no relevant information is found, respond with "I apologize, but I could not locate any relevant information." <|eot_id|><|start_header_id|>user<|end_header_id|>

<documents>
{context}
</documents>

{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Multi-turn variant: the documents are embedded in the system turn and {history} is
# inserted between the system turn and the assistant header.
# Placeholders: {context} and {history}.
# NOTE(review): {history} is presumably a pre-formatted sequence of chat turns including
# their header/eot tokens - verify against the caller.
CONVERSATION_PROMPT = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.
Ensure responses are contextually relevant and supported by provided documents. 
If no relevant information is found, respond with "I apologize, but I could not locate any relevant information." 

<documents>
{context}
</documents>
<|eot_id|>{history}<|start_header_id|>assistant<|end_header_id|>
"""

# Query-rewrite prompt: instructs the model to resolve ambiguous references in the latest
# question and answer with the rewritten question only.
# Placeholder: {history}.
QUERY_REWRITE_PROMPT = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.
Given the conversation history and the latest question, resolve any ambiguous references in the latest question and rewrite the question.
Respond only with the rewritten question.<|eot_id|>{history}<|start_header_id|>assistant<|end_header_id|>
"""

In [15]:
# Prompt templates to publish: name (used for both the stored template and its
# deployment), template text, and the placeholder names the template expects.
prompt_mapping = [
    {
        "name": "conversation-prompt",
        "prompt": CONVERSATION_PROMPT,
        "variables": ["context", "history"]
    },
    {
        "name": "rewrite-prompt",
        "prompt": QUERY_REWRITE_PROMPT,
        "variables": ["history"]
    }
]

# Foundation model used by every template and deployment created in this cell.
prompt_model_id = "meta-llama/llama-3-1-70b-instruct"

# Deployments that already exist in the space, keyed by name.
deployments = {
    x["metadata"]["name"]: x["metadata"]["id"]
    for x in wml_client.deployments.get_details()["resources"]
}

for spec in prompt_mapping:
    prompt_name, prompt, variables = spec["name"], spec["prompt"], spec["variables"]

    # Resolve the stored template id afresh on every iteration. Leaving it unset when
    # nothing is found (as the previous version did) either raised NameError or reused
    # the id from the previous iteration and overwrote the wrong template.
    prompts = prompt_mgr.list().query(f"NAME == '{prompt_name}'")
    prompt_id = None if prompts.empty else prompts["ID"].values[0]

    if prompt_name in deployments and prompt_id is None:
        raise RuntimeError(
            f"Deployment '{prompt_name}' exists but no stored prompt template with "
            "that name was found; cannot determine which template to update."
        )

    if prompt_id is not None:
        # An existing template must be unlocked before it can be modified.
        prompt_mgr.unlock(prompt_id)

    prompt_template = PromptTemplate(
        name=prompt_name,
        model_id=prompt_model_id,
        input_text=prompt,
        input_variables=variables,
        model_params={"decoding_method": "greedy", "max_new_tokens": 4096, "repetition_penalty": 1},
    )

    if prompt_name in deployments:
        # Already deployed: update the stored template in place.
        deployment_id = deployments[prompt_name]
        prompt_mgr.update_prompt(prompt_id, prompt_template)
    else:
        # Not deployed yet: store the template and create an online deployment for it.
        stored_prompt_template = prompt_mgr.store_prompt(prompt_template)
        meta_props = {
            wml_client.deployments.ConfigurationMetaNames.NAME: prompt_name,
            wml_client.deployments.ConfigurationMetaNames.ONLINE: {},
            wml_client.deployments.ConfigurationMetaNames.BASE_MODEL_ID: prompt_model_id,
        }
        deployment_details = wml_client.deployments.create(stored_prompt_template.prompt_id, meta_props)
        deployment_id = deployment_details["metadata"]["id"]

# Re-read the deployments so the cell output reflects what exists after the loop.
{
    x["metadata"]["name"]: x["metadata"]["id"]
    for x in wml_client.deployments.get_details()["resources"]
}

{'rag-prompt-cole': '07c0b0e4-4793-4c01-bd65-c377d1f89c96',
 'conversation-prompt-sherwin': '8c866c8b-6058-40a0-9ea5-e7a35d056263',
 'rag-prompt-billie': 'b2987ea9-64ba-46ab-875e-99a8160e3d20',
 'rag-prompt-sherwin': 'ec143ddf-d420-49c3-86d5-0767480d4064',
 'conversation-prompt-randy': 'f76d6d86-8048-4a7f-b4f5-a44516cb3333',
 'rag-prompt-randy': 'fd0e3094-c939-41ce-abf6-29fe42da1af1'}

# Test RAG

In [19]:
question = "what movies did scott cooper directed"

# kNN retrieval: the question is embedded with `model_name` via query_vector_builder
# and matched against the "text_embedding" field; only the "text" field is returned.
knn_clause = {
    "field": "text_embedding",
    "query_vector_builder": {
        "text_embedding": {"model_id": model_name, "model_text": question}
    },
    "k": 5,
    "num_candidates": 50,
}
results = es.search(
    index=INDEX_NAME,
    body={"knn": knn_clause, "_source": False, "fields": ["text"]},
)

# Show every raw hit followed by a blank line.
hits = results["hits"]["hits"]
for hit in hits:
    print(hit, end="\n\n")

# Wrap each retrieved passage in its own <document> element, separated by blank lines.
passages = [hit["fields"]["text"][0] for hit in hits]
document_separator = "\n</document>\n\n<document>\n"
context = "<document>\n" + document_separator.join(passages) + "\n</document>"

# Fill the RAG template and generate the answer with the locally configured model.
rag_prompt = RAG_PROMPT.format(context=context, user=question)
generated_text = llm_model.generate_text(rag_prompt)
print(generated_text)

{'_index': 'randy_v1', '_id': 'lpL9c5IB2mOl2fk3M_MW', '_score': 0.8868777, '_ignored': ['text.keyword'], 'fields': {'text': ['Having returned to Leisureland, Paul assists Ngoc Lan in her duties of providing needed aid and supplies to the people of the slums.\n\n\n\nyear: 2017\ntitle: Wonderstruck\ndirector: Todd Haynes\ncast: Todd Haynes (director); Brian Selznick (screenplay); Oakes Fegley, Julianne Moore, Millicent Simmonds, Michelle Williams, Jaden Michael, Tom Noonan, James Urbaniak, Amy Hargreaves\nplot: The film interlaces two stories set fifty years apart, switching frequently between them. Each tells the story of a child\'s quest. In 1927, Rose (Simmonds) runs away from her father\'s New Jersey home to find her mother/idol, the actress Lillian Mayhew (Moore). In 1977, recently orphaned Ben (Fegley) runs away from his Minnesota home in search of his father.[3]\n\n\n\nyear: 2017\ntitle: Only the Brave\ndirector: Joseph Kosinski\ncast: Joseph Kosinski (director); Ken Nolan, Eric W

In [20]:
# Id of the deployed prompt template to call.
deployment_id = "fd0e3094-c939-41ce-abf6-29fe42da1af1"

# Values substituted into the deployed template's placeholders.
prompt_variables = {"context": context, "user": question}

generated_text = wml_client.deployments.generate_text(
    deployment_id=deployment_id,
    params={"prompt_variables": prompt_variables},
)

print(generated_text)

Scott Cooper directed the movie "Hostiles" (2017).
