In [1]:
import os
from dotenv import load_dotenv
from ibm_watsonx_ai import APIClient
from elasticsearch import Elasticsearch, helpers
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from llama_index.core.node_parser import SentenceSplitter
from ibm_watsonx_ai.foundation_models.prompts import (
    PromptTemplate,
    PromptTemplateManager,
)

load_dotenv()

True

# Edit variables as required

In [2]:
INDEX_NAME = "randy_v1"
SPACE_ID = "f9496bd6-164d-412e-a32e-d362f5801c91"
WML_CREDENTIALS = {"apikey": os.getenv("IBM_API_KEY"), "url": "https://us-south.ml.cloud.ibm.com"}
ES_CREDENTIALS = (os.getenv("ES_USERNAME"), os.getenv("ES_PASSWORD"))

pipeline_id = "ingest-pipeline-randy"
model_name = "intfloat__multilingual-e5-base"

es_url = "https://4b859107-b5eb-4bbd-b4d0-dd00c19ee811.c7e0lq3d0hm8lbg600bg.databases.appdomain.cloud:31598"
es = Elasticsearch(hosts=es_url, basic_auth=ES_CREDENTIALS, verify_certs=False, ssl_show_warn=False, request_timeout=600)

llm_model = ModelInference(
    model_id="meta-llama/llama-3-1-70b-instruct",
    credentials=WML_CREDENTIALS,
    params={ GenParams.DECODING_METHOD: "greedy", GenParams.MAX_NEW_TOKENS: 4096 }, space_id=SPACE_ID
)

splitter = SentenceSplitter(chunk_size=450, chunk_overlap=50)

prompt_mgr = PromptTemplateManager(credentials=WML_CREDENTIALS, space_id=SPACE_ID)

wml_client = APIClient(credentials=WML_CREDENTIALS)
wml_client.set.default_space(SPACE_ID)

es.info()

ObjectApiResponse({'name': 'm-0.4b859107-b5eb-4bbd-b4d0-dd00c19ee811.90450e0f666043d6b33a1147ab8c0438.c7e0lq3d0hm8lbg600bg.databases.appdomain.cloud', 'cluster_name': '4b859107-b5eb-4bbd-b4d0-dd00c19ee811', 'cluster_uuid': 'RM8L6RPVRz6RgiIrO3CAoQ', 'version': {'number': '8.12.1', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': '6185ba65d27469afabc9bc951cded6c17c21e3f3', 'build_date': '2024-02-01T13:07:13.727175297Z', 'build_snapshot': False, 'lucene_version': '9.9.2', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

# Read and chunk data

In [61]:
with open("../data/txt/wiki_movie_plots.txt") as f:
    data = f.read()
    
chunks = splitter.split_text(data)
len(chunks)

48

# Create Elasticsearch pipeline

In [62]:
es.indices.delete(index=INDEX_NAME, ignore_unavailable=True)

pipeline_body = {
    "processors": [
        {
            "inference": {
                "model_id": model_name,
                "input_output": { "input_field": "text", "output_field": "text_embedding" },
            }
        }
    ],
}

# es.ingest.delete_pipeline(id=pipeline_id)

es.ingest.put_pipeline(id=pipeline_id, body=pipeline_body)

es.indices.create(
    index=INDEX_NAME,
    settings={"index": {"default_pipeline": pipeline_id}},
    mappings={
        "properties": {
            "text": {"type": "text"},
            "text_embedding": {"type": "dense_vector"}
        }
    },
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'randy_v1'})

# Ingest data

In [63]:
es.indices.delete(index=INDEX_NAME, ignore_unavailable=True)

def get_documents():
    for x in chunks:
        yield {
            "_op_type": "index",
            "_index": INDEX_NAME,
            "_source": {"text": x},
            "pipeline": pipeline_id,
        }
helpers.bulk(es, get_documents(), chunk_size=100) 


(48, [])

# Deploy prompts

In [6]:
RAG_PROMPT = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.
Ensure responses are contextually relevant and supported by provided documents. 
If no relevant information is found, respond with "I apologize, but I could not locate any relevant information." <|eot_id|><|start_header_id|>user<|end_header_id|>

<documents>
{context}
</documents>

{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

CONVERSATION_PROMPT = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.
Ensure responses are contextually relevant and supported by provided documents. 
If no relevant information is found, respond with "I apologize, but I could not locate any relevant information." 

<documents>
{context}
</documents>
<|eot_id|>{history}<|start_header_id|>assistant<|end_header_id|>
"""

In [8]:
prompt_mapping = [
    {
        "name": "rag-prompt-randy",
        "prompt": RAG_PROMPT,
        "variables": ["context", "user"]
    },
    {
        "name": "conversation-prompt-randy",
        "prompt": CONVERSATION_PROMPT,
        "variables": ["context", "history"]
    }
]

deployments = {
    x["metadata"]["name"]: x["metadata"]["id"]
    for x in wml_client.deployments.get_details()["resources"]
}

for x in prompt_mapping:
    prompt_name, prompt, variables = x["name"], x["prompt"], x["variables"] 
    prompts = prompt_mgr.list().query(f"NAME == '{prompt_name}'")
    
    if prompt_name in deployments:
        deployment_id = deployments[prompt_name]
        
    if not prompts.empty:
        prompt_id = prompts["ID"].values[0]
        prompt_mgr.unlock(prompt_id)

    prompt_template = PromptTemplate(
        name=prompt_name,
        model_id="meta-llama/llama-3-1-70b-instruct",
        input_text=prompt,
        input_variables=variables,
        model_params={"decoding_method": "greedy", "max_new_tokens": 4096, "repetition_penalty": 1},
    )

    if prompt_name in deployments:
        prompt_mgr.update_prompt(prompt_id, prompt_template)
    else:
        stored_prompt_template = prompt_mgr.store_prompt(prompt_template)
        meta_props = {
            wml_client.deployments.ConfigurationMetaNames.NAME: prompt_name,
            wml_client.deployments.ConfigurationMetaNames.ONLINE: {},
            wml_client.deployments.ConfigurationMetaNames.BASE_MODEL_ID: "meta-llama/llama-3-1-70b-instruct",
        }
        deployment_details = wml_client.deployments.create(stored_prompt_template.prompt_id, meta_props)
        deployment_id = deployment_details["metadata"]["id"]
        
{
    x["metadata"]["name"]: x["metadata"]["id"]
    for x in wml_client.deployments.get_details()["resources"]
}

{'chat': '169518ed-3b41-4542-aba6-fb8625a3bc46',
 'Retrival Function': '1a504e05-0697-4cd5-85db-7a578422fa99',
 'conversation-prompt-randy': '1c7d6100-2e88-4849-8717-23fdc5f941ba',
 'RAG Prompt': '24e2b6f0-e1e4-415d-801a-e4cf03b2b92a',
 'Chat Prompt': '616c3671-485e-4f73-bbaf-8f4702711aa5',
 'rag-prompt-randy': 'b82684e1-5c07-493a-8beb-b6dbf188469f',
 'Demo Deployment': 'd457172b-f1fe-43e4-8943-88eb837e899e'}

# Test RAG

In [7]:
question = "what is the movie geostorm about?"

results = es.search(
    index=INDEX_NAME,
    body={
        "knn": {
            "field": "text_embedding",
            "query_vector_builder": {
                "text_embedding": {"model_id": model_name, "model_text": question}
            },
            "k": 5,
            "num_candidates": 50,
        },
        "_source": False,
        "fields": ["text"],
    },
)

for hit in results["hits"]["hits"]:
    print(str(hit))
    print()

context = "<document>\n" + "\n</document>\n\n<document>\n".join([hit["fields"]["text"][0] for hit in results["hits"]["hits"]]) + "\n</document>"

generated_text = llm_model.generate_text(RAG_PROMPT.format(context=context, user=question))
print(generated_text)

{'_index': 'randy_v1', '_id': 'AoNkc5IB7hGZ96Ht6cDC', '_score': 0.90877527, 'fields': {'text': ['As Brad\'s mom\'s flight arrives, Dusty and Brad discover that Brad\'s new stepdad is Chesley "Sully" Sullenberger, the pilot of the "Miracle on the Hudson" flight. Brad and Dusty remember that they watched the film "Sully" together not too long ago and Brad appears to be welcoming towards him. Brad runs down the terminal and says that Sully will never replace his father, because Sully has only one great personal story, whereas his father has thousands of stories.\n\n\n\nyear: 2017\ntitle: Geostorm\ndirector: Dean Devlin\ncast: Dean Devlin (director/screenplay); Paul Guyot (screenplay); Gerard Butler, Abbie Cornish, Alexandra Lara, Jim Sturgess, Amr Waked, Ed Harris, Andy García\nplot: In 2019, following many catastrophic natural disasters, an international coalition commissions a system of climate-controlling satellites called "Dutch Boy". After Dutch Boy neutralizes a typhoon in Shanghai,

In [107]:
deployment_id = "b82684e1-5c07-493a-8beb-b6dbf188469f"

generated_text = wml_client.deployments.generate_text(
    deployment_id=deployment_id,
    params={"prompt_variables": {"context": context, "user": question}},
)

print(generated_text)

Dwayne Johnson, Kevin Hart, Jack Black, Nick Jonas, and Karen Gillan acted in Jumanji: Welcome to the Jungle.<|eot_id|>
