In [16]:
!pip install -q einops==0.7.0 langchain==0.1.9 sentence-transformers==2.4.0 openai==1.13.3 langchain_elasticsearch langchain-community tf-keras jq tiktoken


[notice] A new release of pip available: 22.2.2 -> 24.1.2
[notice] To update, run: pip install --upgrade pip


In [10]:
#import json
#import openai
import os
import warnings
from getpass import getpass
#from urllib.request import urlopen

# Silence warnings before the third-party imports below are executed.
warnings.filterwarnings('ignore')

from langchain_elasticsearch import ElasticsearchStore
#from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
#from langchain.llms import OpenAI

from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

#from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import VLLMOpenAI
from langchain.prompts import PromptTemplate
from elasticsearch import Elasticsearch

In [12]:


# Connection settings are read from the environment so that no secret is stored
# in the notebook. ES_URL and ES_USER fall back to the values this notebook was
# written against; the password is prompted for when ES_PASSWORD is not set.
ES_URL = os.environ.get(
    "ES_URL",
    "https://elasticsearch-sample-myrag.apps.cluster-2mw5c.sandbox1301.opentlc.com",
)
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD") or getpass("Elasticsearch password: ")

# NOTE(review): verify_certs=False turns off TLS certificate verification;
# acceptable for a sandbox cluster only — confirm before pointing at anything else.
client = Elasticsearch([ES_URL], basic_auth=(ES_USER, ES_PASSWORD), verify_certs=False)

# Fails fast if the cluster is unreachable or the credentials are wrong.
print(client.info())

# Embedding model used for the indexed chunks (and reused by later cells).
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

# Vector store bound to the "rhoai_index" index on the cluster above.
vector_store = ElasticsearchStore(
    es_connection=client,
    index_name="rhoai_index",
    embedding=embeddings,
)


def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the record's ``content`` and ``title`` fields into ``metadata``.

    Args:
        record: One JSON record; missing keys are tolerated.
        metadata: Metadata dict to extend. It is modified in place.

    Returns:
        The same ``metadata`` object, now carrying ``"content"`` and
        ``"title"`` entries (``None`` for a key absent from ``record``).
    """
    for field in ("content", "title"):
        metadata[field] = record.get(field)
    return metadata


# Read rhoai.json: the jq expression ".[]" selects each top-level element, the
# "content" field becomes the document text and metadata_func fills the metadata.
loader = JSONLoader(
    file_path="rhoai.json", jq_schema=".[]", content_key="content", metadata_func=metadata_func
)

# Splitter built from the tiktoken encoder with chunk_size=128 and chunk_overlap=64.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=128, chunk_overlap=64)
docs = loader.load_and_split(text_splitter=text_splitter)

# Embed the chunks and write them to "rhoai_index". from_documents is a
# classmethod, so it is invoked on the class; note that it returns a store
# object (not the documents), and that re-running this cell indexes the
# chunks again.
documents = ElasticsearchStore.from_documents(
    documents=docs,
    embedding=embeddings,
    index_name="rhoai_index",
    es_connection=client,
)

# Smoke test: query the index through the store created earlier.
results = vector_store.similarity_search("can you list OpenShift AI offerings")
print(results)


{'name': 'elasticsearch-sample-es-default-0', 'cluster_name': 'elasticsearch-sample', 'cluster_uuid': 'VRnmjm8WSSOSZlE6OGzVOA', 'version': {'number': '8.14.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '2afe7caceec8a26ff53817e5ed88235e90592a1b', 'build_date': '2024-07-01T22:06:58.515911606Z', 'build_snapshot': False, 'lucene_version': '9.10.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}
[Document(page_content='OpenShift AI integrates the following components and services \n At the service layer \n OpenShift AI dashboard \n A customer-facing dashboard that shows available and installed applications for the OpenShift AI environment as well as learning resources such as tutorials, quick starts, and documentation. Administrative users can access functionality to manage users, clusters, notebook images, accelerator profiles, and model-serving runtimes. Data scientists can use the 

In [13]:

#######################################################################
#      INITIALIZE ELASTICSEARCH CONNECTION AND VECTOR STORE           #
#######################################################################

# Connection settings are read from the environment so that no secret is stored
# in the notebook. ES_URL and ES_USER fall back to the values this notebook was
# written against; the password is prompted for when ES_PASSWORD is not set.
ES_URL = os.environ.get(
    "ES_URL",
    "https://elasticsearch-sample-myrag.apps.cluster-2mw5c.sandbox1301.opentlc.com",
)
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD") or getpass("Elasticsearch password: ")

# NOTE(review): verify_certs=False turns off TLS certificate verification;
# acceptable for a sandbox cluster only — confirm before pointing at anything else.
client = Elasticsearch([ES_URL], basic_auth=(ES_USER, ES_PASSWORD), verify_certs=False)

# Fails fast if the cluster is unreachable or the credentials are wrong.
print(client.info())

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

# Store used as the retriever backend by the QA chain in a later cell.
store = ElasticsearchStore(
    es_connection=client,
    index_name="rhoai_index",
    embedding=embeddings,
)

#######################################################################
#      INITIALIZE LLM DEPLOYMENT PARAMETERS                           #
#######################################################################

# OpenAI-compatible endpoint of the model server (plain string; the original
# f-prefix had no placeholders to interpolate).
INFERENCE_SERVER_URL = "http://llm-predictor.myrag.svc.cluster.local:8080/v1"
MODEL_NAME = "llm"
# Generation settings forwarded to VLLMOpenAI in a later cell.
MAX_TOKENS = 1024
TOP_P = 0.95
TEMPERATURE = 0.01
PRESENCE_PENALTY = 1.03



{'name': 'elasticsearch-sample-es-default-0', 'cluster_name': 'elasticsearch-sample', 'cluster_uuid': 'VRnmjm8WSSOSZlE6OGzVOA', 'version': {'number': '8.14.2', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '2afe7caceec8a26ff53817e5ed88235e90592a1b', 'build_date': '2024-07-01T22:06:58.515911606Z', 'build_snapshot': False, 'lucene_version': '9.10.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [14]:
# Prompt with {context} (retrieved chunks) and {question} placeholders, wrapped
# in [INST] ... [/INST] markers — presumably the served model's instruction
# format; confirm against the deployed model.
template = """
### [INST] 
Instruction: Answer the question based on your 
OpenShift AI knowledge. Here is context to help:

{context}

### QUESTION:
{question} 

[/INST]
 """

# Set once for the whole process (the original cell assigned it a second time
# after building the chain, which had no additional effect).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Client for the OpenAI-compatible inference endpoint; the API key is a
# placeholder value. Tokens are streamed to stdout through the callback.
llm = VLLMOpenAI(
    openai_api_key="EMPTY",
    openai_api_base=INFERENCE_SERVER_URL,
    model_name=MODEL_NAME,
    top_p=TOP_P,
    temperature=TEMPERATURE,
    max_tokens=MAX_TOKENS,
    presence_penalty=PRESENCE_PENALTY,
    streaming=True,
    verbose=False,
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Retrieval-augmented QA: similarity search with k=4 over `store`, results fed
# into QA_CHAIN_PROMPT; source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4},
    ),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)


In [15]:
# Run one question through the chain; the answer is streamed to stdout by the
# LLM's callback handler, and the chain's return value is kept in `result`.
question = "What accelerators are supported on OpenShift AI?"
result = qa_chain.invoke(input={"query": question})


 OpenShift AI supports two types of accelerators for running machine learning workloads: NVIDIA graphics processing units (GPUs) and Habana Gaudi hardware processing units (HPUs). To use GPUs, you need to install the NVIDIA GPU Operator. For HPUs, you can use the Habana libraries and software associated with Habana Gaudi devices available from your notebook.