In [None]:
pip install --quiet -U ipywidgets langchain langchain-community langchain-core langchainhub tiktoken chromadb pypdf pysqlite3-binary sentence-transformers unstructured

# Set Up The Model
In this block, we import chromadb and the other dependencies.  Chroma requires sqlite3, so `pysqlite3` is imported and registered under that name as well.

The LLM used is Mistral:Instruct, hosted by an Ollama container running in OpenShift.

HuggingFace Embeddings are used since they can be run locally and can be configured to take advantage of available GPUs.

In [None]:
# Load pysqlite3 and register it in sys.modules under the name "sqlite3", so that any
# later `import sqlite3` resolves to pysqlite3 instead of the standard-library module.
# This has to run before chromadb is imported below.
__import__('pysqlite3')
import sys
# pop() removes the 'pysqlite3' entry and returns the module, which is re-registered as 'sqlite3'.
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

import bs4
import os.path
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.chat_models import ChatOllama
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel
from langchain_core.prompts import ChatPromptTemplate
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.utils.math import cosine_similarity
from langchain_text_splitters import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from typing import List
from IPython.display import display, Markdown

# Chat model used by the RAG chain: mistral:instruct served by the in-cluster Ollama API,
# with the sampling temperature set to 0.
ollama_settings = {
    "model": "mistral:instruct",
    "base_url": "http://ollama-api-service.ollama-llm.svc.cluster.local:11434",
    "temperature": 0,
}
model = ChatOllama(**ollama_settings)

# Gather Data, Chunk it and Store it in the vector store

If a database is not present, create it by downloading and chunking the files.  If it is already present, the build is skipped and the existing database is opened later, when a question is routed to it.

In [None]:
# Embedding model used both for the vector stores and for routing questions to a prompt.
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so that the
# notebook still runs on hosts without CUDA instead of failing at model load.
import torch

embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={"device": embedding_device})

# Chroma-side embedding function built with its default settings.
# NOTE(review): not referenced by any other cell visible in this notebook - confirm it is still needed.
embedding_function = SentenceTransformerEmbeddingFunction()

In [None]:
# Build the RHEL vector store on first run only.
persist_dir = "db_rhel"

# The sqlite file inside the persist directory is used as the "already built" marker.
# Deriving it from persist_dir keeps the two from drifting apart.
path = os.path.join(persist_dir, "chroma.sqlite3")

check_file = os.path.isfile(path)

if not check_file:
    # PDFs to index for RHEL questions.
    urls = [
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/performing_a_standard_rhel_9_installation/red_hat_enterprise_linux-9-performing_a_standard_rhel_9_installation-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/performing_an_advanced_rhel_9_installation/red_hat_enterprise_linux-9-performing_an_advanced_rhel_9_installation-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/configuring_basic_system_settings/red_hat_enterprise_linux-9-configuring_basic_system_settings-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/security_hardening/red_hat_enterprise_linux-9-security_hardening-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/composing_a_customized_rhel_system_image/red_hat_enterprise_linux-9-composing_a_customized_rhel_system_image-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/configuring_and_managing_networking/red_hat_enterprise_linux-9-configuring_and_managing_networking-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/pdf/upgrading_from_rhel_8_to_rhel_9/red_hat_enterprise_linux-9-upgrading_from_rhel_8_to_rhel_9-en-us.pdf',
        'https://www.redhat.com/rhdc/managed-files/li-linux-rhel-subscription-guide-detail-639715pr-202312-en_0.pdf'
    ]

    pages = []

    for file in urls:
        loader = PyPDFLoader(file, extract_images=False)
        # extend() appends in place instead of copying the whole list for every PDF.
        pages.extend(loader.load())

    # Split the loaded pages into chunks of at most 1275 characters with no overlap.
    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ". ", " ", ""], chunk_size=1275, chunk_overlap=0)

    splits = text_splitter.split_documents(pages)

    # Embed the chunks and store them under persist_dir.
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=persist_dir)

In [None]:
# Build the Ansible Automation Platform vector store on first run only.
persist_dir = "db_aap"

# The sqlite file inside the persist directory is used as the "already built" marker.
# Deriving it from persist_dir keeps the two from drifting apart.
path = os.path.join(persist_dir, "chroma.sqlite3")

check_file = os.path.isfile(path)

if not check_file:
    # PDFs to index for Ansible Automation Platform questions.
    urls = [
        'https://access.redhat.com/documentation/en-us/red_hat_ansible_automation_platform/2.4/pdf/containerized_ansible_automation_platform_installation_guide/red_hat_ansible_automation_platform-2.4-containerized_ansible_automation_platform_installation_guide-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_ansible_automation_platform/2.4/pdf/red_hat_ansible_automation_platform_installation_guide/red_hat_ansible_automation_platform-2.4-red_hat_ansible_automation_platform_installation_guide-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_ansible_automation_platform/2.4/pdf/red_hat_ansible_automation_platform_operations_guide/red_hat_ansible_automation_platform-2.4-red_hat_ansible_automation_platform_operations_guide-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_ansible_automation_platform/2.4/pdf/automation_controller_user_guide/red_hat_ansible_automation_platform-2.4-automation_controller_user_guide-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_ansible_automation_platform/2.4/pdf/getting_started_with_ansible_playbooks/red_hat_ansible_automation_platform-2.4-getting_started_with_ansible_playbooks-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/red_hat_ansible_automation_platform/2.4/pdf/deploying_the_red_hat_ansible_automation_platform_operator_on_openshift_container_platform/red_hat_ansible_automation_platform-2.4-deploying_the_red_hat_ansible_automation_platform_operator_on_openshift_container_platform-en-us.pdf'
    ]

    pages = []

    for file in urls:
        loader = PyPDFLoader(file, extract_images=False)
        # extend() appends in place instead of copying the whole list for every PDF.
        pages.extend(loader.load())

    # Split the loaded pages into chunks of at most 1700 characters with no overlap.
    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ". ", " ", ""], chunk_size=1700, chunk_overlap=0)

    splits = text_splitter.split_documents(pages)

    # Embed the chunks and store them under persist_dir.
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=persist_dir)

In [None]:
# Build the OpenShift Container Platform vector store on first run only.
persist_dir = "db_ocp"

# The sqlite file inside the persist directory is used as the "already built" marker.
# Deriving it from persist_dir keeps the two from drifting apart.
path = os.path.join(persist_dir, "chroma.sqlite3")

check_file = os.path.isfile(path)

if not check_file:
    # PDFs to index for OpenShift questions.
    urls = [
        'https://access.redhat.com/documentation/en-us/openshift_container_platform/4.15/pdf/building_applications/openshift_container_platform-4.15-building_applications-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/assisted_installer_for_openshift_container_platform/2024/pdf/installing_openshift_container_platform_with_the_assisted_installer/assisted_installer_for_openshift_container_platform-2024-installing_openshift_container_platform_with_the_assisted_installer-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/openshift_container_platform/4.15/pdf/storage/openshift_container_platform-4.15-storage-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/openshift_container_platform/4.15/pdf/cli_tools/openshift_container_platform-4.15-cli_tools-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/openshift_container_platform/4.15/pdf/virtualization/openshift_container_platform-4.15-virtualization-en-us.pdf',
        'https://access.redhat.com/documentation/en-us/openshift_container_platform/4.15/pdf/windows_container_support_for_openshift/openshift_container_platform-4.15-windows_container_support_for_openshift-en-us.pdf'
    ]

    pages = []

    for file in urls:
        loader = PyPDFLoader(file, extract_images=False)
        # extend() appends in place instead of copying the whole list for every PDF.
        pages.extend(loader.load())

    # Split the loaded pages into chunks of at most 675 characters with no overlap.
    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ". ", " ", ""], chunk_size=675, chunk_overlap=0)

    splits = text_splitter.split_documents(pages)

    # Embed the chunks and store them under persist_dir.
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=persist_dir)

# Set up the prompts

In [None]:
# Set up the prompts
#
# One template per product. Each template has two placeholders, {question} and
# {source_docs}. The template text is used twice: as the prompt for the LLM and,
# embedded below, as the reference text that questions are compared against for routing.
# Inside the triple-quoted strings a trailing backslash joins the line with the next one.

# Prompt for Red Hat Enterprise Linux questions.
rhel_template = """You are an expert in Red Hat Enterprise Linux (RHEL). \
You can only use the supplied source docs. \
When you don't know the answer to a question just admit that you don't know. \
If the question is not related to RHEL answer that the questions is not relevant.
Always include a "SOURCES" part in your answer with link to the source_docs.

QUESTION: {question}
=========
{source_docs}
=========
ANSWER: """

# Prompt for Ansible Automation Platform questions.
ansible_template = """You are knowledgeable about Ansible Automation Platform (AAP) and the Ansible Automation Platform Operator on OpenShift (OCP). \
You can only use the supplied source docs. \
When you don't know the answer to a question just admit that you don't know.
If the question is not related to Ansible answer that the questions is not relevant.
Always include a "SOURCES" part in your answer.

QUESTION: {question}
=========
{source_docs}
=========
ANSWER: """

# Prompt for OpenShift Container Platform questions.
openshift_template = """You are knowledgeable about OpenShift Container Platform (OCP) and it's CLI tool oc. \
You can only use the supplied source docs. \
When you don't know the answer to a question just admit that you don't know.
If the question is not related to OpenShift answer that the questions is not relevant.
Always include a "SOURCES" part in your answer.

QUESTION: {question}
=========
{source_docs}
=========
ANSWER: """

# Initial prompt object, built from the RHEL template; the question cell assigns
# rag_prompt again after the question has been routed to a product.
rag_prompt = ChatPromptTemplate.from_template(rhel_template)

# Embed the raw template strings. prompt_embeddings is indexed in the same order as
# prompt_templates, which is how the router maps a best-match index back to a template.
prompt_templates = [rhel_template, ansible_template, openshift_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

# Run the RAG

In [None]:
# The prompt_router function determines which product the question is most likely about
# and returns the name of the product.

def prompt_router(input):
    """Return the product whose prompt template is most similar to the question.

    The question is embedded and compared, by cosine similarity, against the
    embeddings of the prompt templates; the best-scoring template decides the product.

    Args:
        input: Mapping with a "question" key holding the question text.

    Returns:
        "rhel", "aap" or "ocp".

    Raises:
        ValueError: If the best-matching template is not one of the known product
            templates (previously this surfaced as an unbound local variable).
    """
    query_embedding = embeddings.embed_query(input["question"])
    # cosine_similarity returns one row per query; there is a single query here.
    similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]
    most_similar = prompt_templates[similarity.argmax()]

    # Checked in order, so the first matching template wins.
    routes = (
        (rhel_template, "rhel"),
        (ansible_template, "aap"),
        (openshift_template, "ocp"),
    )
    for template, product in routes:
        if most_similar == template:
            return product

    raise ValueError("The best-matching prompt template is not mapped to a product.")

# Format the source docs for the LLM
def format_docs(docs: List[Document]) -> str:
    """Render documents as "Content: ... / Source: ..." blocks separated by blank lines.

    Each document contributes its page_content and the 'source' entry of its metadata.
    """
    rendered = []
    for doc in docs:
        rendered.append(f"Content: {doc.page_content}\nSource: {doc.metadata['source']}")
    return "\n\n".join(rendered)


# The router_chain wraps the incoming question in a {"question": ...} mapping and hands
# it to prompt_router, which returns the product the question is about.
router_chain = RunnableParallel(question=RunnablePassthrough()) | RunnableLambda(prompt_router)

# Process the source_docs to generate the answer.
#
# rag_prompt is resolved through a lambda every time the chain runs, so the chain uses
# whichever prompt was most recently assigned to rag_prompt (the question cell assigns it
# after routing). Piping rag_prompt in directly would bind the chain to the prompt object
# that existed when this statement ran, and later reassignments would be ignored.
# When the function wrapped by a RunnableLambda returns a Runnable, that Runnable is
# invoked with the same input, so the selected prompt still receives the mapping.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        # Replace the list of retrieved documents with their formatted text.
        source_docs=(lambda x: format_docs(x["source_docs"]))
    )
    | RunnableLambda(lambda _: rag_prompt)
    | model
    | StrOutputParser()
)

# Retrieve the source docs for the question, keep the question alongside them, then add
# an "answer" entry produced by rag_chain_from_docs.
# The lambda returns the retriever currently bound to the name `retriever`, so the lookup
# happens when the chain runs rather than when it is defined.
retrieve_and_keep_question = RunnableParallel(
    source_docs=lambda x: retriever,
    question=RunnablePassthrough(),
)
rag_chain = retrieve_and_keep_question | RunnablePassthrough.assign(answer=rag_chain_from_docs)

# Main flow - take the question, work out which product it is about, point the prompt
# and retriever at that product's template and database, then generate the answer.

question = "How many VM entitlements are included in a single RHEL subscription?"

product = router_chain.invoke(question)

# product name -> (prompt template, Chroma persist directory)
product_settings = {
    "rhel": (rhel_template, "db_rhel"),
    "aap": (ansible_template, "db_aap"),
    "ocp": (openshift_template, "db_ocp"),
}

if product in product_settings:
    selected_template, selected_db_dir = product_settings[product]
    rag_prompt = ChatPromptTemplate.from_template(selected_template)
    vectorstore = Chroma(persist_directory=selected_db_dir, embedding_function=embeddings)
    retriever = vectorstore.as_retriever()

results = rag_chain.invoke(question)

# Render the generated answer as Markdown in the cell output.
answer = results["answer"]
display(Markdown(answer))