In [None]:
pip install -U --quiet ipywidgets langchain langchain-community langchain-core langchainhub chromadb==0.4.15 pysqlite3-binary sentence-transformers pypdf

# Set Up The Model
The cell above installs chromadb and the other dependencies.  Chroma requires sqlite3, so the cell below imports `pysqlite3` and registers it under the `sqlite3` module name before importing chromadb.

The LLM used is Mistral:Instruct, hosted by an Ollama container running in OpenShift.

HuggingFace Embeddings are used since they can be run locally and can be configured to take advantage of available GPUs.

In [None]:
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import chromadb

import bs4
import os.path
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.chat_models import ChatOllama
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from typing import List
from IPython.display import display, Markdown

# Chat model: Mistral Instruct served by the in-cluster Ollama endpoint.
# temperature = 0 keeps the answers as repeatable as the model allows.
model = ChatOllama(model="mistral:instruct",
                   base_url="http://ollama-api-service.ollama-llm.svc.cluster.local:11434",
                   temperature=0)

# Use the GPU for embeddings when one is available; otherwise fall back to the
# CPU so the notebook still runs on hosts without CUDA.
import torch  # expected to be present as a dependency of sentence-transformers

embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2",
                                  model_kwargs={'device': embedding_device})

# Gather Data, Chunk it and Store it in the vector store

If the database is not present, then create it by downloading and chunking the files.  If it is present, then just load it.

In [None]:
# Directory in which Chroma persists the vector store between runs.
persist_dir = "db"

# Chroma's SQLite file inside the persist directory; its presence is used as
# the marker that the store has already been built.
path = os.path.join(persist_dir, "chroma.sqlite3")

check_file = os.path.isfile(path)

if not check_file:
    # No persisted store yet: download the pages, chunk them and index them.
    urls = [
        r'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/performing_a_standard_rhel_9_installation/index',
        r'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/performing_an_advanced_rhel_9_installation/index',
        r'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/configuring_basic_system_settings/index',
        r'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/security_hardening/index',
        r'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/composing_a_customized_rhel_system_image/index',
        r'https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/upgrading_from_rhel_8_to_rhel_9/index',
        r'https://www.redhat.com/en/resources/red-hat-enterprise-linux-subscription-guide'
    ]

    loader = WebBaseLoader(urls)
    docs = loader.load()

    # Split the pages into chunks of at most 1250 characters with no overlap.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1250, chunk_overlap=0)
    splits = text_splitter.split_documents(docs)

    # Embed the chunks and persist them to the same directory that the
    # load branch below reads from.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        persist_directory=persist_dir,
    )
else:
    # A persisted store exists: reopen it instead of re-downloading.
    vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embedding)

retriever = vectorstore.as_retriever()

# Run the RAG

In [None]:
from langchain_core.runnables import RunnableParallel
from typing import List
from langchain_core.documents import Document


# Prompt
# Template text for the RAG prompt. The {question} and {source_docs}
# placeholders are filled from the "question" and "source_docs" keys that the
# chain below supplies.
rag_template = """
Given a question write an answer.
Use only the supplied source docs.
If you don't know the answer, just say that you don't know.  Do not fake the answer.
If the answer is relevant, then ALWAYS include a "SOURCES" part in your answer.

QUESTION: {question}
=========
{source_docs}
=========
ANSWER: 
"""

# Build the chat prompt object from the template string above.
rag_prompt = ChatPromptTemplate.from_template(rag_template)

def format_docs(docs: List[Document]) -> str:
    """Render retrieved documents as a single text block for the prompt.

    Each document becomes ``Content: <page_content>`` followed by
    ``Source: <source>`` on the next line; entries are separated by a blank
    line.

    Args:
        docs: Documents to render.

    Returns:
        The joined text, or an empty string when ``docs`` is empty. A
        document whose metadata has no ``source`` key is rendered with the
        placeholder ``unknown`` instead of raising ``KeyError``.
    """
    return "\n\n".join(
        f"Content: {doc.page_content}\nSource: {doc.metadata.get('source', 'unknown')}"
        for doc in docs
    )

# Answering stage: replace the "source_docs" list with its formatted text,
# fill the prompt, call the model and reduce the reply to a plain string.
rag_chain_from_docs = RunnablePassthrough.assign(
    source_docs=lambda state: format_docs(state["source_docs"])
) | rag_prompt | model | StrOutputParser()

# Full chain: send the incoming question to the retriever and also pass it
# through unchanged, then attach the generated text under "answer".
rag_chain = RunnableParallel(
    source_docs=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)

# Example question to send through the RAG chain.
question = "Can RHEL 9 be installed via a USB drive?"

# The chain result is indexed by key; the generated text is under "answer".
results = rag_chain.invoke(question)
answer = results["answer"]

# Render the answer as Markdown in the cell output.
display(Markdown(answer))