# Deepchecks RAG LLM Chatbot Workshop

- A mini app to create a simple RAG LLM chatbot
- Source data could be URLs, PDFs, or HTML files

In [10]:
# import sys
# print(sys.version)

In [2]:
!pip install -q langchain openai chromadb tiktoken beautifulsoup4 pypdf unstructured tqdm langchain-community

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
blis 1.0.1 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
thinc 8.3.2 requires numpy<2.1.0,>=2.0.0; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.


In [None]:
model_name = "gpt-4o-mini"  # foundational model; passed to ChatOpenAI when the RAG chain is built

Now, let's import the necessary libraries and set up our environment:

In [None]:
import os
from langchain.document_loaders import UnstructuredURLLoader, PyPDFLoader, UnstructuredHTMLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma   
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

In [None]:
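# Set your OpenAI API key here, e.g. by exporting the OPENAI_API_KEY environment variable before starting Jupyter; do not hardcode the key in the notebook.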


## Configuration

1. Define documents to upload to vectorstore
2. Define System prompt & prompt template
3. Define retrieval system parameters (chunk size, chunk overlap, and the number of retrieved chunks `k`)

In [None]:
# --- Prompt configuration -------------------------------------------------
# System message: tells the assistant to answer from the supplied context only
# and to admit when the context does not contain the answer.
system_template = (
    "You are an AI assistant that answers questions based on the given context.\n"
    "Your responses should be informative and relevant to the question asked.\n"
    "If you don't know the answer or if the information is not present in the context, say so."
)

# Human message: {context} and {question} are template placeholders.
human_template = "Context: {context}\nQuestion: {question}\nAnswer: "

# --- Retrieval configuration ----------------------------------------------
# Chunk size handed to the text splitter. Larger chunks give the LLM more
# information per retrieved chunk; smaller chunks are more focused.
chunk_size = 1000
# Overlap between consecutive chunks, handed to the text splitter.
chunk_overlap = 200
# Number of relevant chunks retrieved per question. Trade-off: more chunks mean
# more information, but may confuse the bot or produce unnecessarily in-depth answers.
k = 4

## RAG Chatbot Setup

Now, let's set up our RAG chatbot using the configurations we defined earlier:

In [None]:
from functools import partial

def setup_rag_chatbot(urls, pdfs, htmls, system_template, human_template, chunk_size, chunk_overlap, k, model_name):
    """Build a RetrievalQA chain over the given URLs, PDF files and HTML files.

    Args:
        urls: URLs loaded with UnstructuredURLLoader; may be empty or None.
        pdfs: PDF paths, each loaded with PyPDFLoader; may be empty or None.
        htmls: HTML paths, each loaded with UnstructuredHTMLLoader; may be empty or None.
        system_template: Text of the system-message prompt template.
        human_template: Text of the human-message prompt template (the notebook's
            template uses the ``{context}`` and ``{question}`` placeholders).
        chunk_size: ``chunk_size`` passed to RecursiveCharacterTextSplitter.
        chunk_overlap: ``chunk_overlap`` passed to RecursiveCharacterTextSplitter.
        k: Number of chunks the retriever is asked to return per query.
        model_name: Model name passed to ChatOpenAI.

    Returns:
        The RetrievalQA chain, built with ``return_source_documents=True``.
    """

    def load_data(urls, pdfs, htmls):
        """Load every configured source into one flat list of documents."""
        documents = []
        if urls:
            documents.extend(UnstructuredURLLoader(urls=urls).load())
        # `or []` lets callers pass None for an unused source type, consistent
        # with the `if urls` guard above (a bare `for ... in None` raises TypeError).
        for pdf in pdfs or []:
            documents.extend(PyPDFLoader(pdf).load())
        for html in htmls or []:
            documents.extend(UnstructuredHTMLLoader(html).load())
        return documents

    documents = load_data(urls, pdfs, htmls)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)

    # Instantiate pre-trained embedding model & vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(texts, embeddings)
    # Retrieve the top k chunks by vector similarity.
    # NOTE(review): no distance metric is configured here, so the Chroma collection's
    # default applies - believed to be squared L2 rather than cosine; confirm against
    # the installed Chroma version and configure the collection if cosine is required.
    retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={"k": k})

    # Instantiate LLM; temperature=0 is passed to ChatOpenAI
    llm = ChatOpenAI(model_name=model_name, temperature=0)

    # Create prompt: system message followed by the human message
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

    # Create RAG chain
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # retrieved docs concatenated into single string. Simplest. Best if small enough to fit within LLM's token limits
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": chat_prompt}
    )
    print("RAG Chatbot is ready!")
    return rag_chain

def ask_rag_chatbot(question, rag_chain):
    """Ask the chain a single question and return the answer with its sources.

    Args:
        question: Question text, sent to the chain under the "query" key.
        rag_chain: Callable chain whose result provides "result" and
            "source_documents" entries.

    Returns:
        dict with keys "question", "answer" and "sources". Each source is the
        document's ``metadata['source']``, or 'Unknown source' when that key is absent.
    """
    response = rag_chain({"query": question})
    answer = response["result"]

    source_names = []
    for document in response["source_documents"]:
        source_names.append(document.metadata.get('source', 'Unknown source'))

    return {"question": question, "answer": answer, "sources": source_names}

def ask_multiple_questions(rag_chain, questions):
    """Answer several questions concurrently with the same chain.

    Args:
        rag_chain: Chain forwarded to ``ask_rag_chatbot`` for every question.
        questions: Sized collection of question strings (``len`` is used for
            the progress-bar total).

    Returns:
        list of ``ask_rag_chatbot`` results, in the order of ``questions``.
    """
    with ThreadPoolExecutor() as pool:
        # Bind the chain once so the pool only has to supply the question.
        ask_one = partial(ask_rag_chatbot, rag_chain=rag_chain)
        progress = tqdm(pool.map(ask_one, questions), total=len(questions))
        answers = list(progress)
    return answers

## Using the RAG Chatbot

In [None]:
def print_result(result):
    """Print one chatbot result: the question, the answer, then one line per source."""
    print(f"Question: {result['question']}")
    print(f"Answer: {result['answer']}")
    print("Sources:")
    for source in result['sources']:
        print(f"- {source}")


# Source documents to load into the vector store
urls = ["https://en.wikipedia.org/wiki/Artificial_intelligence",
        "https://en.wikipedia.org/wiki/Machine_learning"]
pdfs = []
htmls = []

rag_chain = setup_rag_chatbot(urls, pdfs, htmls, system_template, human_template, chunk_size, chunk_overlap, k, model_name)

# Single question
result = ask_rag_chatbot("What is artificial intelligence?", rag_chain)
print_result(result)

# Multiple questions, answered concurrently
questions = [
    "What is machine learning?",
    "How does deep learning relate to AI?",
    "What are some applications of AI in healthcare?"
]
results = ask_multiple_questions(rag_chain, questions)

# A distinct loop name keeps the single-question `result` above intact.
for batch_result in results:
    print()  # blank line separating consecutive answers
    print_result(batch_result)

RAG Chatbot is ready!
Question: What is artificial intelligence?
Answer: Artificial intelligence (AI) is the intelligence exhibited by machines, particularly computer systems. It is a field of research within computer science that focuses on developing methods and software that enable machines to perceive their environment and use learning and intelligence to take actions aimed at achieving defined goals. AI encompasses various approaches and applications, including machine learning, natural language processing, robotics, and more.
Sources:
- https://en.wikipedia.org/wiki/Artificial_intelligence
- https://en.wikipedia.org/wiki/Artificial_intelligence
- https://en.wikipedia.org/wiki/Artificial_intelligence
- https://en.wikipedia.org/wiki/Artificial_intelligence


100%|██████████| 3/3 [00:03<00:00,  1.29s/it]


Question: What is machine learning?
Answer: Machine learning (ML) is a field of study within artificial intelligence that focuses on the development and analysis of statistical algorithms that enable systems to learn from data and generalize to new, unseen data. This allows machines to perform tasks without explicit instructions. ML encompasses various types of learning, including supervised learning (which involves labeled training data) and unsupervised learning (which identifies patterns in data without guidance). It finds applications in numerous fields such as natural language processing, computer vision, speech recognition, and predictive analytics in business. The foundational methods of ML are rooted in statistics and mathematical optimization.
Sources:
- https://en.wikipedia.org/wiki/Machine_learning
- https://en.wikipedia.org/wiki/Artificial_intelligence
- https://en.wikipedia.org/wiki/Machine_learning
- https://en.wikipedia.org/wiki/Machine_learning

Question: How does deep




# Log to Deepchecks

You can integrate Deepchecks LLM Eval to log and analyze the performance of the RAG chatbot.
It also makes it easy to compare different chatbot versions.

In [None]:
!pip install -q deepchecks-llm-client  # Deepchecks Eval client

Run the following cells only if you want to log your RAG chatbot's interactions with the Deepchecks client so that they appear in the dashboard; otherwise you can skip them.

In [None]:
import os
from getpass import getpass

from deepchecks_llm_client.client import DeepchecksLLMClient
from deepchecks_llm_client.data_types import EnvType, AnnotationType, LogInteractionType, ApplicationType

# Initialize Deepchecks LLM Eval client - need to login to GUI to get API key https://app.llm.deepchecks.com/configuration/api-key
# SECURITY: never store the token in the notebook. It is read from the
# DEEPCHECKS_API_TOKEN environment variable, with an interactive prompt as fallback.
# NOTE(review): a token was previously hardcoded in this cell; treat it as leaked and revoke it.
deepchecks_api_token = os.environ.get("DEEPCHECKS_API_TOKEN") or getpass("Deepchecks LLM Eval API token: ")
dc_client = DeepchecksLLMClient(
    api_token=deepchecks_api_token,
)
APP_NAME = 'RAGChatbot - "Stuart"'  # appears in GUI dashboard like this
dc_client.create_application(APP_NAME, ApplicationType.QA)

Now, let's modify our `ask_rag_chatbot` function to log interactions to Deepchecks LLM Eval:

In [12]:
def ask_rag_chatbot(question, rag_chain, version_name):
    """Ask the chain one question, log the interaction to Deepchecks, and return the answer.

    NOTE: this definition replaces the earlier two-argument ``ask_rag_chatbot``
    defined in this notebook.

    Args:
        question: Question text, sent to the chain under the "query" key.
        rag_chain: Callable chain whose result provides "result" and
            "source_documents" entries.
        version_name: Deepchecks application version the interaction is logged under.

    Returns:
        dict with keys "question", "answer" and "sources". Each source is the
        document's ``metadata['source']``, or 'Unknown source' when that key is absent.
    """
    import hashlib  # local import keeps this cell self-contained

    result = rag_chain({"query": question})

    # Built-in hash() of a str is salted per Python process, so it would yield a
    # different ID after every kernel restart. A content digest is stable, so each
    # unique question keeps its own ID across all versions and sessions.
    interaction_id = hashlib.sha256(question.encode("utf-8")).hexdigest()

    # Log the interaction to Deepchecks LLM Eval
    dc_client.log_interaction(
        app_name=APP_NAME,
        version_name=version_name,
        env_type=EnvType.EVAL,
        input=question,
        output=result["result"],
        information_retrieval=[doc.page_content for doc in result["source_documents"]],
        user_interaction_id=interaction_id
    )

    return {
        "question": question,
        "answer": result["result"],
        "sources": [doc.metadata.get('source', 'Unknown source') for doc in result["source_documents"]]
    }

In [13]:
def ask_multiple_questions(rag_chain, questions, version_name):
    """Create a Deepchecks app version, then answer all questions concurrently.

    Args:
        rag_chain: Chain forwarded to ``ask_rag_chatbot`` for every question.
        questions: Sized collection of question strings (``len`` is used for
            the progress-bar total).
        version_name: Name of the Deepchecks version to create and log under.

    Returns:
        list of ``ask_rag_chatbot`` results, in the order of ``questions``.
    """
    # Create the version under APP_NAME before any interaction is logged to it;
    # the call's return value is not needed.
    dc_client.create_app_version(app_name=APP_NAME, version_name=version_name, description="")

    with ThreadPoolExecutor() as pool:
        # Bind chain and version once so the pool only has to supply the question.
        ask_and_log = partial(ask_rag_chatbot, rag_chain=rag_chain, version_name=version_name)
        progress = tqdm(pool.map(ask_and_log, questions), total=len(questions))
        answers = list(progress)
    return answers

Using Deepchecks to compare different retrieval configurations of your chatbot

In [None]:
# Questions asked against every configuration, so the versions are comparable
questions = [
    "What is artificial intelligence?",
    "How does machine learning relate to AI?",
    "What are some applications of deep learning?"
]

# Each configuration is logged to Deepchecks under its own version name.

# --- Configuration 1: larger chunks, fewer retrieved chunks ---
print("Configuration 1 (chunk_size=1000, k=4):")
chunk_size_1 = 1000
chunk_overlap_1 = 200
k_1 = 4
version_name = "v1_chunk1000_k4"
rag_chain_1 = setup_rag_chatbot(
    urls=urls,
    pdfs=pdfs,
    htmls=htmls,
    system_template=system_template,
    human_template=human_template,
    chunk_size=chunk_size_1,
    chunk_overlap=chunk_overlap_1,
    k=k_1,
    model_name=model_name,
)
_ = ask_multiple_questions(rag_chain_1, questions, version_name)

# --- Configuration 2: smaller chunks, more retrieved chunks ---
print("Configuration 2 (chunk_size=500, k=6):")
chunk_size_2 = 500
chunk_overlap_2 = 100
k_2 = 6
version_name = "v2_chunk500_k6"
rag_chain_2 = setup_rag_chatbot(
    urls=urls,
    pdfs=pdfs,
    htmls=htmls,
    system_template=system_template,
    human_template=human_template,
    chunk_size=chunk_size_2,
    chunk_overlap=chunk_overlap_2,
    k=k_2,
    model_name=model_name,
)
# Assigning to `_` keeps the returned list from being echoed as the cell output.
_ = ask_multiple_questions(rag_chain_2, questions, version_name)

Configuration 1 (chunk_size=1000, k=4):


100%|██████████| 3/3 [00:03<00:00,  1.05s/it]


Configuration 2 (chunk_size=500, k=6):


100%|██████████| 3/3 [00:02<00:00,  1.16it/s]


1. Go to the Deepchecks LLM Eval web interface (https://app.llm.deepchecks.com).
2. Select the `RAGChatbot - "Stuart"` application (the value of `APP_NAME`).
3. Compare different versions (e.g., "v1_chunk1000_k4" and "v2_chunk500_k6").
4. Compare individual samples OR whole versions : https://app.llm.deepchecks.com/configuration/versions