

#   RAG EVALUATION  - QA RAG System on Asthma - RAGAS FRAMEWORK

#   CONTEXT_PRECISION, CONTEXT_RECALL, CORRECTNESS, RELEVANCY, FAITHFULNESS

## Install OpenAI and LangChain dependencies

In [1]:
!pip install langchain==0.3.11
!pip install langchain-openai==0.2.12
!pip install langchain-community==0.3.11
!pip install langchain-chroma==0.1.4
!pip install sentence-transformers==2.7.0

Collecting langchain==0.3.11
  Downloading langchain-0.3.11-py3-none-any.whl.metadata (7.1 kB)
Downloading langchain-0.3.11-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain
  Attempting uninstall: langchain
    Found existing installation: langchain 0.3.14
    Uninstalling langchain-0.3.14:
      Successfully uninstalled langchain-0.3.14
Successfully installed langchain-0.3.11
Collecting langchain-openai==0.2.12
  Downloading langchain_openai-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai==0.2.12)
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading langchain_openai-0.2.12-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoke

## Enter OpenAI API Key

In [2]:
from getpass import getpass

# Ask for the key interactively so it is not hard-coded in the notebook;
# the typed value is masked in the cell output.
OPENAI_KEY = getpass('Enter Open AI API Key: ')

Enter Open AI API Key: ··········


In [3]:
import os
# NOTE(review): OPENAI_API_KEY is not referenced by any other cell visible here — confirm before removing.
OPENAI_API_KEY = OPENAI_KEY
# Export the key through the process environment as well.
# NOTE(review): presumably for code that is not handed the key explicitly
# (e.g. the ragas evaluate() call later in the notebook) — confirm.
os.environ['OPENAI_API_KEY'] = OPENAI_KEY

### OpenAI Embedding Models

LangChain gives us access to OpenAI's embedding models, including the newest ones: the smaller, highly efficient `text-embedding-3-small` model and the larger, more powerful `text-embedding-3-large` model.

In [4]:
from langchain_openai import OpenAIEmbeddings
# Embedding model shared by the whole notebook: it is handed to the Chroma
# vector store both when the collection is created and when it is re-opened.
openai_embed_model = OpenAIEmbeddings(
    model='text-embedding-3-small',
    openai_api_key=OPENAI_KEY  # Use the api_key from getpass
)

In [5]:
%%capture
!pip install unstructured selenium textstat tiktoken

In [6]:
import tiktoken
import matplotlib.pyplot as plt
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from textstat import flesch_reading_ease
from collections import Counter

In [7]:
!pip install -q ragas langchain-experimental

from ragas import evaluate
from datasets import Dataset

from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, context_entity_recall, answer_similarity, answer_correctness

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/176.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.9/176.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/209.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/480.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [8]:
# Source page for the knowledge base: the Cleveland Clinic article on asthma.
website = "https://my.clevelandclinic.org/health/diseases/6424-asthma"

In [9]:
def load_document(loader_class, website_url):
    """
    Load one or more web pages using the specified loader class.

    Args:
    loader_class (class): The loader class to instantiate. It is called with a
        list of URLs and must provide a ``load()`` method (for example
        ``SeleniumURLLoader``, as used in this notebook).
    website_url (str | list[str] | tuple[str, ...]): A single URL, or a list or
        tuple of URLs to load in one call.

    Returns:
    Whatever ``loader.load()`` returns. With the loader used in this notebook
    that is a list of documents (length 1 for the single URL loaded here),
    not a string.
    """
    # Only lists/tuples are treated as "several URLs"; any other value is
    # wrapped in a one-element list exactly as before.
    if isinstance(website_url, (list, tuple)):
        urls = list(website_url)
    else:
        urls = [website_url]
    loader = loader_class(urls)
    return loader.load()

In [10]:
import nltk
# Fetch the NLTK sentence-tokenizer and POS-tagger data packages.
# NOTE(review): presumably needed by the URL loader's text processing — confirm which step requires them.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [11]:
from langchain.document_loaders import UnstructuredURLLoader, SeleniumURLLoader

In [12]:
# Load the asthma article through SeleniumURLLoader.
selenium_loader_doc = load_document(SeleniumURLLoader, website)

In [13]:
# Sanity check: number of documents returned by the loader.
len(selenium_loader_doc)

1

In [14]:
# Inspect the first loaded document (metadata and page_content).
# The page_content also contains site navigation and "Advertisement" text.
selenium_loader_doc[0]

Document(metadata={'source': 'https://my.clevelandclinic.org/health/diseases/6424-asthma', 'title': 'Asthma: Types, Causes, Symptoms, Diagnosis & Treatment', 'description': 'Asthma is a chronic disease that affects the airways of your lungs. It can be managed but not cured. ', 'language': 'en'}, page_content='Locations:\n\nAbu Dhabi|Canada|Florida|London|Nevada|Ohio|\n\nHome/\n\nHealth Library/\n\nDiseases & Conditions/\n\nAsthma\n\nAdvertisement\n\nAdvertisement\n\nAsthma\n\nBronchial asthma (or asthma) is a lung disease. Your airways get narrow and swollen and are blocked by excess mucus. Medications can treat these symptoms.\n\nAdvertisement\n\nCleveland Clinic is a non-profit academic medical center. Advertising on our site helps support our mission. We do not endorse non-Cleveland Clinic products or services. Policy\n\nCare at Cleveland Clinic\n\nAsthma Treatment for Adults\n\nFind a Doctor and Specialists\n\nAsthma Treatment for Children\n\nFind a Pediatric Doctor and Specialists

### Split larger documents into smaller chunks

In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded page into overlapping chunks for embedding and retrieval.
# NOTE(review): 1450 / 200 are unexplained magic numbers — presumably character
# counts; confirm and document why these values were chosen.
splitter = RecursiveCharacterTextSplitter(chunk_size=1450, chunk_overlap=200)
chunked_docs = splitter.split_documents(selenium_loader_doc)

## Vector Databases

One of the most common ways to store and search over unstructured data is to embed it and store the resulting embedding vectors, and then at query time to embed the unstructured query and retrieve the embedding vectors that are 'most similar' to the embedded query. A vector database takes care of storing embedded data and performing vector search for you.

### Chroma Vector DB

[Chroma](https://docs.trychroma.com/getting-started) is an AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0.

### Create a Vector DB and persist on disk

Here we initialize a Chroma vector DB client. Because we also want to save the data to disk, we pass the directory where it should be persisted.

In [16]:
from langchain_chroma import Chroma

# Embed every chunk with openai_embed_model and store the vectors in the
# 'rag_asthma_db' collection, persisted under ./asthma_db and configured
# for cosine distance.
# NOTE(review): no explicit ids are passed, so re-running this cell against an
# existing ./asthma_db may add the same chunks again — confirm, and clear the
# directory or pass stable ids if duplicates show up in retrieval results.
chroma_db = Chroma.from_documents(documents=chunked_docs,
                                  collection_name='rag_asthma_db',
                                  embedding=openai_embed_model,
                                  collection_metadata={"hnsw:space": "cosine"},
                                  persist_directory="./asthma_db")

### Load Vector DB from disk

This just shows that once you have a vector database on disk, you can load it and create a connection to it at any time.

In [17]:
# Re-open the persisted collection from disk. The same embedding model is
# supplied so that queries are embedded the same way as the stored chunks.
# This rebinds chroma_db, which the retrievers below are built on.
chroma_db = Chroma(persist_directory="./asthma_db",
                   collection_name='rag_asthma_db',
                   embedding_function=openai_embed_model)

In [None]:
# Display the vector store object to confirm the connection exists.
chroma_db

<langchain_chroma.vectorstores.Chroma at 0x73f90f3d9ad0>

## Load Connection to LLM

Here we create a connection to ChatGPT to use later in our chains

In [18]:
from langchain_openai import ChatOpenAI

# Chat model used both by the LLM-based retrieval filter and by the QA chain.
# temperature=0 is set explicitly; presumably to keep answers as repeatable as
# possible across evaluation runs — confirm.
chatgpt = ChatOpenAI(model_name='gpt-4o-mini', temperature=0,openai_api_key=OPENAI_KEY)

## Chained Retrieval Pipeline

This strategy chains multiple retrievers sequentially to arrive at the most relevant documents. The flow is as follows:

Similarity Retrieval → Compression Filter → Reranker Model Retrieval

![](https://i.imgur.com/KriNRDJ.gif)


In [19]:
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.retrievers import ContextualCompressionRetriever

# Retriever 1 - simple cosine-distance based retriever returning the 5 most similar chunks
similarity_retriever = chroma_db.as_retriever(search_type="similarity",
                                              search_kwargs={"k": 5})

# LLM-based filter: decides which of the initially retrieved documents to drop and which to keep
_filter = LLMChainFilter.from_llm(llm=chatgpt)
# Retriever 2 - retrieves the documents similar to the query and then applies the filter
compressor_retriever = ContextualCompressionRetriever(
    base_compressor=_filter, base_retriever=similarity_retriever
)

# Download an open-source cross-encoder reranker model - BAAI/bge-reranker-large
reranker = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-large")
# Keep only the 3 highest-ranked documents after reranking
reranker_compressor = CrossEncoderReranker(model=reranker, top_n=3)
# Retriever 3 - uses the reranker model to rerank the results of the previous (filtered) retriever
final_retriever = ContextualCompressionRetriever(
    base_compressor=reranker_compressor, base_retriever=compressor_retriever
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

In [20]:
# Smoke-test the chained retriever on a sample question and inspect the returned chunks.
query = "What asthma treatment options are there?"
docs = final_retriever.invoke(query)
docs

[Document(metadata={'description': 'Asthma is a chronic disease that affects the airways of your lungs. It can be managed but not cured. ', 'language': 'en', 'source': 'https://my.clevelandclinic.org/health/diseases/6424-asthma', 'title': 'Asthma: Types, Causes, Symptoms, Diagnosis & Treatment'}, page_content='Diagnosis and Tests\n\nHow do healthcare providers diagnose asthma?\n\nYour healthcare provider will review your medical history, including information about your parents and siblings. Your provider will also ask you about your symptoms. Your provider will need to know any history of allergies, eczema (a bumpy rash caused by allergies) and other lung diseases.\n\nYour provider may order spirometry. This test measures airflow through your lungs and is used to diagnose and monitor your progress with treatment. Your healthcare provider may order a chest X-ray, blood test or skin test.\n\nManagement and Treatment\n\nWhat asthma treatment options are there?\n\nYou have options to help

In [21]:
# Second smoke test with a different question; rebinds `query` and `docs`.
query = "How do healthcare providers diagnose asthma?"
docs = final_retriever.invoke(query)
docs

[Document(metadata={'description': 'Asthma is a chronic disease that affects the airways of your lungs. It can be managed but not cured. ', 'language': 'en', 'source': 'https://my.clevelandclinic.org/health/diseases/6424-asthma', 'title': 'Asthma: Types, Causes, Symptoms, Diagnosis & Treatment'}, page_content='Diagnosis and Tests\n\nHow do healthcare providers diagnose asthma?\n\nYour healthcare provider will review your medical history, including information about your parents and siblings. Your provider will also ask you about your symptoms. Your provider will need to know any history of allergies, eczema (a bumpy rash caused by allergies) and other lung diseases.\n\nYour provider may order spirometry. This test measures airflow through your lungs and is used to diagnose and monitor your progress with treatment. Your healthcare provider may order a chest X-ray, blood test or skin test.\n\nManagement and Treatment\n\nWhat asthma treatment options are there?\n\nYou have options to help

## Build a QA RAG Chain

To build a RAG chain we need a prompt template that instructs the LLM not to answer questions beyond the scope of the retrieved context documents. There are various such prompts available; we craft our own below.

In [22]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text with two placeholders, {question} and {context}, which restricts
# the model to the retrieved context.
prompt = """You are an assistant for question-answering tasks.
            Use the following pieces of retrieved context to answer the question.
            If no context is present or if you don't know the answer, just say that you don't know.
            Do not make up the answer unless it is there in the provided context.
            Give a detailed answer with regard to the question.

            Question:
            {question}

            Context:
            {context}

            Answer:
         """

# NOTE(review): the RetrievalQA cell below builds its own PromptTemplate from
# `prompt`; prompt_template is not referenced by any cell visible here.
prompt_template = ChatPromptTemplate.from_template(prompt)

## QA RAG Chain



In [23]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# QA chain: retrieves context with the chained retriever (final_retriever) and
# answers with chatgpt using the custom prompt text.
# return_source_documents=True so the retrieved chunks can be collected as
# `contexts` for the evaluation later on.
qa_chain = RetrievalQA.from_chain_type(
        llm=chatgpt,
        retriever=final_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PromptTemplate.from_template(prompt)}
    )

In [24]:
# Evaluation questions sent through the QA chain.
queries = [
    "what are Bronchodilators?",
    "How do healthcare providers diagnose asthma?",
    "Who can get asthma?",
    "Can asthma be cured?",
    "What causes asthma?"

]

# Reference answers, index-aligned with `queries` (same order, same length).
# These strings are evaluation data: editing them changes the scores.
ground_truths = [
    "These medicines relax the muscles around your airways. The relaxed muscles let the airways move air. They also let mucus move more easily through the airways. These medicines relieve your symptoms when they happen and are used for intermittent and chronic asthma",
    "Your healthcare provider will review your medical history, including information about your parents and siblings. Your provider will also ask you about your symptoms. Your provider will need to know any history of allergies, eczema (a bumpy rash caused by allergies) and other lung diseases.Your provider may order spirometry. This test measures airflow through your lungs and is used to diagnose and monitor your progress with treatment. Your healthcare provider may order a chest X-ray, blood test or skin test.",
    "Anyone can develop asthma at any age. People with allergies or people exposed to tobacco smoke are more likely to develop asthma. This includes secondhand smoke (exposure to someone else who is smoking) and thirdhand smoke (exposure to clothing or surfaces in places where some has smoked).Statistics show that people assigned female at birth tend to have asthma more than people assigned male at birth. Asthma affects Black people more frequently than other races.",
    "No. Asthma can’t be cured, but it can be managed. Children may outgrow asthma as they get older.",
    "Researchers don’t know why some people have asthma while others don’t. But certain factors present a higher risk:Allergies: Having allergies can raise your risk of developing asthma.Environmental factors: People can develop asthma after exposure to things that irritate the airways. These substances include allergens, toxins, fumes and second- or third-hand smoke. These can be especially harmful to infants and young children whose immune systems haven’t finished developing.Genetics: If your family has a history of asthma or allergic diseases, you have a higher risk of developing the disease.Respiratory infections: Certain respiratory infections, such as respiratory syncytial virus (RSV), can damage young children’s developing lungs."
]

In [25]:

# Run every evaluation question through the QA chain and collect, per question,
# the generated answer and the text of the chunks retrieved for it.
results = []
contexts = []
for query in queries:
    # Use .invoke(): calling the chain object directly, qa_chain({...}), is
    # deprecated and emits a warning.
    result = qa_chain.invoke({"query": query})

    results.append(result['result'])
    # "source_documents" is present because the chain was built with
    # return_source_documents=True; keep only the chunk text.
    contexts.append([doc.page_content for doc in result["source_documents"]])

# Columns handed to Dataset.from_dict(d) in the evaluation cell.
d = {
    "question": queries,
    "answer": results,
    "contexts": contexts,
    "ground_truth": ground_truths
}

  result = qa_chain({"query": query})


In [26]:
!pip install openai



In [28]:
# Build a Dataset from the collected questions, answers, contexts and ground truths.
dataset = Dataset.from_dict(d)
from ragas import evaluate
#from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, context_entity_recall, answer_similarity, answer_correctness, harmfulness
# Score every row with seven metrics (5 questions x 7 metrics = the 35 jobs shown by the progress bar).
# No llm/embeddings are passed here, so evaluate() uses its own defaults.
score = evaluate(dataset, metrics=[faithfulness, answer_relevancy, context_precision, context_recall, context_entity_recall, answer_similarity, answer_correctness])
score_df = score.to_pandas()
#score_df.to_csv("EvaluationScores.csv", encoding="utf-8", index=False)

# Average each metric over all questions.
# NOTE(review): answer_similarity is evaluated above but is not part of this
# summary — confirm whether the omission is intentional.
score_df[['faithfulness','answer_relevancy', 'context_precision', 'context_recall',
       'context_entity_recall', 'answer_correctness'
       ]].mean(axis=0)

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Unnamed: 0,0
faithfulness,0.971429
answer_relevancy,0.759937
context_precision,1.0
context_recall,0.971429
context_entity_recall,0.455962
answer_correctness,0.761975
