In [None]:
pip install llama-index
pip install llama-index-core
pip install llama-index-llms-openai
pip install llama-index-llms-replicate
pip install llama-index-embeddings-huggingface
pip install llama-index-embeddings-langchain
pip install llama-index-vector-stores-faiss
pip install faiss-cpu
pip install llama-index-postprocessor-flag-embedding-reranker
pip install git+https://github.com/FlagOpen/FlagEmbedding.git # make sure git has been installed first
# install ollama
# https://ollama.com/download/windows

In [None]:
import json
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter, SemanticSplitterNodeParser
from llama_index.core.schema import MetadataMode
from llama_index.llms.ollama import Ollama
from llama_index.core.prompts import PromptTemplate

# This module patches asyncio to allow nested use of asyncio.run and loop.run_until_complete.
import nest_asyncio
nest_asyncio.apply()

from llama_index.core.extractors import (
    SummaryExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
    KeywordExtractor,
)
from llama_index.extractors.entity import EntityExtractor
from llama_index.core.ingestion import IngestionPipeline
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Settings
from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import VectorStoreIndex, SimpleKeywordTableIndex, RAKEKeywordTableIndex
import faiss


from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import BaseSynthesizer

# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever
)


In [18]:
# test llama 3.2, pull llama3.2 using Ollam pull llama3.2
llm = Ollama(model="llama3.2", temperature=0.6, request_timeout=120.0) # The default temperature is 0.6. we may run llm in GPU
resp = llm.complete("Who is Paul Graham?")
print(resp)

Paul Graham is a well-known American entrepreneur, venture capitalist, and author. He is best known for co-founding Y Combinator (YC), a highly successful startup accelerator program that has invested in many notable companies such as Dropbox, Reddit, Airbnb, and Stripe.

Graham was born in 1970 in San Francisco, California. He dropped out of Harvard University to pursue his entrepreneurial endeavors. In the late 1990s, he co-founded several startups, including Cruise Control, which was sold to PalmSource, a subsidiary of Access Company.

In 2005, Graham co-founded Y Combinator with Robert Noke and Jessica Livingston. The program's mission is to provide seed funding and mentorship to early-stage startups in exchange for equity. YC has become one of the most successful startup accelerators, having invested in over 2,000 companies since its inception.

Graham is also known for his writings on entrepreneurship and venture capital. He has written several articles and essays on topics such 

In [2]:
# set llm model 
# llama3.2 1B parameters  context length of 128K, Up to 9T tokens
llm = Ollama(model="llama3.2", request_timeout=300.0) # we may need to increase request_timeout to handle a large number of docs
Settings.llm = llm

# set embedding model, sequence length 512 for BAAI/bge-small-en-v1.5
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model

In [3]:
# Load documents
directory_path = './RAG_assignment/Policy_exm'
reader = SimpleDirectoryReader(input_dir=directory_path)
docs = reader.load_data()

print("the number of docs: ", len(docs))

the number of docs:  3


In [4]:
# add title for each doc and delete the first line for doc content              
for cd in docs:
    parts = cd.get_content().split("\n", 1)
    metadata_additions =  {"doc_title": parts[0].strip().lstrip('#').strip()}
    cd.metadata.update(metadata_additions)
    cd.text = parts[1]
print(docs[0].metadata)

{'file_path': 'C:\\Users\\liche\\Documents\\RAG_assignment\\RAG_assignment\\Policy_exm\\Policy_1.txt', 'file_name': 'Policy_1.txt', 'file_type': 'text/plain', 'file_size': 6393, 'creation_date': '2024-05-09', 'last_modified_date': '2024-12-03', 'doc_title': 'Comprehensive Data Privacy Policy'}


In [5]:
import re
from typing import Any, List, Optional, Sequence

from llama_index.core.callbacks.base import CallbackManager
from llama_index.core.node_parser.interface import NodeParser
from llama_index.core.node_parser.node_utils import build_nodes_from_splits
from llama_index.core.schema import BaseNode, MetadataMode, TextNode
from llama_index.core.utils import get_tqdm_iterable

# create a customized section chunking node parser
class SectionNodeParser(NodeParser):
    """Section node parser./using section based splitting logic.
    Each node contains its text content and the section name leading to it.

    Args:
        include_metadata (bool): whether to include metadata in nodes
        include_prev_next_rel (bool): whether to include prev/next relationships
    """
    
    @classmethod
    def from_defaults(
        cls,
        include_metadata: bool = True,
        include_prev_next_rel: bool = True,
        callback_manager: Optional[CallbackManager] = None,
    ) -> "SectionNodeParser":
        callback_manager = callback_manager or CallbackManager([])
        return cls(
            include_metadata=include_metadata,
            include_prev_next_rel=include_prev_next_rel,
            callback_manager=callback_manager,
        )

    def get_nodes_from_node(self, node: BaseNode) -> List[TextNode]:
        """Get nodes from document by splitting on headers."""
        text = node.get_content(metadata_mode=MetadataMode.NONE)

        section_pattern =  r'(\d+(\.\d+)*\.\s[^\n]+)'  
        # Match sections and subsections using regex
        matches = list(re.finditer(section_pattern, text))
        
        nodes = []
        # If no matches are found, skip this document
        if not matches:
            print("No sections matched. Please check the document format or regex.")
            
        # Process matches to split the document into chunks
        for i, match in enumerate(matches):
            # Section title
            section_title = match.group(0).strip()
            # Start of the next section or end of the document
            next_start = matches[i + 1].start() if i + 1 < len(matches) else len(text)
            # Extract content between the current and next section
            section_content = text[match.end():next_start].strip()
            
            # Combine section title and content into a single node text
            node_text = f"{section_title}\n{section_content}"

            # Create a Node with the text and metadata
            nodes.append(self._build_node_from_split(node_text, node,  metadata={"section_title": section_title}))
            
        return nodes

    def _build_node_from_split(
        self,
        text_split: str,
        node: BaseNode,
        metadata: dict,
    ) -> TextNode:
        """Build node from single text split."""
        node = build_nodes_from_splits([text_split], node, id_func=self.id_func)[0]

        if self.include_metadata:
            node.metadata = {**node.metadata, **metadata}

        return node

    def _parse_nodes(
        self,
        nodes: Sequence[BaseNode],
        show_progress: bool = False,
        **kwargs: Any,
    ) -> List[BaseNode]:
        """Parse nodes."""
        all_nodes: List[BaseNode] = []
        nodes_with_progress = get_tqdm_iterable(nodes, show_progress, "Parsing nodes")

        for node in nodes_with_progress:
            nodes = self.get_nodes_from_node(node)
            all_nodes.extend(nodes)

        return all_nodes



In [None]:
# create nodes and extract meta data for each node
from llama_index.core.schema import Document

transformations = [
    # SemanticSplitterNodeParser(embed_model=OpenAIEmbedding()), # if we want to use OpenAI embedding
    # SemanticSplitterNodeParser(embed_model=embed_model), # semantic splitter but I found it is not good for our case
    SectionNodeParser(), 
    # TitleExtractor(nodes=5, llm=llm), # flexible to set llm here, title may not be necessary for this case because context is not long
    QuestionsAnsweredExtractor(questions=3, llm=llm),
    SummaryExtractor(summaries=["prev", "self"], llm=llm),
    KeywordExtractor(keywords=5, llm=llm)
    # EntityExtractor(prediction_threshold=0.5, device="cpu", llm=llm), # entity is not necessary for this case
]

pipeline = IngestionPipeline(transformations=transformations)

# Run the pipeline

import time
# Save timestamp
start = time.time()
nodes = pipeline.run(documents=docs, num_workers=1) 

# Save timestamp
end = time.time()
print("the time taken for node creating {} s ".format(end - start))
print("the number of nodes: ", len(nodes))

# if the number of docs are large, we split them into bins for preprocessing

# initial_bin_size = 100
# num_initial_bins = 21
# total_range = len(docs)
# # Create the first four bins with size 500 each
# bins = [(i * initial_bin_size, (i + 1) * initial_bin_size) for i in range(num_initial_bins)]
# # Add the remaining range as the last bin
# bins.append((num_initial_bins * initial_bin_size, total_range))
# nodes = []

# for binRange in bins:
#     nodes1 = pipeline.run(documents=docs[binRange[0]:binRange[-1]], num_workers=1) 
#     nodes.append(nodes1)


In [298]:
# save the nodes with content and metadata
# import pickle
# with open("policy_nodes.pkl", "wb") as file:  # 'wb' means write-binary mode
#     pickle.dump(nodes, file)

In [6]:
# load nodes
import pickle
with open("policy_nodes.pkl", "rb") as file:  # 'rb' means read-binary mode
    nodes = pickle.load(file)

In [7]:
nodes[0].metadata

{'file_path': 'C:\\Users\\liche\\Documents\\RAG_assignment\\RAG_assignment\\Policy_exm\\Policy_1.txt',
 'file_name': 'Policy_1.txt',
 'file_type': 'text/plain',
 'file_size': 6393,
 'creation_date': '2024-05-09',
 'last_modified_date': '2024-12-03',
 'doc_title': 'Comprehensive Data Privacy Policy',
 'section_title': '1. Introduction**',
 'questions_this_excerpt_can_answer': 'Based on the provided context, here are three specific questions and their potential answers:\n\n1. What is the specific date range during which [Company Name] was actively collecting personal data from customers, users, and employees?\n\nAnswer: The creation date of the policy (2024-05-09) and the last modified date (2024-12-03) imply that the company has been active in this period. However, to provide a more specific answer, we need additional information about when exactly [Company Name] started collecting personal data.\n\n2. Which geographic locations are covered by the scope of the policy?\n\nAnswer: The fac

In [8]:
# build vector store index. we can extend to other Vector DB
d = 512 # depend on the used embedding model
faiss_index = faiss.IndexFlatL2(d)
faiss_store = FaissVectorStore(faiss_index=faiss_index)
vector_index = VectorStoreIndex(nodes, vector_store=faiss_store, embed_model=embed_model)
keyword_index = SimpleKeywordTableIndex(nodes)

In [51]:
# Define a CustomRetriever

reranker =  SentenceTransformerRerank(model="BAAI/bge-reranker-large", top_n=5)

# define a customized Retriever which can switch between vector search and hybrid search with different reranker model 
class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both Vector/hybrid search and Reranking"""

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        keyword_retriever: KeywordTableSimpleRetriever,
        reranker: reranker,
        Mode: str = "Vector" # "HybridAND", "HybridOR", "Vector"
    ) -> None:
        """Init params."""
        self._vector_retriever = vector_retriever
        self._keyword_retriever = keyword_retriever
        if Mode not in ("HybridAND", "HybridOR", "Vector"):
            raise ValueError("Invalid Hybrid mode.")
        self._Mode = Mode
        self._reranker = reranker
        super().__init__()
   

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query."""
        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        keyword_nodes = self._keyword_retriever.retrieve(query_bundle) # we can control the number of keywords here

        vector_ids = {n.node.node_id for n in vector_nodes}
        keyword_ids = {n.node.node_id for n in keyword_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in keyword_nodes})
        
        if self._Mode == "HybridAND":
            retrieve_ids = vector_ids.intersection(keyword_ids) # and. We use 'and' for our case
            if len(retrieve_ids) ==0:
                retrieve_ids = vector_ids
        elif self._Mode == "HybridOR":
             retrieve_ids = vector_ids.union(keyword_ids) # or
        else: # vector search
             retrieve_ids = vector_ids
        
        retrieve_nodes = [combined_dict[rid] for rid in retrieve_ids]
        
        if self._reranker is not None:
            retrieve_nodes = self._reranker.postprocess_nodes(retrieve_nodes, query_bundle)
        
        return retrieve_nodes

vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5) # make similarity_top_k a bit higher can include a broad knowledge
keyword_retriever = KeywordTableSimpleRetriever (index=keyword_index) 
# keyword_retriever = KeywordTableLLMRetriever(index=keyword_index) 

# hybrid search with the intersection between vectors and keywords search with reranker
hybrid_retriever_reranker_AND = CustomRetriever(vector_retriever, keyword_retriever, reranker, 'HybridAND') 
# hybrid search with the union of vectors and keywords search without reranker
hybrid_retriever_None_OR = CustomRetriever(vector_retriever, keyword_retriever, None, 'HybridOR') 



In [10]:
# Retrieval evluation to select a retriever and embedding model
# step 1: generate questions based on the context
# step 2: use the questions as the queries, check whether the relevant context included in the retrieved nodes and calc metrics 

from llama_index.core.evaluation import (
    generate_question_context_pairs
)
from llama_index.core.evaluation import RetrieverEvaluator

generate_qa_prompt = """\
            Context information is below.
            
            ---------------------
            {context_str}
            ---------------------
            
            Given the context information and no prior knowledge.
            generate only questions based on the below query.
            
            You are a Teacher/ Professor. Your task is to setup \
            {num_questions_per_chunk} questions for an upcoming \
            quiz/examination. The questions should be diverse in nature \
            across the document. Restrict the questions to the \
            context information provided. Do not ask the questions like "Here are {num_questions_per_chunk} questions based on the context information:"\
            and do not include any anwer.
            """


qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=3, #  generating questions when <5, generating answers and questions when >5
    qa_generate_prompt_tmpl = generate_qa_prompt
)

100%|██████████| 27/27 [05:43<00:00, 12.74s/it]


In [14]:
# import pickle
# with open("qa_dataset.pkl", "wb") as file:  # 'wb' means write-binary mode
#     pickle.dump(qa_dataset, file)
with open("qa_dataset.pkl", "rb") as file:  # 'rb' means read-binary mode
    qa_dataset = pickle.load(file)

In [12]:
# show the generated questions
print ("the number of questions generated: ", len(qa_dataset.queries))

the number of questions generated:  81


In [13]:
qa_dataset.queries

{'16750bf6-6cae-494a-853a-35e257834384': "What is the foundational principle of [Company Name]'s business operations regarding personal data?",
 'b9999098-2cdf-4e41-a3d9-9d61bcc9f58a': "According to what does this Data Privacy Policy aim to transparently communicate about [Company Name]'s practices?",
 'e4271bfd-1644-4ed0-a44d-3892a3b844fd': 'Who does this policy apply universally to, regardless of their location?',
 '2c843939-3af9-4da0-b2b4-fb0fb7207413': 'What types of data are collected when individuals access and utilize a service, according to the given data collection practices?',
 '0cd7d2e2-1b28-449e-9d0f-9228251bfcd8': 'A) Personal Identification Information',
 '53d033fc-f834-4557-91e6-b12e6dd400e9': 'B) Usage Data',
 'cfc0d5d8-7fef-4581-a77d-d9d0e7a77b60': 'Where are data storage locations chosen for their stringent security standards and data protection compliance?',
 '7dd8d3b7-a13c-4cc5-b27b-5d8d30a8db85': 'What is the purpose of implementing role-based access controls (RBAC

In [None]:
# list the options for embedding model, rerank model and search method

from llama_index.core.postprocessor import SentenceTransformerRerank

EMBEDDINGS = {
    "bge-small": HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5'), 
    "MiniLM-L6-v2": HuggingFaceEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2')
}
RERANKERS = {
    "WithoutReranker": None,
    "bge-reranker-base": SentenceTransformerRerank(model="BAAI/bge-reranker-base", top_n=5),
    "bge-reranker-large": SentenceTransformerRerank(model="BAAI/bge-reranker-large", top_n=5)
}
SEARCHERS = {
    "Vector Search": "Vector", # vector search
    "Hybrid Search AND": "HybridAND", # hybrid search using the interaction between vector and keyword
    "Hybrid Search OR": "HybridOR" # hybrid search using the union of vector and keyword
}

In [334]:
# Run retrieval evaluation

from llama_index.core.evaluation import RetrieverEvaluator
metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
metric_df = []
for embedKey in EMBEDDINGS.keys(): # loop all embedding models
    print("the embedding model: ", embedKey)

    if embedKey == "bge-small":
        d = 512
    else:
        d = 384 # all-MiniLM-L6-v2
    faiss_index = faiss.IndexFlatL2(d)
    faiss_store = FaissVectorStore(faiss_index=faiss_index)
    vector_index = VectorStoreIndex(nodes, vector_store=faiss_store, embed_model=EMBEDDINGS[embedKey])
    vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
    
    Settings.embed_model = EMBEDDINGS[embedKey] # change the global setting
    for searchKey in SEARCHERS.keys(): # loop all search methods
        for rerankerKey in RERANKERS.keys(): # loop all reranker methods
            print(RERANKERS[rerankerKey])
            Retriever = CustomRetriever(vector_retriever, keyword_retriever, RERANKERS[rerankerKey], SEARCHERS[searchKey])
            retriever_evaluator = RetrieverEvaluator.from_metric_names(
                metrics, retriever=Retriever
            )
            eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

            metric_dicts = []
            for eval_result in eval_results:
                metric_dict = eval_result.metric_vals_dict
                metric_dicts.append(metric_dict)
            full_df = pd.DataFrame(metric_dicts)
            columns = {
                "retrievers": embedKey + '/' + searchKey + '/' + rerankerKey,
                **{k: [full_df[k].mean()] for k in metrics},
            }
            if  len(metric_df)==0: 
                metric_df = pd.DataFrame(columns)
            else: 
                metric_df = pd.concat([pd.DataFrame(columns), metric_df])
metric_df
    


the embedding model:  bge-small
None
callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x00000168269B1A30> model='BAAI/bge-reranker-base' top_n=5 device='cpu' keep_retrieval_score=False
callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000016847014DD0> model='BAAI/bge-reranker-large' top_n=5 device='cpu' keep_retrieval_score=False
None
callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x00000168269B1A30> model='BAAI/bge-reranker-base' top_n=5 device='cpu' keep_retrieval_score=False
callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000016847014DD0> model='BAAI/bge-reranker-large' top_n=5 device='cpu' keep_retrieval_score=False
None
callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x00000168269B1A30> model='BAAI/bge-reranker-base' top_n=5 device='cpu' keep_retrieval_score=False
callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,MiniLM-L6-v2/Hybrid Search OR/bge-reranker-large,0.9,0.699375,0.18,0.9,0.699375,0.254175
0,MiniLM-L6-v2/Hybrid Search OR/bge-reranker-base,0.9125,0.721042,0.1825,0.9125,0.721042,0.260963
0,MiniLM-L6-v2/Hybrid Search OR/WithoutReranker,0.95,0.266585,0.096297,0.95,0.266585,0.09465
0,MiniLM-L6-v2/Hybrid Search AND/bge-reranker-large,0.7375,0.639583,0.218542,0.7375,0.639583,0.285409
0,MiniLM-L6-v2/Hybrid Search AND/bge-reranker-base,0.7375,0.627083,0.218542,0.7375,0.627083,0.280395
0,MiniLM-L6-v2/Hybrid Search AND/WithoutReranker,0.7375,0.386667,0.218542,0.7375,0.386667,0.216403
0,MiniLM-L6-v2/Vector Search/bge-reranker-large,0.8125,0.695833,0.1625,0.8125,0.695833,0.246092
0,MiniLM-L6-v2/Vector Search/bge-reranker-base,0.8125,0.685417,0.1625,0.8125,0.685417,0.243518
0,MiniLM-L6-v2/Vector Search/WithoutReranker,0.8125,0.362292,0.1625,0.8125,0.362292,0.159993
0,bge-small/Hybrid Search OR/bge-reranker-large,0.925,0.719167,0.185,0.925,0.719167,0.261383


In [53]:
# based on the retrieval evaluation above,
# I chose the hybrid OR search method, the bge-small embedding model and the bge-reranker-large reranker

# set embedding model
embed_model = HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
Settings.embed_model = embed_model

# set retriever
d= 512
faiss_index = faiss.IndexFlatL2(d)
faiss_store = FaissVectorStore(faiss_index=faiss_index)
vector_index = VectorStoreIndex(nodes, vector_store=faiss_store, embed_model=embed_model)
vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
keyword_retriever = KeywordTableSimpleRetriever (index=keyword_index) 
reranker = SentenceTransformerRerank(model="BAAI/bge-reranker-large", top_n=5)
hybrid_retriever_reranker_OR = CustomRetriever(vector_retriever, keyword_retriever, reranker, 'HybridOR') 


In [19]:
from llama_index.core import PromptTemplate

qa_prompt = PromptTemplate(
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Do not add any information that is not explicitly present in the retrieved content. "
    "If the information is not available, respond with, \"The provided documents do not contain this information.\"\n"
    "Do not try to make up an answer.\n"
    "Query: {query_str}\n"
    "Answer: "
)

In [101]:
# create a RAG Query engine
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_index.core.retrievers import VectorIndexRetriever

import os
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true'

class Myresponse:
    def __init__(self, field1, field2):
        self.response = field1
        self.source_nodes = field2

class RAGStringQueryEngine(CustomQueryEngine):
    """RAG String Query Engine."""

    retriever: BaseRetriever
    response_synthesizer: BaseSynthesizer
    llm: Ollama
    qa_prompt: PromptTemplate
    
    def custom_query(self, query_str: str):
        retrieval_nodes = self.retriever.retrieve(query_str) # retrieved nodes
        query_bundle = QueryBundle(query_str)
        context_str = "\n\n".join([n.node.get_content() for n in retrieval_nodes])
        response = self.llm.complete(
            qa_prompt.format(context_str=context_str, query_str=query_str)
        )
        # align to the response evaluation later
        responseStructure = Myresponse(field1=str(response), field2=retrieval_nodes)
        
        return responseStructure

synthesizer = get_response_synthesizer(response_mode="compact")
llm = Ollama(model="llama3.2", temperature=0.6, request_timeout=300.0)

query_engine = RAGStringQueryEngine(
    retriever=hybrid_retriever_reranker_OR, 
    response_synthesizer=synthesizer,
    llm=llm,
    qa_prompt=qa_prompt
)

# query_text = "What is the purpose of comprehensive data privacy policy?"
# query_text = "where is the data storage locations?"
# query_text = "What is the primary principle guiding [Company Name]'s business operations regarding personal data?"
# query_text = "What is the foundational principle of [Company Name]'s business operations regarding personal data?"
# query_text = "what is AI model?"
# query_text = "Does the company allow three years of maternity leave?"
query_text = "What is AI Ethics Policy? How do I follow the AI Ethics policy?"
response = query_engine.query(query_text)
print(response.response)
# # Print the top-matching nodes retrieval_nodes
# print(response.source_nodes)

According to the context information, the AI Ethics Policy is a document that outlines the guiding principles and procedures for the governance of artificial intelligence (AI) model development, deployment, and monitoring within the company.

To follow the AI Ethics policy, you are expected to:

1. Understand and implement the guidelines set forth in this policy.
2. Collaborate with interdisciplinary teams, including ethicists and sociologists, to understand and address the nuances of fairness in diverse cultural and social contexts (as mentioned in Principle 4.1 Fairness).
3. Enhance transparency by developing interfaces that allow users to query AI decisions and receive explanations in understandable terms (as mentioned in Principle 4.2 Transparency).
4. Document all AI systems' decision-making processes and methodologies, ensuring that this documentation is accessible to all relevant stakeholders and regularly updated (as mentioned in Principle 4.2 Transparency).
5. Establish a rigo

In [102]:
# response evaluation, batch running
from llama_index.core.evaluation import (
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator,
    BatchEvalRunner
)

selected_items = list(qa_dataset.queries.items())

# Convert back to dictionary (if needed)
selected_queries = dict(selected_items)

# Let's pick top 10 queries to do evaluation
batch_eval_queries = selected_queries

# pull gemma2:2b using Ollam pull gemma2:2b
Evallm =  Ollama(model="phi3:mini", request_timeout=300.0)
# Evallm =  Ollama(model="gemma2:2b", request_timeout=300.0)
# Evallm =  llm
faithfulness_gemma2 = FaithfulnessEvaluator(llm=Evallm)
relevancy_gemma2 = RelevancyEvaluator(llm=Evallm)
correctness_gemma2 = CorrectnessEvaluator(llm=Evallm)

# Initiate BatchEvalRunner to compute FaithFulness and Relevancy Evaluation.
runner = BatchEvalRunner(
    # {"faithfulness": faithfulness_gemma2, "relevancy": relevancy_gemma2, 'correctness': correctness_gemma2},
    {"faithfulness": faithfulness_gemma2, "relevancy": relevancy_gemma2},
    # {"faithfulness": faithfulness_gemma2},
    # {"relevancy": relevancy_gemma2},
    workers=1, 
)

# # Compute evaluation
query_engine = RAGStringQueryEngine(
    retriever=hybrid_retriever_reranker_OR, 
    response_synthesizer=synthesizer,
    llm=llm,
    qa_prompt=qa_prompt
)

eval_results = await runner.aevaluate_queries(
    query_engine, queries=list(batch_eval_queries.values())
)



In [106]:
eval_results['faithfulness'][1].passing

True

In [84]:
def get_eval_results(key, eval_results):
    results = eval_results[key]
    correct = 0
    for result in results:
        if result.passing:
            correct += 1
    score = correct / len(results)
    print(f"{key} Score: {score}")
    return score

In [103]:
score = get_eval_results("faithfulness", eval_results)
score

faithfulness Score: 0.9382716049382716


0.9382716049382716

In [104]:
score = get_eval_results("relevancy", eval_results)
score

relevancy Score: 0.8518518518518519


0.8518518518518519