In [3]:
from dotenv import load_dotenv
load_dotenv(".env.local")

True

## .docx to text

### AzureAI Document Intelligence

In [78]:
from langchain_community.document_loaders.doc_intelligence import (
    AzureAIDocumentIntelligenceLoader,
)
import os
AZURE_DOCUMENT_INTELLIGENCE_API_KEY = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_API_KEY")
def load_docs_adi(filepath: str):
    adi = AzureAIDocumentIntelligenceLoader(
        "https://eastus.api.cognitive.microsoft.com",
        AZURE_DOCUMENT_INTELLIGENCE_API_KEY,
        file_path=filepath,
        api_model="prebuilt-read",
    )
    docs = adi.load()
    return docs

### PyPDF

In [85]:
from langchain_community.document_loaders import PyPDFLoader

def load_docs_pypdf(file_path: str):
    pypdf_loader = PyPDFLoader(
        file_path = file_path,
        password = None,
        extract_images = True,
        # headers = None
        # extraction_mode = "plain",
        # extraction_kwargs = None,
    )
    docs = pypdf_loader.load()
    return docs

### Zerox

In [132]:
from langchain.schema import Document
def load_docs_zerox(path:str):
    with open(path, "r") as f:
        data = f.read()
    return [Document(page_content=data)]

In [133]:
current_dir = os.getcwd()
#file_path = os.path.join(current_dir, "RA Duty Manual - Final Draft 2023-2024.pdf")
file_path = os.path.join(current_dir, "./zerox/output.md")

docs = load_docs_zerox(file_path)

## Pinecone

In [7]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCVector
from pinecone.grpc.index_grpc import UpsertResponse

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_HOST = os.getenv("PINECONE_HOST")
PINECONE_NAMESPACE = "umd-call-center"
pc = Pinecone(api_key=PINECONE_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)
PC_INDEX_NAME = "knowledge"
pc_index = pc.Index(PC_INDEX_NAME, host=PINECONE_HOST)

In [None]:
from typing import List
def query_pc(vector: List[float]):
    query_result = pc_index.query(
        vector=vector,
        namespace=PINECONE_NAMESPACE,
        top_k=10,
        #filter={"knowledge_uuid": {"$in": knowledge_uuids}},
        include_metadata=True,
        timeout=1,
    )
    return query_result


## Create Local Embeddings

In [134]:
from langchain_openai import OpenAIEmbeddings
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

embedder = OpenAIEmbeddings(
    model="text-embedding-3-large", api_key=OPENAI_API_KEY
)

In [135]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document

def split_by_semantics(content:str):
    semantic_splitter = SemanticChunker(
        embedder,
        add_start_index=True,
        sentence_split_regex=r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)(?=\s)",
    )
    splits = semantic_splitter.split_documents([Document(page_content=content)])
    start_index = 0
    for i, split in enumerate(splits):
        last_index = start_index + len(content) - 1
        split.metadata["start_index"] = start_index
        start_index = last_index + 1
    return splits

def split_by_character(content:str):
    recursive_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800, chunk_overlap=400, add_start_index=True
    )
    splits = recursive_splitter.split_documents([Document(page_content=content)])
    return splits

## Generate Test Dataset

## Measuring

### Retrieval relevence
Are the documents that were retrieved relevent to the query

### Answer Faithfulness
Is the answer grounded in the documents?
- This can measure hallucinations

### Answer Correctness
Is the answer consistent with a reference answer?

### Code

#### Gen dataset giskard

In [136]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
splits = []
for doc in docs:
    splits.extend(split_by_character(doc.page_content))

In [137]:
vector_store = InMemoryVectorStore(OpenAIEmbeddings(model="text-embedding-3-large"))
vector_store.add_documents(splits)

['8aa6cf10-812f-48d8-8437-7cb0de6a4243',
 'be365dfb-f8c9-4b6f-a14b-c3966d0030f6',
 '122f53f0-5f27-41b5-9633-b2d95b1aa255',
 'c7f7e036-cd31-4451-a069-6facce129fc4',
 '031777de-28ab-42bb-adfd-ee31b11d5d82',
 '846b9aea-9354-49f5-a9f2-aa49898d2611',
 '89e572f4-cedd-4344-9948-da49e8fd9c52',
 'd7c99160-9bfe-4504-a202-f15ad1b0c917',
 '80e4da48-ed34-4ac0-8510-8f05df238c97',
 'a476cfc0-a78f-404c-8924-c037aef868d5',
 'a14f7877-7908-4a60-9637-8afafd922d91',
 'dc483b9d-2599-486f-b49f-2d7f093b94a9',
 '7c551313-4487-436f-8bb5-8a94d78374b6',
 '11ab8557-09ce-4c6d-8529-c091effd5f35',
 'aef5caba-f69f-498e-a2ec-a16d67766363',
 'c3f3d7e6-64c4-4ffd-8720-f33ee477c62f',
 '43b90a1e-6879-4ce9-8b47-4fadc62c33d0',
 'be12fb19-56fe-4e73-958c-90e6ba39cbce',
 '1410d79f-43a5-44e1-ab00-477d7e48fc6e',
 '98d9c228-4ba5-4dfb-a651-324e69a503c2',
 '24ebf26d-f6d4-4207-b58c-6797e6650761',
 '3bb3986d-b857-4ba5-89b3-b376e2e66f93',
 'e387d3e5-f08c-4591-858f-b3769ea6196e',
 '90972e69-6e18-4c63-9a08-8c1ef86881ed',
 'd715e570-b012-

In [138]:
import pandas as pd
from giskard.rag import KnowledgeBase
knowledge_base_df = pd.DataFrame([i.page_content for i in splits], columns=["text"])
knowledge_base = KnowledgeBase(knowledge_base_df)

In [139]:
import giskard
from giskard.llm.client.openai import OpenAIClient

giskard.llm.set_llm_api("openai")
oc = OpenAIClient(model="gpt-4o-mini")
giskard.llm.set_default_client(oc)

In [119]:
from giskard.rag import generate_testset

testset = generate_testset(
    knowledge_base,
    num_questions=25,
    language='en',  # optional, we'll auto detect if not provided
    agent_description="A support agent for the University of Maryland's call center", # helps generating better questions
)

2024-10-27 17:19:51,625 pid:49470 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-10-27 17:19:59,011 pid:49470 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 25/25 [00:47<00:00,  1.91s/it]


In [123]:
testset.save("testset25.jsonl")

In [140]:
from giskard.rag import QATestset
testset = QATestset.load("testset25.jsonl")

In [141]:
testset_df = testset.to_pandas()
testset_df.head()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
b0c34dd1-5373-4a31-b941-b7672d4345bb,What should a Resident Assistant do if there i...,Contact UMPD immediately. RAs should NOT try t...,Document 173: Misconduct (OCRSM): (301)405-114...,[],"{'question_type': 'simple', 'seed_document_id'..."
0e624b2c-5816-4b41-a857-70ec8e87661c,What should you do if there is a question abou...,"The person should not be left alone, and UMPD ...",Document 73: Resources● CommunityLivingHandboo...,[],"{'question_type': 'simple', 'seed_document_id'..."
b1a6b840-1519-4eeb-849d-7bdc169bb22b,What is the policy regarding pets in the housi...,Only fish in aquariums no larger than 10 gallo...,Document 100: Resources● HousingAccommodations...,[],"{'question_type': 'simple', 'seed_document_id'..."
3a162c15-735d-415e-8ce6-52cfe8439388,What is the purpose of UMDAlert?,The context does not provide specific informat...,Document 6: 37SevereWeather 37EarlyWarningSyst...,[],"{'question_type': 'simple', 'seed_document_id'..."
0e4454d5-5f36-4e1d-9f31-2d7c4af1d6db,What should be documented according to the pro...,"Any vandalism, safety, and facility issues sho...","Document 32: Proppeddoors, fireextinguisher, f...",[],"{'question_type': 'simple', 'seed_document_id'..."


#### Evaluate gkiskard

In [147]:
from giskard.rag import evaluate
import openai

# Wrap your RAG model
def get_answer_fn(question: str, history=None) -> str:
    """A function representing your RAG agent."""
    # Format appropriately the history for your RAG agent
    messages = history if history else []
    messages.append({"role": "user", "content": question})

    system_message = "Search results found the following information that might be relevent: "
    similarity_search_with_score = vector_store.similarity_search_with_score(question, k=10)
    for i, doc_score in enumerate(similarity_search_with_score):
        doc, score = doc_score
        system_message += f"Source {i}. Relevency Score: {score}:\n"+ doc.page_content + "\n\n"
        print(score)
    
    messages.append({"role": "system", "content": system_message})

    reminder_message = "Remember, you are on a phone call. Your response to the caller should be accurate and concise. Do not monologue. Here is the caller's message:"
    messages.append({"role": "system", "content": reminder_message})

    chatcompletion = openai.chat.completions.create(messages=messages, model="gpt-4o-mini")
    answer = chatcompletion.choices[0].message.content
    return answer


#ragas_context_recall = RagasMetric(name="RAGAS Context Recall", metric=context_recall, requires_context=True)

report = evaluate(get_answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent:   0%|          | 0/25 [00:00<?, ?it/s]

0.7237126982455698
0.6514098405948775
0.6145697745967713
0.6068658810293511
0.6025936099001936
0.5964764142498462
0.5902256182761509
0.589838316419847
0.5706793613001738
0.5648190586039237


Asking questions to the agent:   4%|▍         | 1/25 [00:03<01:31,  3.81s/it]

0.5757888117884399
0.548253871219513
0.5444709183453731
0.5430358268789178
0.5319537219203614
0.5314839334522206
0.5004866809664246
0.4913180445759836
0.49127649762556236
0.4909809232390961


Asking questions to the agent:   8%|▊         | 2/25 [00:06<01:17,  3.37s/it]

0.56585656468126
0.5592144605746568
0.5408973850560962
0.5301577543537357
0.48894413173256734
0.42868337130878104
0.41280428808415576
0.38762942276955675
0.37616896165085234
0.3729093418647577


Asking questions to the agent:  12%|█▏        | 3/25 [00:08<00:52,  2.39s/it]

0.7226197957254779
0.5624489758946831
0.5454719800673563
0.4820584476769591
0.4464036880482169
0.4463091134381085
0.4392232986732536
0.43702379506147976
0.42800798794570355
0.4243810891776885


Asking questions to the agent:  16%|█▌        | 4/25 [00:09<00:44,  2.10s/it]

0.37526753492301607
0.37187250281137973
0.3653929081594434
0.36299759056266445
0.3628305745230187
0.36219228450401086
0.3599662734232893
0.35367992249921043
0.3523758270075689
0.35041092587028866


Asking questions to the agent:  20%|██        | 5/25 [00:11<00:41,  2.06s/it]

0.31975215991798456
0.2912539602979305
0.29076058950327266
0.28603128603024736
0.28550254334813985
0.280010841615959
0.27906790840223156
0.27247647053062907
0.2711163024729027
0.2701139405741325


Asking questions to the agent:  24%|██▍       | 6/25 [00:13<00:35,  1.86s/it]

0.719604110729393
0.6889458085339603
0.68713954485694
0.6750471649104809
0.6653122231749921
0.6259200022748781
0.6251585401276982
0.6237646297201691
0.6195289030394929
0.6005433844795774


Asking questions to the agent:  28%|██▊       | 7/25 [00:15<00:35,  1.96s/it]

0.5353508484720502
0.5343278733284261
0.5264367440336204
0.5260850037343809
0.5186010570539884
0.5140560151838931
0.5095295037601736
0.5092949239633601
0.5079320488375197
0.4973114137838078


Asking questions to the agent:  32%|███▏      | 8/25 [00:17<00:36,  2.15s/it]

0.6833269201090799
0.6623878371258636
0.6311328872127183
0.630238707627953
0.49822638037785977
0.47776934214013894
0.461414676705547
0.4470684871859198
0.4452235973039683
0.44351065159630104


Asking questions to the agent:  36%|███▌      | 9/25 [00:19<00:33,  2.09s/it]

0.7658295379058687
0.6705665282368578
0.6514418616676099
0.6476309012604864
0.6400318909286731
0.6316016576604278
0.6173019905368922
0.6108324531257547
0.6107026420590178
0.6064102597109029


Asking questions to the agent:  40%|████      | 10/25 [00:23<00:38,  2.55s/it]

0.5974437669102287
0.5105812308561004
0.5091748553106249
0.4644490370967986
0.4606090184773916
0.45381020517697374
0.4485791076143343
0.43715915725847826
0.4349367767109604
0.4208540425452083


Asking questions to the agent:  44%|████▍     | 11/25 [00:25<00:33,  2.41s/it]

0.6110182845128485
0.5896616913445509
0.5798994172232814
0.5679690754090982
0.567822041760414
0.5604724722904708
0.556202284559698
0.5546550258371029
0.5398315462433148
0.5240074033556229


Asking questions to the agent:  48%|████▊     | 12/25 [00:28<00:33,  2.58s/it]

0.7307373685754002
0.5981157319608653
0.5874849716165137
0.5831737428364038
0.5676780218979613
0.5666924759910541
0.5653132021641447
0.5514191184014415
0.5508961086760091
0.5431315726838786


Asking questions to the agent:  52%|█████▏    | 13/25 [00:30<00:28,  2.38s/it]

0.7167253585510116
0.6898048811427288
0.605686753958847
0.5954492409872172
0.5619395698747862
0.5561025679518392
0.5526415890667474
0.5440004147864144
0.5407275537596404
0.5276096698103381


Asking questions to the agent:  56%|█████▌    | 14/25 [00:31<00:21,  1.94s/it]

0.5141621901496353
0.498876388620693
0.4980523765148619
0.4866031706866869
0.48454344343612715
0.4760328100837318
0.4736180947247682
0.46573857292561954
0.46436442480750617
0.4627293346424489


Asking questions to the agent:  60%|██████    | 15/25 [00:35<00:26,  2.66s/it]

0.7207729921398278
0.6782681286565786
0.6522370459108315
0.6510119065229845
0.6263815011769455
0.6211923493180969
0.6170765889561757
0.5997822497247746
0.5963402599303231
0.5792251336206605


Asking questions to the agent:  64%|██████▍   | 16/25 [00:37<00:21,  2.40s/it]

0.7150939805163461
0.6731653776842219
0.6703212763939066
0.6627756646693426
0.6332803872748365
0.6254723583631573
0.6251032162276148
0.6153625218580272
0.6140938843236085
0.6102877694890448


Asking questions to the agent:  68%|██████▊   | 17/25 [00:41<00:23,  2.97s/it]

0.7297779159516994
0.7069740405275222
0.6467063975378533
0.5994071787603966
0.5881116588145558
0.5826984905038052
0.5800249059691237
0.5120254835888834
0.5082259238330068
0.506608188522988


Asking questions to the agent:  72%|███████▏  | 18/25 [00:43<00:17,  2.47s/it]

0.6810517401795564
0.6441075725122054
0.6155622111277953
0.6146218402888749
0.6107659580736211
0.5761635333980152
0.5480581718888762
0.53952219179077
0.5370598310304515
0.5293216505544562


Asking questions to the agent:  76%|███████▌  | 19/25 [00:44<00:12,  2.09s/it]

0.5193103763436703
0.4396005631703056
0.40235837797691604
0.39972935040049323
0.3966921283964956
0.38995613365159915
0.38947959413108496
0.38667126532635926
0.38474447243061455
0.3841128054149019


Asking questions to the agent:  80%|████████  | 20/25 [00:45<00:09,  1.91s/it]

0.6474024763561272
0.36650073618338
0.31841995174686877
0.26043992940246635
0.25177706744324585
0.24962371080849433
0.2375443266841758
0.23716002464337144
0.2252143081300019
0.2185837322076068


Asking questions to the agent:  84%|████████▍ | 21/25 [00:47<00:07,  1.76s/it]

0.18161314831851225
0.17042193190307853
0.16948247714615225
0.16905574540889326
0.16842111768154575
0.16820469846014835
0.1675828702922835
0.16524818848727882
0.1601726204868421
0.1584932845721518


Asking questions to the agent:  88%|████████▊ | 22/25 [00:49<00:05,  1.88s/it]

0.18161314831851225
0.17042193190307853
0.16948247714615225
0.16905574540889326
0.16842111768154575
0.16820469846014835
0.1675828702922835
0.16524818848727882
0.1601726204868421
0.1584932845721518


Asking questions to the agent:  92%|█████████▏| 23/25 [00:50<00:03,  1.68s/it]

0.23167218549473592
0.20645081263592188
0.20206284412556813
0.1898131430756027
0.1847420402141643
0.17775405498393237
0.17552635266009825
0.1719223966582029
0.17158996281737926
0.16813970899058298


Asking questions to the agent:  96%|█████████▌| 24/25 [00:51<00:01,  1.46s/it]

0.18161314831851225
0.17042193190307853
0.16948247714615225
0.16905574540889326
0.16842111768154575
0.16820469846014835
0.1675828702922835
0.16524818848727882
0.1601726204868421
0.1584932845721518


Asking questions to the agent: 100%|██████████| 25/25 [00:52<00:00,  2.09s/it]
Asking questions to the agent: 100%|██████████| 25/25 [00:52<00:00,  2.09s/it]
CorrectnessMetric evaluation: 100%|██████████| 25/25 [00:21<00:00,  1.17it/s]


In [None]:
#display(report)  # if you are working in a notebook

# or save the report as an HTML file
#report.to_html("rag_eval_report.html")

In [148]:
report.save("reports/gpt_4o_mini-splits_character-top10-zerox")

In [None]:
from giskard.rag import RAGReport
loaded_report = RAGReport.load("reports/...")

In [65]:
report._knowledge_base._to_

<giskard.rag.knowledge_base.KnowledgeBase at 0x341418ce0>

In [63]:
report.failures

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness,correctness_reason
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
9c63fed4-b956-4aa0-9501-741321f4f034,What happens if a student does not work throug...,"If a student does not work through Labor Day, ...",Document 147: summer benefits\nSummer housing ...,[],"{'question_type': 'simple', 'seed_document_id'...",If a student living in summer housing does not...,False,The agent provided a general response about im...
f29be739-0393-44a5-a519-7912dba4c9e1,What should a staff member do if they are goin...,They are expected to immediately call the Serv...,Document 45: If a staff member is unable to re...,[],"{'question_type': 'simple', 'seed_document_id'...",If a staff member knows they will be late for ...,False,The agent provided a general response about no...
fb530e2e-e211-41f1-9c11-3e5bc8078666,In order to be considered for a promotion to t...,An employee must work a minimum of 120 total h...,Document 135: a. Possess a satisfactory perfor...,[],"{'question_type': 'complex', 'seed_document_id...",The minimum total number of hours an employee ...,False,The agent did not provide the specific minimum...
c9e0790a-3be2-4adf-b9bd-7de3f0c01105,Could you specify the individuals who hold the...,"Only the Service Center Assistant Manager, Man...","Document 162: All keys, swipes and key rings i...",[],"{'question_type': 'complex', 'seed_document_id...",The authority to approve the signing out of ke...,False,The agent provided a broader list of roles tha...
94e282d7-352b-42f9-b78e-762e24a61c1d,What steps should a student take regarding the...,"If you have a student parking permit, remove i...",Document 55: If you have a student parking per...,[],"{'question_type': 'distracting element', 'seed...",If a student has received a written warning fo...,False,The agent provided a detailed response about g...
e049d396-2658-4b2f-8722-e42697cdc731,In the event that a substitute fails to report...,The original (scheduled/assigned) staff member...,Document 48: If the substitute does not report...,[],"{'question_type': 'distracting element', 'seed...",When a substitute fails to report for their sc...,False,The agent provided a detailed response about t...
14060525-e7cd-4fa3-b239-501a78dc994c,"Hi, I'm a staff member at the university and I...",The driver should take the vehicle out to Moto...,Document 171: Vehicles should never be returne...,[],"{'question_type': 'situational', 'seed_documen...",It's important to ensure that university vehic...,False,The agent provided general refueling procedure...
42d9bf93-4e26-4fef-b301-c31e7cd69bd8,"Hi there, I’m currently dealing with an unexpe...",If a staff member is not able to safely travel...,Document 63: In the event the University has a...,[],"{'question_type': 'situational', 'seed_documen...",If you are a staff member unable to safely tra...,False,The agent provided a detailed response about g...
ad2c7d12-7bb1-4710-9efc-16f83d2bbbeb,What is the main purpose of the Service Center...,The Service Center serves as the main communic...,Document 6: The Department of Residential Faci...,[],"{'question_type': 'double', 'original_question...",The main purpose of the Service Center is to p...,False,The agent provided a general description of th...
dc21b28c-3d62-4a84-83fc-ba8b83599849,What are the consequences?,Forgetting ID when working and using key slip ...,Document 182: not completing work in the box o...,"[{'role': 'user', 'content': 'I want to know w...","{'question_type': 'conversational', 'seed_docu...",The consequences of an employee forgetting the...,False,The agent provided a detailed explanation of p...


## Misc 

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCVector
from pinecone.grpc.index_grpc import UpsertResponse

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_HOST = os.getenv("PINECONE_HOST")
PINECONE_NAMESPACE = "umd-call-center"
pc = Pinecone(api_key=PINECONE_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)
PC_INDEX_NAME = "knowledge"
pc_index = pc.Index(PC_INDEX_NAME, host=PINECONE_HOST)

In [None]:
from typing import List
def query_pc(vector: List[float]):
    query_result = pc_index.query(
        vector=vector,
        namespace=PINECONE_NAMESPACE,
        top_k=10,
        #filter={"knowledge_uuid": {"$in": knowledge_uuids}},
        include_metadata=True,
        timeout=1,
    )
    return query_result


In [None]:
import numpy as np

def cosine_similarity(vec1, vec2):
    # Compute dot product
    dot_product = np.dot(vec1, vec2)
    # Compute magnitudes
    magnitude_vec1 = np.linalg.norm(vec1)
    magnitude_vec2 = np.linalg.norm(vec2)
    # Calculate cosine similarity
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0  # Handle the case of zero magnitude
    return dot_product / (magnitude_vec1 * magnitude_vec2)
