In [3]:
from dotenv import load_dotenv
load_dotenv(".env.local")

True

## .docx to text

In [78]:
from langchain_community.document_loaders.doc_intelligence import (
    AzureAIDocumentIntelligenceLoader,
)
import os
AZURE_DOCUMENT_INTELLIGENCE_API_KEY = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_API_KEY")
def load_docs_adi(filepath: str):
    adi = AzureAIDocumentIntelligenceLoader(
        "https://eastus.api.cognitive.microsoft.com",
        AZURE_DOCUMENT_INTELLIGENCE_API_KEY,
        file_path=filepath,
        api_model="prebuilt-read",
    )
    docs = adi.load()
    return docs

In [85]:
from langchain_community.document_loaders import PyPDFLoader

def load_docs_pypdf(file_path: str):
    pypdf_loader = PyPDFLoader(
        file_path = file_path,
        password = None,
        extract_images = True,
        # headers = None
        # extraction_mode = "plain",
        # extraction_kwargs = None,
    )
    docs = pypdf_loader.load()
    return docs

In [88]:
current_dir = os.getcwd()
file_path = os.path.join(current_dir, "RA Duty Manual - Final Draft 2023-2024.pdf")

docs = load_docs_pypdf(file_path)

## Pinecone

In [7]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCVector
from pinecone.grpc.index_grpc import UpsertResponse

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_HOST = os.getenv("PINECONE_HOST")
PINECONE_NAMESPACE = "umd-call-center"
pc = Pinecone(api_key=PINECONE_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)
PC_INDEX_NAME = "knowledge"
pc_index = pc.Index(PC_INDEX_NAME, host=PINECONE_HOST)

In [None]:
from typing import List
def query_pc(vector: List[float]):
    query_result = pc_index.query(
        vector=vector,
        namespace=PINECONE_NAMESPACE,
        top_k=10,
        #filter={"knowledge_uuid": {"$in": knowledge_uuids}},
        include_metadata=True,
        timeout=1,
    )
    return query_result


## Create Local Embeddings

In [91]:
from langchain_openai import OpenAIEmbeddings
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

embedder = OpenAIEmbeddings(
    model="text-embedding-3-large", api_key=OPENAI_API_KEY
)

In [92]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document

def split_by_semantics(content:str):
    semantic_splitter = SemanticChunker(
        embedder,
        add_start_index=True,
        sentence_split_regex=r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)(?=\s)",
    )
    splits = semantic_splitter.split_documents([Document(page_content=content)])
    start_index = 0
    for i, split in enumerate(splits):
        last_index = start_index + len(content) - 1
        split.metadata["start_index"] = start_index
        start_index = last_index + 1
    return splits

def split_by_character(content:str):
    recursive_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800, chunk_overlap=400, add_start_index=True
    )
    splits = recursive_splitter.split_documents([Document(page_content=content)])
    return splits

## Generate Test Dataset

## Measuring

### Retrieval relevence
Are the documents that were retrieved relevent to the query

### Answer Faithfulness
Is the answer grounded in the documents?
- This can measure hallucinations

### Answer Correctness
Is the answer consistent with a reference answer?

### Code

#### Gen dataset giskard

In [112]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
splits = []
for doc in docs:
    splits.extend(split_by_character(doc.page_content))

In [113]:
vector_store = InMemoryVectorStore(OpenAIEmbeddings(model="text-embedding-3-large"))
vector_store.add_documents(splits)

['f68f0cb5-c77c-4f61-9913-49996156090a',
 '410fc1f4-2d18-41ed-821d-8ada696ffb92',
 '84cdecae-1921-4981-95b8-4786ea882520',
 'b1caf50c-5689-41dd-bd76-76572cbe9593',
 'b793a88d-1e07-4e75-b172-9337fb13a653',
 'cb2b7d69-8f86-4c48-ad05-ad337b312ce0',
 'b1bd1409-e146-483e-8999-b5e3323130ef',
 'f93c0b01-01e5-4527-a22b-fdd868f9e164',
 'f26fefdd-870b-4af8-b0ad-7270e66bb8e3',
 '5b90754d-c506-4c19-a395-37ca2227853c',
 '50d8f15d-2ab2-4832-9f14-100997637323',
 'd827f460-22c9-4879-8f75-b9cf1af01e7d',
 'ed5814bb-5276-40a5-9565-e5c9525014f7',
 '8fb8b4ce-ff82-45a5-8edd-6b2201c3d350',
 'ef6cf75f-86af-4c01-a223-9e2dea2935f1',
 '49c915c0-55b3-4acf-8af3-9131e075f3eb',
 'fb6708f3-f228-42eb-a221-22bf6d46482a',
 '682d7506-9d5f-4575-8646-7fe05e35375d',
 'd990cd20-1cd3-4ecb-94e2-261c2438db47',
 '62b5fd88-30e7-4a0b-b092-83f79ec116b6',
 'fe917db0-b3ff-4bfa-a5c6-58939b5388d1',
 '987e4f40-df7a-49e2-8b4e-14dd95e796ac',
 '1b1bbe44-c188-4e6c-ba87-4b5faa9a80d7',
 'd01aa048-d71f-40fb-988b-5513b9d87d14',
 'ada19e31-6d78-

In [114]:
import pickle
with open("splits.pkl", "wb") as f:
    pickle.dump(splits, f)

vector_store.dump("vectorstore_splits_character.json")

In [115]:
import pickle
with open("splits.pkl", "rb") as f:
    splits = pickle.load(f)

vector_store = InMemoryVectorStore.load("vectorstore_splits_character.json", embedding=OpenAIEmbeddings(model="text-embedding-3-large"))

In [117]:
import pandas as pd
from giskard.rag import KnowledgeBase
knowledge_base_df = pd.DataFrame([i.page_content for i in splits], columns=["text"])
knowledge_base = KnowledgeBase(knowledge_base_df)

In [118]:
import giskard
from giskard.llm.client.openai import OpenAIClient

giskard.llm.set_llm_api("openai")
oc = OpenAIClient(model="gpt-4o-mini")
giskard.llm.set_default_client(oc)

In [119]:
from giskard.rag import generate_testset

testset = generate_testset(
    knowledge_base,
    num_questions=25,
    language='en',  # optional, we'll auto detect if not provided
    agent_description="A support agent for the University of Maryland's call center", # helps generating better questions
)

2024-10-27 17:19:51,625 pid:49470 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-10-27 17:19:59,011 pid:49470 MainThread giskard.rag  INFO     Found 5 topics in the knowledge base.


Generating questions: 100%|██████████| 25/25 [00:47<00:00,  1.91s/it]


In [123]:
testset.save("testset25.jsonl")

In [None]:
from giskard.rag import QATestset
testset = QATestset.load("testset25.jsonl")

In [120]:
testset_df = testset.to_pandas()
testset_df.head()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
b0c34dd1-5373-4a31-b941-b7672d4345bb,What should a Resident Assistant do if there i...,Contact UMPD immediately. RAs should NOT try t...,Document 173: Misconduct (OCRSM): (301)405-114...,[],"{'question_type': 'simple', 'seed_document_id'..."
0e624b2c-5816-4b41-a857-70ec8e87661c,What should you do if there is a question abou...,"The person should not be left alone, and UMPD ...",Document 73: Resources● CommunityLivingHandboo...,[],"{'question_type': 'simple', 'seed_document_id'..."
b1a6b840-1519-4eeb-849d-7bdc169bb22b,What is the policy regarding pets in the housi...,Only fish in aquariums no larger than 10 gallo...,Document 100: Resources● HousingAccommodations...,[],"{'question_type': 'simple', 'seed_document_id'..."
3a162c15-735d-415e-8ce6-52cfe8439388,What is the purpose of UMDAlert?,The context does not provide specific informat...,Document 6: 37SevereWeather 37EarlyWarningSyst...,[],"{'question_type': 'simple', 'seed_document_id'..."
0e4454d5-5f36-4e1d-9f31-2d7c4af1d6db,What should be documented according to the pro...,"Any vandalism, safety, and facility issues sho...","Document 32: Proppeddoors, fireextinguisher, f...",[],"{'question_type': 'simple', 'seed_document_id'..."


#### Evaluate gkiskard

In [127]:
from giskard.rag import evaluate
import openai

# Wrap your RAG model
def get_answer_fn(question: str, history=None) -> str:
    """A function representing your RAG agent."""
    # Format appropriately the history for your RAG agent
    messages = history if history else []
    messages.append({"role": "user", "content": question})

    system_message = "Search results found the following information that might be relevent: "
    similarity_search_with_score = vector_store.similarity_search_with_score(question, k=3)
    for i, doc_score in enumerate(similarity_search_with_score):
        doc, score = doc_score
        system_message += f"Source {i}. Relevency Score: {score}:\n"+ doc.page_content + "\n\n"
        print(score)
    
    messages.append({"role": "system", "content": system_message})

    reminder_message = "Remember, you are on a phone call. Your response to the caller should be accurate and concise. Do not monologue. Here is the caller's message:"
    messages.append({"role": "system", "content": reminder_message})

    chatcompletion = openai.chat.completions.create(messages=messages, model="gpt-4o-mini")
    answer = chatcompletion.choices[0].message.content
    return answer


#ragas_context_recall = RagasMetric(name="RAGAS Context Recall", metric=context_recall, requires_context=True)

report = evaluate(get_answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent:   0%|          | 0/25 [00:00<?, ?it/s]

0.7241891073492221
0.6205387168476231
0.6050754900338735


Asking questions to the agent:   4%|▍         | 1/25 [00:02<00:57,  2.38s/it]

0.5868435076957883
0.5693694257192068
0.5591581471096498


Asking questions to the agent:   8%|▊         | 2/25 [00:04<00:49,  2.16s/it]

0.6144810300854778
0.5733218839716107
0.4773563073789033


Asking questions to the agent:  12%|█▏        | 3/25 [00:05<00:37,  1.72s/it]

0.7451413438400871
0.6599773284860726
0.5947561046246914


Asking questions to the agent:  16%|█▌        | 4/25 [00:06<00:32,  1.54s/it]

0.4051927607182702
0.3987328502874622
0.3819548530380381


Asking questions to the agent:  20%|██        | 5/25 [00:09<00:37,  1.87s/it]

0.3072724130249111
0.29565027551725914
0.27604629397246094


Asking questions to the agent:  24%|██▍       | 6/25 [00:10<00:29,  1.55s/it]

0.7300088221605286
0.6909497877678316
0.682541617588871


Asking questions to the agent:  28%|██▊       | 7/25 [00:11<00:27,  1.53s/it]

0.560006559154632
0.5437383889011225
0.5426055831930336


Asking questions to the agent:  32%|███▏      | 8/25 [00:14<00:30,  1.82s/it]

0.6351098483280861
0.6328419837275974
0.619882319783254


Asking questions to the agent:  36%|███▌      | 9/25 [00:15<00:29,  1.83s/it]

0.778357413994732
0.7017841228332797
0.6511103442984135


Asking questions to the agent:  40%|████      | 10/25 [00:18<00:31,  2.08s/it]

0.5985726040831316
0.5364582705763327
0.49331310471372036


Asking questions to the agent:  44%|████▍     | 11/25 [00:21<00:30,  2.20s/it]

0.6118994272457987
0.6037619307267892
0.5936619483548947


Asking questions to the agent:  48%|████▊     | 12/25 [00:22<00:26,  2.06s/it]

0.6754967314442215
0.6121562057789364
0.5825287903722441


Asking questions to the agent:  52%|█████▏    | 13/25 [00:24<00:24,  2.02s/it]

0.7162448215689478
0.6416745692943348
0.6256986767679817


Asking questions to the agent:  56%|█████▌    | 14/25 [00:26<00:20,  1.83s/it]

0.5258516456165069
0.5042206461312705
0.49940952667358834


Asking questions to the agent:  60%|██████    | 15/25 [00:29<00:23,  2.33s/it]

0.7061678360095967
0.6576552855531268
0.6572457419259239


Asking questions to the agent:  64%|██████▍   | 16/25 [00:31<00:20,  2.22s/it]

0.7135433277566139
0.6741064498172598
0.6694283948653671


Asking questions to the agent:  68%|██████▊   | 17/25 [00:34<00:18,  2.26s/it]

0.7498184155302388
0.6445351318615266
0.6135984139451814


Asking questions to the agent:  72%|███████▏  | 18/25 [00:35<00:14,  2.02s/it]

0.637130545681463
0.6289167083860592
0.6045614383098812


Asking questions to the agent:  76%|███████▌  | 19/25 [00:37<00:11,  1.89s/it]

0.5786756898740966
0.4954904717714237
0.4196484350077793


Asking questions to the agent:  80%|████████  | 20/25 [00:38<00:08,  1.66s/it]

0.8700859265221232
0.5514638343155064
0.3601710118811716


Asking questions to the agent:  84%|████████▍ | 21/25 [00:39<00:05,  1.45s/it]

0.19482005120081536
0.17292918163602117
0.1713101424338184


Asking questions to the agent:  88%|████████▊ | 22/25 [00:40<00:03,  1.33s/it]

0.19482005120081536
0.17292918163602117
0.1713101424338184


Asking questions to the agent:  92%|█████████▏| 23/25 [00:41<00:02,  1.22s/it]

0.18816208673665402
0.18252578447634601
0.17730803220299624


Asking questions to the agent:  96%|█████████▌| 24/25 [00:42<00:01,  1.17s/it]

0.19483867577811398
0.1729828845231799
0.17135938693853894


Asking questions to the agent: 100%|██████████| 25/25 [00:43<00:00,  1.74s/it]
Asking questions to the agent: 100%|██████████| 25/25 [00:43<00:00,  1.74s/it]
CorrectnessMetric evaluation: 100%|██████████| 25/25 [00:39<00:00,  1.57s/it]


In [None]:
#display(report)  # if you are working in a notebook

# or save the report as an HTML file
#report.to_html("rag_eval_report.html")

In [130]:
report.save("reports/gpt_4o_mini-splits_character-top3-pypdf")

In [71]:
from giskard.rag import RAGReport
loaded_report = RAGReport.load("basic_report")

In [65]:
report._knowledge_base._to_

<giskard.rag.knowledge_base.KnowledgeBase at 0x341418ce0>

In [63]:
report.failures

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness,correctness_reason
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
9c63fed4-b956-4aa0-9501-741321f4f034,What happens if a student does not work throug...,"If a student does not work through Labor Day, ...",Document 147: summer benefits\nSummer housing ...,[],"{'question_type': 'simple', 'seed_document_id'...",If a student living in summer housing does not...,False,The agent provided a general response about im...
f29be739-0393-44a5-a519-7912dba4c9e1,What should a staff member do if they are goin...,They are expected to immediately call the Serv...,Document 45: If a staff member is unable to re...,[],"{'question_type': 'simple', 'seed_document_id'...",If a staff member knows they will be late for ...,False,The agent provided a general response about no...
fb530e2e-e211-41f1-9c11-3e5bc8078666,In order to be considered for a promotion to t...,An employee must work a minimum of 120 total h...,Document 135: a. Possess a satisfactory perfor...,[],"{'question_type': 'complex', 'seed_document_id...",The minimum total number of hours an employee ...,False,The agent did not provide the specific minimum...
c9e0790a-3be2-4adf-b9bd-7de3f0c01105,Could you specify the individuals who hold the...,"Only the Service Center Assistant Manager, Man...","Document 162: All keys, swipes and key rings i...",[],"{'question_type': 'complex', 'seed_document_id...",The authority to approve the signing out of ke...,False,The agent provided a broader list of roles tha...
94e282d7-352b-42f9-b78e-762e24a61c1d,What steps should a student take regarding the...,"If you have a student parking permit, remove i...",Document 55: If you have a student parking per...,[],"{'question_type': 'distracting element', 'seed...",If a student has received a written warning fo...,False,The agent provided a detailed response about g...
e049d396-2658-4b2f-8722-e42697cdc731,In the event that a substitute fails to report...,The original (scheduled/assigned) staff member...,Document 48: If the substitute does not report...,[],"{'question_type': 'distracting element', 'seed...",When a substitute fails to report for their sc...,False,The agent provided a detailed response about t...
14060525-e7cd-4fa3-b239-501a78dc994c,"Hi, I'm a staff member at the university and I...",The driver should take the vehicle out to Moto...,Document 171: Vehicles should never be returne...,[],"{'question_type': 'situational', 'seed_documen...",It's important to ensure that university vehic...,False,The agent provided general refueling procedure...
42d9bf93-4e26-4fef-b301-c31e7cd69bd8,"Hi there, I’m currently dealing with an unexpe...",If a staff member is not able to safely travel...,Document 63: In the event the University has a...,[],"{'question_type': 'situational', 'seed_documen...",If you are a staff member unable to safely tra...,False,The agent provided a detailed response about g...
ad2c7d12-7bb1-4710-9efc-16f83d2bbbeb,What is the main purpose of the Service Center...,The Service Center serves as the main communic...,Document 6: The Department of Residential Faci...,[],"{'question_type': 'double', 'original_question...",The main purpose of the Service Center is to p...,False,The agent provided a general description of th...
dc21b28c-3d62-4a84-83fc-ba8b83599849,What are the consequences?,Forgetting ID when working and using key slip ...,Document 182: not completing work in the box o...,"[{'role': 'user', 'content': 'I want to know w...","{'question_type': 'conversational', 'seed_docu...",The consequences of an employee forgetting the...,False,The agent provided a detailed explanation of p...


## Misc 

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCVector
from pinecone.grpc.index_grpc import UpsertResponse

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_HOST = os.getenv("PINECONE_HOST")
PINECONE_NAMESPACE = "umd-call-center"
pc = Pinecone(api_key=PINECONE_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)
PC_INDEX_NAME = "knowledge"
pc_index = pc.Index(PC_INDEX_NAME, host=PINECONE_HOST)

In [None]:
from typing import List
def query_pc(vector: List[float]):
    query_result = pc_index.query(
        vector=vector,
        namespace=PINECONE_NAMESPACE,
        top_k=10,
        #filter={"knowledge_uuid": {"$in": knowledge_uuids}},
        include_metadata=True,
        timeout=1,
    )
    return query_result


In [None]:
import numpy as np

def cosine_similarity(vec1, vec2):
    # Compute dot product
    dot_product = np.dot(vec1, vec2)
    # Compute magnitudes
    magnitude_vec1 = np.linalg.norm(vec1)
    magnitude_vec2 = np.linalg.norm(vec2)
    # Calculate cosine similarity
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0  # Handle the case of zero magnitude
    return dot_product / (magnitude_vec1 * magnitude_vec2)
