In [3]:
from dotenv import load_dotenv
# Load variables from .env.local into the process environment; later cells
# read their API keys from it with os.getenv.
load_dotenv(".env.local")

True

## .docx to text

### AzureAI Document Intelligence

In [78]:
from langchain_community.document_loaders.doc_intelligence import (
    AzureAIDocumentIntelligenceLoader,
)
import os
AZURE_DOCUMENT_INTELLIGENCE_API_KEY = os.getenv("AZURE_DOCUMENT_INTELLIGENCE_API_KEY")


def load_docs_adi(
    filepath: str,
    api_endpoint: str = "https://eastus.api.cognitive.microsoft.com",
    api_model: str = "prebuilt-read",
):
    """Load a local file through Azure AI Document Intelligence.

    Args:
        filepath: Path of the local file to analyse.
        api_endpoint: Endpoint of the Document Intelligence resource.
            Defaults to the East US endpoint this notebook was written against.
        api_model: Name of the analysis model to run. Defaults to
            ``"prebuilt-read"``.

    Returns:
        Whatever ``AzureAIDocumentIntelligenceLoader.load()`` returns for the file.
    """
    adi = AzureAIDocumentIntelligenceLoader(
        api_endpoint=api_endpoint,
        # The key is read from the environment once, at module level.
        api_key=AZURE_DOCUMENT_INTELLIGENCE_API_KEY,
        file_path=filepath,
        api_model=api_model,
    )
    return adi.load()

### PyPDF

In [85]:
from langchain_community.document_loaders import PyPDFLoader

def load_docs_pypdf(file_path: str):
    """Read a PDF with ``PyPDFLoader`` and return the loaded documents.

    The loader is created without a password and with image extraction
    switched on.
    """
    loader_options = {
        "file_path": file_path,
        "password": None,
        "extract_images": True,
    }
    return PyPDFLoader(**loader_options).load()

### Zerox

In [132]:
from langchain.schema import Document
def load_docs_zerox(path: str):
    """Wrap the contents of a text/markdown file in a single ``Document``.

    Args:
        path: Path of the file to read.

    Returns:
        A one-element list holding a ``Document`` whose ``page_content`` is
        the full text of the file.
    """
    # Decode explicitly as UTF-8: without it the platform default encoding is
    # used, which can fail on (or garble) non-ASCII characters.
    with open(path, "r", encoding="utf-8") as f:
        data = f.read()
    return [Document(page_content=data)]

In [133]:
# Build the input path relative to the current working directory.
current_dir = os.getcwd()
# Alternative input (a PDF), currently disabled.
#file_path = os.path.join(current_dir, "RA Duty Manual - Final Draft 2023-2024.pdf")
file_path = os.path.join(current_dir, "./zerox/output.md")

# `docs` is consumed by the later cell that builds `splits`.
docs = load_docs_zerox(file_path)

## Pinecone

In [7]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCVector
from pinecone.grpc.index_grpc import UpsertResponse

# Pinecone connection settings; the key and host are read from the environment.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_HOST = os.getenv("PINECONE_HOST")
PINECONE_NAMESPACE = "umd-call-center"
PC_INDEX_NAME = "knowledge"

# A single client is created and used to open the index handle.
pc = Pinecone(api_key=PINECONE_API_KEY)
pc_index = pc.Index(PC_INDEX_NAME, host=PINECONE_HOST)

In [None]:
from typing import List
def query_pc(vector: List[float], top_k: int = 10, timeout: float = 1):
    """Query the Pinecone index for the nearest neighbours of ``vector``.

    Args:
        vector: Query embedding.
        top_k: Number of matches to request. Defaults to 10.
        timeout: Value passed as the ``timeout`` argument of the query call.
            Defaults to 1.

    Returns:
        The result of ``pc_index.query`` (metadata included), restricted to
        ``PINECONE_NAMESPACE``.
    """
    return pc_index.query(
        vector=vector,
        namespace=PINECONE_NAMESPACE,
        top_k=top_k,
        #filter={"knowledge_uuid": {"$in": knowledge_uuids}},
        include_metadata=True,
        timeout=timeout,
    )


## Create Local Embeddings

In [134]:
from langchain_openai import OpenAIEmbeddings
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Embedding model instance; split_by_semantics passes it to SemanticChunker.
embedder = OpenAIEmbeddings(
    model="text-embedding-3-large", api_key=OPENAI_API_KEY
)

In [135]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document

def split_by_semantics(content: str):
    """Split ``content`` into chunks with ``SemanticChunker``.

    Each returned chunk gets ``metadata["start_index"]`` set to the character
    offset at which the chunk begins in ``content``.

    Args:
        content: Full text to split.

    Returns:
        The list of chunk documents produced by the splitter.
    """
    semantic_splitter = SemanticChunker(
        embedder,
        add_start_index=True,
        sentence_split_regex=r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)(?=\s)",
    )
    splits = semantic_splitter.split_documents([Document(page_content=content)])

    # Offsets must advance by the length of each *chunk*, not of the whole
    # document. Prefer the chunk's real position in `content`; if the splitter
    # altered whitespace so the chunk is not found verbatim, fall back to the
    # running offset.
    cursor = 0
    for split in splits:
        chunk_text = split.page_content
        located = content.find(chunk_text, cursor)
        start_index = located if located != -1 else cursor
        split.metadata["start_index"] = start_index
        cursor = start_index + len(chunk_text)
    return splits

def split_by_character(content: str, chunk_size: int = 800, chunk_overlap: int = 400):
    """Split ``content`` with ``RecursiveCharacterTextSplitter``.

    Args:
        content: Full text to split.
        chunk_size: ``chunk_size`` passed to the splitter. Defaults to 800.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to 400.

    Returns:
        The chunk documents produced by the splitter, which is created with
        ``add_start_index=True``.
    """
    recursive_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap, add_start_index=True
    )
    return recursive_splitter.split_documents([Document(page_content=content)])

## Generate Test Dataset

## Measuring

### Retrieval relevance
Are the retrieved documents relevant to the query?

### Answer Faithfulness
Is the answer grounded in the documents?
- This can measure hallucinations

### Answer Correctness
Is the answer consistent with a reference answer?

### Code

#### Generate dataset with giskard

In [136]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
# Flatten the character-level chunks of every loaded document into one list.
splits = [
    chunk
    for doc in docs
    for chunk in split_by_character(doc.page_content)
]

In [137]:
# In-memory vector store backed by the text-embedding-3-large embedding model.
vector_store = InMemoryVectorStore(OpenAIEmbeddings(model="text-embedding-3-large"))
# Index every chunk; the value shown as the cell output is a list of id strings.
vector_store.add_documents(splits)

['8aa6cf10-812f-48d8-8437-7cb0de6a4243',
 'be365dfb-f8c9-4b6f-a14b-c3966d0030f6',
 '122f53f0-5f27-41b5-9633-b2d95b1aa255',
 'c7f7e036-cd31-4451-a069-6facce129fc4',
 '031777de-28ab-42bb-adfd-ee31b11d5d82',
 '846b9aea-9354-49f5-a9f2-aa49898d2611',
 '89e572f4-cedd-4344-9948-da49e8fd9c52',
 'd7c99160-9bfe-4504-a202-f15ad1b0c917',
 '80e4da48-ed34-4ac0-8510-8f05df238c97',
 'a476cfc0-a78f-404c-8924-c037aef868d5',
 'a14f7877-7908-4a60-9637-8afafd922d91',
 'dc483b9d-2599-486f-b49f-2d7f093b94a9',
 '7c551313-4487-436f-8bb5-8a94d78374b6',
 '11ab8557-09ce-4c6d-8529-c091effd5f35',
 'aef5caba-f69f-498e-a2ec-a16d67766363',
 'c3f3d7e6-64c4-4ffd-8720-f33ee477c62f',
 '43b90a1e-6879-4ce9-8b47-4fadc62c33d0',
 'be12fb19-56fe-4e73-958c-90e6ba39cbce',
 '1410d79f-43a5-44e1-ab00-477d7e48fc6e',
 '98d9c228-4ba5-4dfb-a651-324e69a503c2',
 '24ebf26d-f6d4-4207-b58c-6797e6650761',
 '3bb3986d-b857-4ba5-89b3-b376e2e66f93',
 'e387d3e5-f08c-4591-858f-b3769ea6196e',
 '90972e69-6e18-4c63-9a08-8c1ef86881ed',
 'd715e570-b012-

In [215]:
# Dump the vector store to the path "vector_store".
vector_store.dump("vector_store")

In [138]:
import pandas as pd
from giskard.rag import KnowledgeBase
# One row per chunk, held in a single "text" column, then wrapped for giskard.
chunk_texts = [split.page_content for split in splits]
knowledge_base_df = pd.DataFrame(chunk_texts, columns=["text"])
knowledge_base = KnowledgeBase(knowledge_base_df)

In [213]:
# RAGReport is imported here because the notebook's only other import of it
# sits in a later cell, so this cell failed with NameError on a fresh kernel.
from giskard.rag import RAGReport

report = RAGReport.load("reports_testset250/p1-gpt_4o_mini-splits_character-max_marginal_relevance-top5-zerox-tester_gpt-4o-mini")
# NOTE(review): `_knowledge_base` is a private attribute of the report; this
# rebinds `knowledge_base` to the one stored with the saved report.
knowledge_base = report._knowledge_base

In [207]:
import giskard
from giskard.llm.client.openai import OpenAIClient

# Select OpenAI as giskard's LLM API.
giskard.llm.set_llm_api("openai")
# NOTE(review): `oc` is not referenced by any other cell visible in this
# notebook — confirm it is still needed.
oc = OpenAIClient(model="gpt-4o-mini")

In [208]:
from typing import Any, Optional, Sequence, Union
from giskard.llm.client import LLMClient, ChatMessage
import openai
from openai.types.chat.completion_create_params import ResponseFormatJSONObject, ResponseFormatText

class O1PreviewClient(LLMClient):
    """giskard ``LLMClient`` that sends every completion to the ``o1-preview`` model."""

    def complete(self, messages: Sequence[ChatMessage], temperature: float = 1, max_tokens: Union[Optional[int], Any] = None, caller_id: str | None = None, seed: int | None = None, format=None) -> ChatMessage:
        """Run one chat completion and return the first choice.

        Args:
            messages: Conversation to send. Roles other than ``"user"`` and
                ``"assistant"`` are sent as ``"user"``.
            temperature: Accepted for interface compatibility but ignored; the
                request always uses ``temperature=1``.
            max_tokens: Forwarded as ``max_tokens`` only when not ``None``.
            caller_id: Unused.
            seed: Forwarded to the API.
            format: When it contains ``"json"`` a JSON-object response format
                is requested, otherwise plain text.

        Returns:
            A ``ChatMessage`` carrying the role and content of the first choice.
        """
        messages_openai = []
        for message in messages:
            # Keep user/assistant turns as they are and remap every other role
            # (e.g. "system") to "user". The earlier `!= ... or != ...` test
            # was always true, so assistant turns were rewritten as well.
            role = message.role if message.role in ("user", "assistant") else "user"
            messages_openai.append({"role": role, "content": message.content})

        if format is not None and "json" in format:
            rformat = {"type": "json_object"}
        else:
            rformat = {"type": "text"}

        request_kwargs = {
            "messages": messages_openai,
            "model": "o1-preview",
            # NOTE(review): temperature is pinned to 1 regardless of the
            # argument — presumably a model restriction; confirm.
            "temperature": 1,
            "seed": seed,
            "response_format": rformat,
        }
        # Only send max_tokens when the caller supplied a limit.
        if max_tokens is not None:
            request_kwargs["max_tokens"] = max_tokens

        response = openai.chat.completions.create(**request_kwargs)
        reply = response.choices[0].message
        return ChatMessage(role=reply.role, content=reply.content)
    
# Create the o1-preview client and register it as giskard's default LLM client.
o1preview= O1PreviewClient()
giskard.llm.set_default_client(o1preview)


In [195]:
from giskard.rag import generate_testset

# Generate 100 English questions from the knowledge base. The captured output
# below shows this run stopping at 45/100 with an insufficient-quota error.
testset = generate_testset(
    knowledge_base,
    num_questions=100,
    language='en',  
    agent_description="A support agent for the University of Maryland's call center.", 
)

Generating questions:  45%|████▌     | 45/100 [24:33<31:26, 34.30s/it]  

2024-10-27 23:06:20,995 pid:49470 MainThread giskard.rag  ERROR    Encountered error in question generation: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}. Skipping.
2024-10-27 23:06:20,997 pid:49470 MainThread giskard.rag  ERROR    Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
Traceback (most recent call last):
  File "/Users/max/Documents/SynTag/syntag/misc/agents/umd-call-center/.venv/lib/python3.12/site-packages/giskard/rag/question_generators/base.py", line 57,

Generating questions:  45%|████▌     | 45/100 [26:04<31:52, 34.77s/it]


In [199]:
# Export the test set as a DataFrame to an Excel workbook.
testset.to_pandas().to_excel("testset45-o1preview.xlsx")

In [200]:
# Save the test set to testset45-o1preview.jsonl.
testset.save("testset45-o1preview.jsonl")

In [140]:
from giskard.rag import QATestset
# Load a saved test set from disk; this rebinds `testset`.
testset = QATestset.load("testset250.jsonl")

In [151]:
# Convert the test set to a DataFrame and preview its first rows.
testset_df = testset.to_pandas()
testset_df.head()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
f7688518-56c0-42b6-ac14-a3c83992a93d,What is the purpose of a Fire Watch?,A Fire Watch is an hourly foot patrol conducte...,Document 129: # Resources:\n- **After a Fire**...,[],"{'question_type': 'simple', 'seed_document_id'..."
aa5a916d-be2a-43bd-beee-3d778333d784,What is the primary focus of the Resident Dire...,The RD's primary focus must be the ability to ...,Document 16: Purpose: Being accessible to stud...,[],"{'question_type': 'simple', 'seed_document_id'..."
d58efa9b-2a56-4211-969b-c2d4605b8608,What should an RA do when a resident requires ...,The RA should contact UMPD at 301-405-3333 and...,Document 108: Procedure: \n1. When a resident...,[],"{'question_type': 'simple', 'seed_document_id'..."
9c433729-bb55-4cac-92f2-00903434fa28,What should a student do if they want to refus...,"In non-emergencies, students may choose to ref...","Document 109: 2. Date of birth (if under 18, n...",[],"{'question_type': 'simple', 'seed_document_id'..."
0caee60d-567c-4f74-b0e0-aa4efe189ead,What should Fire Watch staff do with the Fire ...,Fire Watch staff must update the Log sheet eac...,Document 132: 2. Receive a Fire Watch Log shee...,[],"{'question_type': 'simple', 'seed_document_id'..."


In [153]:
# Export the test-set DataFrame to an Excel workbook.
testset_df.to_excel("testset250.xlsx")

#### Evaluate giskard

In [214]:
from typing import Literal, Sequence
from giskard.llm.client.base import ChatMessage
from giskard.rag import evaluate
import openai

# Experiment configuration. `prompt_type` and `retrieval_type` select the
# branches taken inside get_answer_fn; all three values are embedded in the
# name of the saved report.
prompt_type: Literal["api", "p1"] = "p1"
retrieval_type: Literal["similarity_search", "max_marginal_relevance"] = "max_marginal_relevance"
# Label for how many chunks are retrieved.
retrieval_meta: Literal["top3", "top5", "top10"] = "top5"

def get_answer_fn(question: str, history=None) -> str:
    """Answer ``question`` with the RAG agent under evaluation.

    Retrieves context from ``vector_store`` using the strategy named by the
    module-level ``retrieval_type``, builds the prompt selected by
    ``prompt_type`` and asks ``gpt-4o-mini`` for the answer.

    Args:
        question: The latest user message.
        history: Optional earlier chat messages as ``{"role", "content"}``
            dicts. The list is copied and never modified.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If ``retrieval_type`` is not a supported strategy.
    """
    # Copy so the caller's history list is not mutated by the appends below.
    messages = list(history) if history else []
    messages.append({"role": "user", "content": question})

    # Derive k from the "topN" label so the retrieval depth always matches the
    # value recorded in the report name.
    top_k = int(retrieval_meta.removeprefix("top"))

    # Build the sources text. The branches must NOT return here: returning the
    # raw retrieved text made the prompt construction and LLM call unreachable.
    if retrieval_type == "similarity_search":
        scored_docs = vector_store.similarity_search_with_score(question, k=top_k)
        sources_str = ""
        for i, (doc, score) in enumerate(scored_docs):
            sources_str += f"Source {i}. Relevency Score: {score}:\n" + doc.page_content + "\n\n"
    elif retrieval_type == "max_marginal_relevance":
        retrieved_docs = vector_store.max_marginal_relevance_search(question, k=top_k)
        sources_str = ""
        for i, doc in enumerate(retrieved_docs):
            sources_str += f"Source {i}:\n" + doc.page_content + "\n\n"
    else:
        raise ValueError(f"Unsupported retrieval_type: {retrieval_type!r}")

    # NOTE(review): in both prompt variants the system messages are appended
    # after the user's question; confirm that ordering is intended.
    if prompt_type == "api":
        system_message = "Search results found the following information that might be relevent:\n\n"

        messages.append({"role": "system", "content": system_message + sources_str})

        reminder_message = "Remember, you are on a phone call. Your response to the caller should be accurate and concise. Do not monologue. Here is the caller's message:"
        messages.append({"role": "system", "content": reminder_message})
    elif prompt_type == "p1":
        system_message = "Search results found the following information that might be relevent:\n\n"
        messages.append({"role": "system", "content": system_message + sources_str})

        instructions = "You are helpful support agent who answers phone calls.\n Search results will be given to you to help you answer questions. Only use those results to answer questions. If a topic comes up that you don't know, do not answer. You are to concisely answer their question, instead of quoting the information.Never insert additional information. If something is unclear, ask for clarification."
        # The instructions are sent both first and last in the conversation.
        messages.insert(0, {"role": "system", "content": instructions})
        messages.append({"role": "system", "content": instructions})

    chatcompletion = openai.chat.completions.create(messages=messages, model="gpt-4o-mini")
    answer = chatcompletion.choices[0].message.content
    return answer


#ragas_context_recall = RagasMetric(name="RAGAS Context Recall", metric=context_recall, requires_context=True)

from giskard.llm.client import LLMClient
from openai.types.chat.completion_create_params import ResponseFormatJSONObject, ResponseFormatText
test_llm: Literal[None, "gpt-4o-mini"] = "gpt-4o-mini"


class TesterLLM(LLMClient):
    """giskard ``LLMClient`` that forwards completions to ``gpt-4o-mini``."""

    def complete(self, messages: Sequence[ChatMessage], temperature: float = 1, max_tokens: int | None = None, caller_id: str | None = None, seed: int | None = None, format=None) -> ChatMessage:
        """Send ``messages`` to the chat-completions API and return the first choice.

        ``temperature``, ``max_tokens`` and ``seed`` are forwarded unchanged;
        ``caller_id`` is unused. A ``format`` containing ``"json"`` requests a
        JSON-object response, anything else requests plain text.
        """
        payload = []
        for message in messages:
            payload.append({"role": message.role, "content": message.content})

        wants_json = format is not None and "json" in format
        response_format = {"type": "json_object"} if wants_json else {"type": "text"}

        completion = openai.chat.completions.create(
            messages=payload,
            model="gpt-4o-mini",
            temperature=temperature,
            max_tokens=max_tokens,
            seed=seed,
            response_format=response_format,
        )
        reply = completion.choices[0].message
        return ChatMessage(role=reply.role, content=reply.content)
# Register a TesterLLM as giskard's default client. `giskard` itself is
# imported in an earlier cell, not in this one.
giskard.llm.set_default_client(TesterLLM())
# Run the evaluation of get_answer_fn against the test set; a second TesterLLM
# instance is also passed explicitly as `llm_client`.
report = evaluate(get_answer_fn, testset=testset, knowledge_base=knowledge_base, llm_client=TesterLLM())

Asking questions to the agent:   0%|          | 0/45 [00:01<?, ?it/s]


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
#display(report)  # if you are working in a notebook

# or save the report as an HTML file
#report.to_html("rag_eval_report.html")

In [169]:
# Save the report under a path that encodes the prompt type, retrieval type,
# retrieval label and tester model.
report.save(f"reports_testset45-o1preview/{prompt_type}-gpt_4o_mini-splits_character-{retrieval_type}-{retrieval_meta}-zerox-tester_{test_llm}")

In [None]:
from giskard.rag import RAGReport
# NOTE(review): "reports/..." looks like a placeholder path — replace it with
# a real report directory before running this cell.
loaded_report = RAGReport.load("reports/...")

In [65]:
# Inspect the knowledge base attached to the report. The trailing `._to_` was
# an unfinished attribute access that raises AttributeError when executed.
report._knowledge_base

<giskard.rag.knowledge_base.KnowledgeBase at 0x341418ce0>

In [63]:
# Display the report's failures (rendered as the table below).
report.failures

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness,correctness_reason
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
9c63fed4-b956-4aa0-9501-741321f4f034,What happens if a student does not work throug...,"If a student does not work through Labor Day, ...",Document 147: summer benefits\nSummer housing ...,[],"{'question_type': 'simple', 'seed_document_id'...",If a student living in summer housing does not...,False,The agent provided a general response about im...
f29be739-0393-44a5-a519-7912dba4c9e1,What should a staff member do if they are goin...,They are expected to immediately call the Serv...,Document 45: If a staff member is unable to re...,[],"{'question_type': 'simple', 'seed_document_id'...",If a staff member knows they will be late for ...,False,The agent provided a general response about no...
fb530e2e-e211-41f1-9c11-3e5bc8078666,In order to be considered for a promotion to t...,An employee must work a minimum of 120 total h...,Document 135: a. Possess a satisfactory perfor...,[],"{'question_type': 'complex', 'seed_document_id...",The minimum total number of hours an employee ...,False,The agent did not provide the specific minimum...
c9e0790a-3be2-4adf-b9bd-7de3f0c01105,Could you specify the individuals who hold the...,"Only the Service Center Assistant Manager, Man...","Document 162: All keys, swipes and key rings i...",[],"{'question_type': 'complex', 'seed_document_id...",The authority to approve the signing out of ke...,False,The agent provided a broader list of roles tha...
94e282d7-352b-42f9-b78e-762e24a61c1d,What steps should a student take regarding the...,"If you have a student parking permit, remove i...",Document 55: If you have a student parking per...,[],"{'question_type': 'distracting element', 'seed...",If a student has received a written warning fo...,False,The agent provided a detailed response about g...
e049d396-2658-4b2f-8722-e42697cdc731,In the event that a substitute fails to report...,The original (scheduled/assigned) staff member...,Document 48: If the substitute does not report...,[],"{'question_type': 'distracting element', 'seed...",When a substitute fails to report for their sc...,False,The agent provided a detailed response about t...
14060525-e7cd-4fa3-b239-501a78dc994c,"Hi, I'm a staff member at the university and I...",The driver should take the vehicle out to Moto...,Document 171: Vehicles should never be returne...,[],"{'question_type': 'situational', 'seed_documen...",It's important to ensure that university vehic...,False,The agent provided general refueling procedure...
42d9bf93-4e26-4fef-b301-c31e7cd69bd8,"Hi there, I’m currently dealing with an unexpe...",If a staff member is not able to safely travel...,Document 63: In the event the University has a...,[],"{'question_type': 'situational', 'seed_documen...",If you are a staff member unable to safely tra...,False,The agent provided a detailed response about g...
ad2c7d12-7bb1-4710-9efc-16f83d2bbbeb,What is the main purpose of the Service Center...,The Service Center serves as the main communic...,Document 6: The Department of Residential Faci...,[],"{'question_type': 'double', 'original_question...",The main purpose of the Service Center is to p...,False,The agent provided a general description of th...
dc21b28c-3d62-4a84-83fc-ba8b83599849,What are the consequences?,Forgetting ID when working and using key slip ...,Document 182: not completing work in the box o...,"[{'role': 'user', 'content': 'I want to know w...","{'question_type': 'conversational', 'seed_docu...",The consequences of an employee forgetting the...,False,The agent provided a detailed explanation of p...


## Misc 

In [None]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCVector
from pinecone.grpc.index_grpc import UpsertResponse

# NOTE: this cell repeats the Pinecone setup from the "Pinecone" section above.
# Pinecone connection settings; the key and host are read from the environment.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_HOST = os.getenv("PINECONE_HOST")
PINECONE_NAMESPACE = "umd-call-center"
PC_INDEX_NAME = "knowledge"

# A single client is created and used to open the index handle.
pc = Pinecone(api_key=PINECONE_API_KEY)
pc_index = pc.Index(PC_INDEX_NAME, host=PINECONE_HOST)

In [None]:
from typing import List
def query_pc(vector: List[float], top_k: int = 10, timeout: float = 1):
    """Query the Pinecone index for the nearest neighbours of ``vector``.

    NOTE: a function with this name is also defined in the "Pinecone" section
    earlier in the notebook; when both cells run, this later definition wins.

    Args:
        vector: Query embedding.
        top_k: Number of matches to request. Defaults to 10.
        timeout: Value passed as the ``timeout`` argument of the query call.
            Defaults to 1.

    Returns:
        The result of ``pc_index.query`` (metadata included), restricted to
        ``PINECONE_NAMESPACE``.
    """
    return pc_index.query(
        vector=vector,
        namespace=PINECONE_NAMESPACE,
        top_k=top_k,
        #filter={"knowledge_uuid": {"$in": knowledge_uuids}},
        include_metadata=True,
        timeout=timeout,
    )


In [None]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of ``vec1`` and ``vec2``.

    The result is the dot product divided by the product of the two Euclidean
    norms. If either norm is zero the similarity is undefined and ``0`` is
    returned instead.
    """
    numerator = np.dot(vec1, vec2)
    norms = (np.linalg.norm(vec1), np.linalg.norm(vec2))
    # Guard against division by zero for zero-length vectors.
    if any(norm == 0 for norm in norms):
        return 0
    return numerator / (norms[0] * norms[1])
