In [1]:
import requests
from langchain.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader

doc_path = '../data/evaluation_set/Robinson Advisory.docx.pdf'

loader = PyPDFLoader(doc_path)
pages = loader.load()

pages

[Document(page_content='ADVISOR Y\nSER VICES\nAGREEMENT\nThis\nAdvisory\nServices\nAgreement\nis\nentered\ninto\nas\nof\nJune\n15\nth\n,\n2023\n(the\n“\nEffective\nDate\n”),\nby\nand \nbetween\nCloud\nInvestments\nLtd.,\nID\n51-426526-3,\nan\nIsraeli\ncompany\n(the\n"\nCompany\n"),\nand\nMr.\nJack \nRobinson,\nPassport\nNumber\n780055578,\nresiding\nat\n1\nRabin\nst,\nTel\nAviv,\nIsrael,\nEmail: \njackrobinson@gmail.com\n("\nAdvisor\n").\nWhereas,\nAdvisor\nhas\nexpertise\nand/or\nknowledge\nand/or\nrelationships,\nwhich\nare\nrelevant\nto\nthe \nCompany’ s\nbusiness\nand\nthe\nCompany\nhas\nasked\nAdvisor\nto\nprovide\nit\nwith\ncertain\nAdvisory \nservices,\nas\ndescribed\nin\nthis\nAgreement;\nand\nWhereas,\nAdvisor\nhas\nagreed\nto\nprovide\nthe\nCompany\nwith\nsuch\nservices,\nsubject\nto\nthe\nterms\nset\nforth \nin\nthis\nAgreement.\nNOW\nTHEREFORE\nTHE\nPARTIES\nAGREE\nAS\nFOLLOWS:\n1.\nServices:\n1.1\nAdvisor\nshall\nprovide\nto\nthe\nCompany ,\nas\nan\nindependent\ncontractor

### Character Text splitter chunking

In [2]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter

CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

text_splitter = CharacterTextSplitter(chunk_size = CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs = text_splitter.split_documents(pages)

In [4]:
import re
import csv
import pandas as pd

robinson_file_path = '/home/tema/10X/week11/Legal_Expert_Contract_Advisor_RAG/data/evaluation_set/robinson.csv'
qna_df = pd.read_csv(robinson_file_path)

qna_df

Unnamed: 0,Question,Answer
0,Who are the parties to the Agreement and what...,Cloud Investments Ltd. (“Company”) and Jack Ro...
1,What is the termination notice?,According to section 4:14 days for convenience...
2,What are the payments to the Advisor under th...,According to section 6: 1. Fees of $9 per hour...
3,Can the Agreement or any of its obligations b...,1. Under section 1.1 the Advisor can’t assign ...
4,Who owns the IP?,According to section 4 of the Undertaking (App...
5,Is there a non-compete obligation to the Advi...,Yes. During the term of engagement with the Co...
6,Can the Advisor charge for meal time?,"No. See Section 6.1, Billable Hour doesn’t inc..."
7,In which street does the Advisor live?,"1 Rabin st, Tel Aviv, Israel"
8,Is the Advisor entitled to social benefits?,"No. According to section 8 of the Agreement, t..."
9,What happens if the Advisor claims compensati...,If the Advisor is determined to be an employee...


In [5]:
#current chiunk size
doc_length= qna_df['Question'].apply(lambda x : len(x)).mean()
print(doc_length)

50.6


### Defining llm & embeddings

In [6]:
import os
import openai
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv()) # read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

MODEL = 'gpt-3.5-turbo'


In [5]:
import nest_asyncio
nest_asyncio.apply()


### Data Generation¶
import and use Ragas’ TestsetGenerator to quickly generate a synthetic test set from the loaded documents.

In [10]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# generator with openai models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# generate testset
testset = generator.generate_with_langchain_docs(pages, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})

Filename and doc_id are the same for all nodes.                 
Generating:  70%|███████   | 7/10 [00:49<00:22,  7.50s/it]max retries exceeded for SimpleEvolution(generator_llm=LangchainLLMWrapper(run_config=RunConfig(timeout=60, max_retries=15, max_wait=90, max_workers=16, thread_timeout=80.0, exception_types=<class 'openai.RateLimitError'>, log_tenacity=False)), docstore=InMemoryDocumentStore(splitter=<langchain_text_splitters.base.TokenTextSplitter object at 0x7944963786e0>, nodes=[Node(page_content='ADVISOR Y\nSER VICES\nAGREEMENT\nThis\nAdvisory\nServices\nAgreement\nis\nentered\ninto\nas\nof\nJune\n15\nth\n,\n2023\n(the\n“\nEffective\nDate\n”),\nby\nand \nbetween\nCloud\nInvestments\nLtd.,\nID\n51-426526-3,\nan\nIsraeli\ncompany\n(the\n"\nCompany\n"),\nand\nMr.\nJack \nRobinson,\nPassport\nNumber\n780055578,\nresiding\nat\n1\nRabin\nst,\nTel\nAviv,\nIsrael,\nEmail: \njackrobinson@gmail.com\n("\nAdvisor\n").\nWhereas,\nAdvisor\nhas\nexpertise\nand/or\nknowledge\nand/or\nrelations

 export the results into a Pandas DataFrame.

In [15]:
testset.to_pandas()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,Who owns the Work Product created in the provi...,"[-\n4\n-\nConfidentiality,\nNone\nCompete\nand...",The Work Product created in the provision of s...,simple,[{'source': '../data/evaluation_set/Robinson A...,True
1,What is the relationship between the Company a...,"[without\nlimitation,\nany\nsalary ,\novertime...",The relationship between the Company and the A...,simple,[{'source': '../data/evaluation_set/Robinson A...,True
2,How should notices under this Agreement be del...,"[-\n3\n-\nconstitute\nsalary\npayments,\nand\n...",Notices under this Agreement should be deliver...,simple,[{'source': '../data/evaluation_set/Robinson A...,True
3,"What is the significance of the ""No Conflicts""...","[\nin\nwhich\nhe\nprovided\nthe\nServices,\non...","The ""No Conflicts"" clause in the agreement is ...",simple,[{'source': '../data/evaluation_set/Robinson A...,True
4,What is the purpose of the Non-Competition cla...,"[without\nlimitation,\nany\nsalary ,\novertime...",The purpose of the Non-Competition clause in t...,simple,[{'source': '../data/evaluation_set/Robinson A...,True
5,What other costs are associated with fulfillin...,"[without\nlimitation,\nany\nsalary ,\novertime...",The other costs associated with fulfilling obl...,reasoning,[{'source': '../data/evaluation_set/Robinson A...,True
6,What is the advisor's responsibility regarding...,"[\nin\nwhich\nhe\nprovided\nthe\nServices,\non...",Advisor agrees to inform the Company of any af...,multi_context,[{'source': '../data/evaluation_set/Robinson A...,True


In [62]:
# RAGAS expects a file_name dict as key
for document in pages:
    document.metadata['file_name'] = document.metadata['source']

### Evaluating using test sets

In [7]:
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

index = VectorstoreIndexCreator(embedding=embeddings).from_loaders([loader])
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model= MODEL)



  warn_deprecated(


In [8]:
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=index.vectorstore.as_retriever(),
    return_source_documents=True,
)


In [9]:
eval_questions = qna_df['Question'].tolist()
eval_answers = qna_df['Answer'].tolist()

examples  = [
    {
        "query": q, "ground_truths": [eval_answers[i]]
    }
    for i, q in enumerate(eval_questions)
]



In [10]:
predictions = qa_chain.batch(examples)

In [11]:
predictions

[{'query': ' Who are the parties to the Agreement and what are their defined names?',
  'ground_truths': ['Cloud Investments Ltd. (“Company”) and Jack Robinson (“Advisor”)'],
  'result': 'The parties to the Agreement are Cloud Investments Ltd (referred to as the Company) and Mr. Jack Robinson (referred to as the Advisor).',
  'source_documents': [Document(page_content='-\n4\n-\nConfidentiality,\nNone\nCompete\nand\nIP\nOwnership\nUndertaking\nAppendix\nA\nto\nAdvisory\nService\nAgreement\nas\nof\nJune\n15\nth\n,\n2023\nTHIS\nCONFIDENTIALITY\nUNDER TAKING\n(“\nUndertaking\n”)\nis\nentered\ninto\nas\nof\nJune\n15\nth\n,\n2023\n(“\nEffective\nDate\n”),\nby\nMr.\nJack \nRobinson,\nPassport\nNumber\n780055578,\nresiding\nat\n1\nRabin\nst,\nTel\nAviv,\nIsrael,\nEmail:\njackrobinson@gmail.com,\n(“\nAdvisor\n”),\ntowards\nCloud \nInvestments\nLtd\n(“\nCompany\n”),\nas\nfollows:\n1.\nDefinitions:\n(a)\nCompany’ s\nBusiness:\ndevelopment\nof\nan\nAI-based\ncontract\nassistant.\n(b)\nServices:\nS

In [15]:
for prediction in predictions:
    query = prediction['query']
    ground_truth = prediction['ground_truths']
    result = str(prediction['result']) 
    source_document = prediction['source_documents']

In [16]:

print(f"Query: {query}")
print(f"Result: {result}")


Query:  What happens if the Advisor claims compensation based on employment relationship with the
 Company?
Result: If the Advisor claims compensation based on an employment relationship with the Company, the agreement states that if it is determined by any governmental authority that the Advisor is an employee of the Company, then payments to the Advisor will be reduced retroactively. 60% of the payments will constitute salary payments, and 40% will be for other statutory rights and benefits as an employee. The Company may also offset any amounts due to the Advisor under the agreement from any amounts payable to the Advisor. The Advisor would need to indemnify the Company for any loss or expenses incurred if it is determined that an employer/employee relationship existed between the Advisor and the Company.


In [17]:
result.dtypes

AttributeError: 'str' object has no attribute 'dtypes'

#### Metrics¶
Ragas provides several metrics to evaluate various aspects of your RAG systems:

Retriever: Offers context_precision and context_recall that measure the performance of your retrieval system.

Generator (LLM): Provides faithfulness that measures hallucinations and answer_relevancy that measures how relevant the answers are to the question.

In [18]:
from ragas.langchain.evalchain import RagasEvaluatorChain
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)


  from .autonotebook import tqdm as notebook_tqdm


In [55]:
# create evaluation chains
#faithfulness_chain = RagasEvaluatorChain(metric=faithfulness)
#answer_rel_chain = RagasEvaluatorChain(metric=answer_relevancy)
#context_rel_chain = RagasEvaluatorChain(metric=context_precision)
#context_recall_chain = RagasEvaluatorChain(metric=context_recall)

In [19]:
metrics=[
faithfulness,
answer_relevancy,
context_precision,
context_recall,
]

In [None]:

score = evaluate(result,metrics=[faithfulness])
score.to_pandas()

In [53]:
from ragas import evaluate

results = [prediction['result'] for prediction in predictions]

scores = evaluate(
    results,
    metrics=[context_precision, faithfulness, answer_relevancy, context_recall],
)

print(scores)


AttributeError: 'list' object has no attribute 'rename_columns'

In [None]:


faithfulness_score = faithfulness_chain.evaluate(examples, predictions)
answer_relevancy_score = answer_rel_chain.evaluate(examples, predictions)
context_precision_score = context_rel_chain.evaluate(examples, predictions)
context_recall_score = context_recall_chain.evaluate(examples, predictions)

In [None]:
faithfulness_scores = [score["faithfulness_score"] for score in faithfulness_score]
answer_relevancy_scores = [score["answer_relevancy_score"] for score in answer_relevancy_score]
context_precision_scores = [score["context_precision_score"] for score in context_precision_score]
context_recall_scores = [score["context_recall_score"] for score in context_recall_score]

Haystackpipeline

In [24]:
import pandas as pd
from ragas import evaluate
from ragas.metrics import context_precision, faithfulness, answer_relevancy, context_recall
from haystack.pipeline import RAGenerator, DocumentSearchPipeline
from haystack.retriever.sparse import ElasticsearchRetriever
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore


ModuleNotFoundError: No module named 'haystack.pipeline'

In [None]:

# Initialize your RAG pipeline (example setup)
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
retriever = ElasticsearchRetriever(document_store=document_store)
generator = RAGenerator(
    model_name_or_path="facebook/rag-token-base",
    use_gpu=True
)
model = DocumentSearchPipeline(retriever=retriever, generator=generator)



In [None]:
# Prepare evaluation examples
examples = [
    {"query": row["Question"], "ground_truths": [row["Answer"]]}
    for _, row in qna_df.iterrows()
]

# Perform evaluation
result = evaluate(
    examples,
    metrics=[context_precision, faithfulness, answer_relevancy, context_recall],
)

print(result)

In [None]:
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

index = VectorstoreIndexCreator(embedding=embeddings).from_loaders([loader])
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model= MODEL)

qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=index.vectorstore.as_retriever(),
    return_source_documents=True,
)



In [None]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv,find_dotenv


load_dotenv(find_dotenv())


client = weaviate.Client(
  embedded_options = EmbeddedOptions()
)

In [None]:

# Populate vector database
vectorstore = Weaviate.from_documents(
    client = client,    
    documents = chunks,
    embedding = OpenAIEmbeddings(),
    by_text = False
)



In [None]:
# Define vectorstore as retriever to enable semantic search
retriever = vectorstore.as_retriever()

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Define LLM
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Define prompt template
template = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use two sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
"""

prompt = ChatPromptTemplate.from_template(template)


In [None]:
parser = StrOutputParser()
# Setup RAG pipeline
rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

In [None]:
from datasets import Dataset

questions = ["What is the business objective PromptlyTech", 
             "What are the key services provided by PromptlyTech?",
             "What is prompt engineering?",
            ]
ground_truths = [["PromptlyTech aims to revolutionize how businesses interact with LLMs, making the technology more accessible, efficient, and effective."],
                ["PromptlyTech focuses on Automatic Prompt Generation, Automatic Evaluation Data Generation, and Prompt Testing and Ranking services."],
                ["Prompt engineering is the craft of designing queries or statements to guide LLMs to produce desired outcomes."]]
answers = []
contexts = []

# Inference
for query in questions:
  answers.append(rag_chain.invoke(query))
  contexts.append([docs.page_content for docs in retriever.get_relevant_documents(query)])

# To dict
data = {
    "question": questions, 
    "answer": answers, 
    "contexts": contexts, 
    "ground_truths": ground_truths
}

# Convert dict to dataset
dataset = Dataset.from_dict(data)