# Evaluations

In [1]:
import os
from dotenv import load_dotenv

def set_env_path(env_path):
    # Set the path to the .env file
    load_dotenv(dotenv_path=env_path)

# Define the path to the .env file
env_path = "Path_to_/.env"
# Set the environment variables from the .env file
set_env_path(env_path)

## Create our QandA application


In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [4]:
file = '/Users/m.maguga-darbinian/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [5]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [6]:
llm = ChatOpenAI(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

### Coming up with test datapoints

In [8]:
data[5]

Document(page_content='Product Name: Camping Tent\nDescription: Spacious camping tent for comfortable outdoor accommodation.\nPrice: 129.99\nColor: Green\nSize: 4 Person', metadata={'source': '/Users/m.maguga-darbinian/OutdoorClothingCatalog_1000.csv', 'row': 5})

In [9]:
data[6]

Document(page_content='Product Name: Rain Poncho\nDescription: Stay dry during unexpected showers with this lightweight rain poncho.\nPrice: 24.99\nColor: Yellow\nSize: One Size', metadata={'source': '/Users/m.maguga-darbinian/OutdoorClothingCatalog_1000.csv', 'row': 6})

### Hard-coded examples

In [10]:
examples = [
    {
        "query": "is the tent suitable \
            suitable for outdoor camping?",
        "answer": "Yes"
    },
    {
        "query": "What colour is the light \
        weight poncho?",
        "answer": "Yellow"
    }
]

### LLM-Generated examples

In [11]:
from langchain.evaluation.qa import QAGenerateChain

In [12]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

In [14]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)

In [15]:
new_examples[0]

{'query': 'What is the product name and description of the hiking pants?',
 'answer': 'The product name is Hiking Pants and the description is Durable and lightweight hiking pants for outdoor adventures.'}

In [16]:
new_examples[1]

{'query': 'What is the product name of the item described in the document?',
 'answer': 'The product name of the item described in the document is "Waterproof Jacket."'}

In [17]:
data[0]

Document(page_content='Product Name: Hiking Pants\nDescription: Durable and lightweight hiking pants for outdoor adventures.\nPrice: 49.99\nColor: Black\nSize: L', metadata={'source': '/Users/m.maguga-darbinian/OutdoorClothingCatalog_1000.csv', 'row': 0})

In [18]:
data[1]

Document(page_content='Product Name: Waterproof Jacket\nDescription: Stay dry in rainy weather with this waterproof jacket.\nPrice: 79.99\nColor: Blue\nSize: M', metadata={'source': '/Users/m.maguga-darbinian/OutdoorClothingCatalog_1000.csv', 'row': 1})

### Combine examples

In [19]:
examples += new_examples

In [23]:
qa.run(examples[0]["query"])



[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


'Yes, the camping tent is suitable for outdoor camping.'

In [24]:
qa.run(examples[1]["query"])



[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


'The lightweight poncho is yellow in color.'

## Manual Evaluation

In [27]:
import langchain
langchain.debug = True

In [28]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "is the tent suitable             suitable for outdoor camping?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "is the tent suitable             suitable for outdoor camping?",
  "context": "Product Name: Camping Tent\nDescription: Spacious camping tent for comfortable outdoor accommodation.\nPrice: 129.99\nColor: Green\nSize: 4 Person<<<<>>>>>Product Name: Outdoor Hat\nDescription: Protect yourself from the sun with this wide-brimmed outdoor hat.\nPrice: 19.99\nColor: Khaki\nSize: One Size<<<<>>>>>Product Name: Hiking Pants\nDescription: Durable and lightweight hiking pants for outdoor adventures.\nPrice: 49.99\nColor: Black\nSize: L<<<<>>>>>

'Yes, the camping tent is suitable for outdoor camping.'

In [29]:
qa.run(examples[1]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What colour is the light         weight poncho?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What colour is the light         weight poncho?",
  "context": "Product Name: Rain Poncho\nDescription: Stay dry during unexpected showers with this lightweight rain poncho.\nPrice: 24.99\nColor: Yellow\nSize: One Size<<<<>>>>>Product Name: Waterproof Jacket\nDescription: Stay dry in rainy weather with this waterproof jacket.\nPrice: 79.99\nColor: Blue\nSize: M<<<<>>>>>Product Name: Hiking Pants\nDescription: Durable and lightweight hiking pants for outdoor adventures.\nPrice: 49.99\nColor: Black\nSize: L<<<<>>>>>Product Name: Camping Tent\nDescrip

'The lightweight poncho is yellow in color.'

In [30]:
# Turn off the debug mode
langchain.debug = False

## LLM assisted evaluation

In [32]:
predictions = qa.apply(examples)



[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


In [33]:
from langchain.evaluation.qa import QAEvalChain

In [34]:
llm = ChatOpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

In [37]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [38]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['text'])
    print()

Example 0:
Question: is the tent suitable             suitable for outdoor camping?
Real Answer: Yes
Predicted Answer: Yes, the camping tent is suitable for outdoor camping.
Predicted Grade: CORRECT

Example 1:
Question: What colour is the light         weight poncho?
Real Answer: Yellow
Predicted Answer: The lightweight poncho is yellow in color.
Predicted Grade: CORRECT

Example 2:
Question: What is the product name and description of the hiking pants?
Real Answer: The product name is Hiking Pants and the description is Durable and lightweight hiking pants for outdoor adventures.
Predicted Answer: The product name is Hiking Pants and the description is "Durable and lightweight hiking pants for outdoor adventures."
Predicted Grade: CORRECT

Example 3:
Question: What is the product name of the item described in the document?
Real Answer: The product name of the item described in the document is "Waterproof Jacket."
Predicted Answer: The product name of the item described in the documen