# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation
* LangChain evaluation platform

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [2]:
# account for deprecation of LLM model
import datetime
# Get the current date
current_date = datetime.datetime.now().date()

# Define the date after which the model should be set to "gpt-3.5-turbo"
target_date = datetime.date(2024, 6, 12)

# Set the model variable based on the current date
if current_date > target_date:
    llm_model = "gpt-3.5-turbo"
else:
    llm_model = "gpt-3.5-turbo-0301"

## Create a QandA application

In [3]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [5]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [47]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [48]:
llm = ChatOpenAI(temperature = 0.0, model=llm_model)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

# Test datapoints

In [49]:
data[10], data[11]

(Document(page_content=": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Features\n- Relaxed fit top with raglan sleeves and rounded hem.\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\n\nImported.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 10}),
 Document(page_content=': 11\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing excep

# in the following example we've read the documents data and wrote a query and wrote an answer.

In [50]:
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set\
        have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty \
        850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

# LLM generated examples

In [51]:
from langchain.evaluation.qa import QAGenerateChain
# It creates a question, answer pair


In [52]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI(model=llm_model))

# the warning below can be safely ignored

In [53]:
new_examples = example_gen_chain.apply(
    [{"doc": t} for t in data[:5]]
)

In [54]:
new_examples

[{'qa_pairs': {'query': "What is the approximate weight of the Women's Campside Oxfords per pair?",
   'answer': "The approximate weight of the Women's Campside Oxfords per pair is 1 lb.1 oz."}},
 {'qa_pairs': {'query': 'What are the dimensions of the small and medium Recycled Waterhog Dog Mats?',
   'answer': 'The dimensions of the small Recycled Waterhog Dog Mat are 18" x 28" and the dimensions of the medium Recycled Waterhog Dog Mat are 22.5" x 34.5".'}},
 {'qa_pairs': {'query': "What features does the Infant and Toddler Girls' Coastal Chill Swimsuit have?",
   'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. It also features four-way-stretch and chlorine-resistant fabric that keeps its shape and resists snags. The swimsuit is UPF 50+ rated, providing the highest rated sun protection possible, blocking 98% of the sun's harmful rays. Additionally, it has crossover no-slip straps and is fully lined on the bottom to ensure a secure fit and maximum cov

In [55]:
new_ex_clean = []
for item in new_examples:
    for k, v in item.items():
        new_ex_clean.append(v)
print(new_ex_clean)

[{'query': "What is the approximate weight of the Women's Campside Oxfords per pair?", 'answer': "The approximate weight of the Women's Campside Oxfords per pair is 1 lb.1 oz."}, {'query': 'What are the dimensions of the small and medium Recycled Waterhog Dog Mats?', 'answer': 'The dimensions of the small Recycled Waterhog Dog Mat are 18" x 28" and the dimensions of the medium Recycled Waterhog Dog Mat are 22.5" x 34.5".'}, {'query': "What features does the Infant and Toddler Girls' Coastal Chill Swimsuit have?", 'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. It also features four-way-stretch and chlorine-resistant fabric that keeps its shape and resists snags. The swimsuit is UPF 50+ rated, providing the highest rated sun protection possible, blocking 98% of the sun's harmful rays. Additionally, it has crossover no-slip straps and is fully lined on the bottom to ensure a secure fit and maximum coverage."}, {'query': 'What is the fabric composition 

In [56]:
data[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [57]:
examples += new_ex_clean

In [58]:
qa.run(examples[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'The Cozy Comfort Pullover Set, Stripe does have side pockets.'

## Manual Evaluation of the above examples

In [80]:
import langchain
langchain.debug = True

In [81]:
qa.run(examples[0]['query'])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set        have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "context": ": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\

'The Cozy Comfort Pullover Set, Stripe does have side pockets.'

In [61]:
# Turn off the debug mode
langchain.debug = False

## LLM assisted evaluation

In [62]:
print(examples)

[{'query': 'Do the Cozy Comfort Pullover Set        have side pockets?', 'answer': 'Yes'}, {'query': 'What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection'}, {'query': "What is the approximate weight of the Women's Campside Oxfords per pair?", 'answer': "The approximate weight of the Women's Campside Oxfords per pair is 1 lb.1 oz."}, {'query': 'What are the dimensions of the small and medium Recycled Waterhog Dog Mats?', 'answer': 'The dimensions of the small Recycled Waterhog Dog Mat are 18" x 28" and the dimensions of the medium Recycled Waterhog Dog Mat are 22.5" x 34.5".'}, {'query': "What features does the Infant and Toddler Girls' Coastal Chill Swimsuit have?", 'answer': "The swimsuit has bright colors, ruffles, and exclusive whimsical prints. It also features four-way-stretch and chlorine-resistant fabric that keeps its shape and resists snags. The swimsuit is UPF 50+ rated, providing the highest rated sun protection

In [63]:
predictions = qa.apply(examples)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [64]:
from langchain.evaluation.qa import QAEvalChain

In [65]:
llm = ChatOpenAI(temperature=0, model=llm_model)
eval_chain = QAEvalChain.from_llm(llm)

In [82]:
graded_outputs = eval_chain.evaluate(examples, predictions)

[32;1m[1;3m[chain/start][0m [1m[1:chain:QAEvalChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "query": "Do the Cozy Comfort Pullover Set        have side pockets?",
      "answer": "Yes",
      "result": "Yes, the Cozy Comfort Pullover Set, Stripe has side pockets."
    },
    {
      "query": "What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?",
      "answer": "The DownTek collection",
      "result": "The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection."
    },
    {
      "query": "What is the approximate weight of the Women's Campside Oxfords per pair?",
      "answer": "The approximate weight of the Women's Campside Oxfords per pair is 1 lb.1 oz.",
      "result": "The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz."
    },
    {
      "query": "What are the dimensions of the small and medium Recycled Waterhog Dog Mats?",
      "answer": "The dimensions of the small Rec

In [76]:
# len(predictions), len(examples)
# print(predictions)

[{'query': 'Do the Cozy Comfort Pullover Set        have side pockets?', 'answer': 'Yes', 'result': 'Yes, the Cozy Comfort Pullover Set, Stripe has side pockets.'}, {'query': 'What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection', 'result': 'The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.'}, {'query': "What is the approximate weight of the Women's Campside Oxfords per pair?", 'answer': "The approximate weight of the Women's Campside Oxfords per pair is 1 lb.1 oz.", 'result': "The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz."}, {'query': 'What are the dimensions of the small and medium Recycled Waterhog Dog Mats?', 'answer': 'The dimensions of the small Recycled Waterhog Dog Mat are 18" x 28" and the dimensions of the medium Recycled Waterhog Dog Mat are 22.5" x 34.5".', 'result': 'The dimensions of the small Recycled Waterhog Dog Mat are 18" x 28" and the dimensio

In [85]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['results'])
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set        have side pockets?
Real Answer: Yes
Predicted Answer: Yes, the Cozy Comfort Pullover Set, Stripe has side pockets.
Predicted Grade: CORRECT

Example 1:
Question: What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade: CORRECT

Example 2:
Question: What is the approximate weight of the Women's Campside Oxfords per pair?
Real Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb.1 oz.
Predicted Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.
Predicted Grade: CORRECT

Example 3:
Question: What are the dimensions of the small and medium Recycled Waterhog Dog Mats?
Real Answer: The dimensions of the small Recycled Waterhog Dog Mat are 18" x 28" and the dimensions of the medium Recycled Waterhog D