# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

## Create our QandA application

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [3]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [4]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [5]:
llm = ChatOpenAI(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

### Coming up with test datapoints

In [7]:
data[5]

Document(page_content='Product Name: Rain Poncho\nCategory: Outdoor Gear\nPrice: 24.99\nColor: Yellow\nSize: One Size\nDescription: Compact and lightweight rain poncho for outdoor activities\nRating: 4.1\nReviews: Keeps me dry during unexpected rain showers.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 5})

In [8]:
data[6]

Document(page_content='Product Name: Climbing Harness\nCategory: Climbing Gear\nPrice: 69.99\nColor: Orange\nSize: Medium\nDescription: Durable and adjustable climbing harness for safety and comfort\nRating: 4.7\nReviews: Feels secure and comfortable while climbing.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 6})

### Hard-coded examples

In [9]:
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set\
        have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty \
        850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

### LLM-Generated examples

In [10]:
from langchain.evaluation.qa import QAGenerateChain


In [11]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

In [12]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

In [13]:
new_examples[0]

{'query': 'What is the product name and category of the item described in the document?',
 'answer': 'The product name is Hiking Jacket and the category is Hiking Gear.'}

In [14]:
data[0]

Document(page_content='Product Name: Hiking Jacket\nCategory: Hiking Gear\nPrice: 89.99\nColor: Green\nSize: Large\nDescription: Waterproof and windproof hiking jacket with breathable fabric\nRating: 4.5\nReviews: Great jacket! Keeps me dry in rainy hikes.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

### Combine examples

In [15]:
examples += new_examples

In [16]:
qa.run(examples[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'I\'m sorry, but I don\'t have any information about the "Cozy Comfort Pullover Set" in my database.'

## Manual Evaluation

In [17]:
import langchain
langchain.debug = True

In [18]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set        have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "context": "Product Name: Hiking Pants\nCategory: Hiking Gear\nPrice: 59.99\nColor: Gray\nSize: Medium\nDescription: Durable and comfortable hiking pants with multiple pockets\nRating: 4.2\nReviews: Perfect fit and very functional.<<<<>>>>>Product Name: Insulated Jacket\nCategory: Outdoor Gear\nPrice: 119.99\nColor: Gray\nSize: Medium\nDescription: Insulated jacket for cold weather conditions with a remov

'I\'m sorry, but I don\'t have any information about the "Cozy Comfort Pullover Set" in my database.'

In [None]:
# Turn off the debug mode
langchain.debug = False

## LLM assisted evaluation

In [19]:
predictions = qa.apply(examples)

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "answer": "Yes"
}
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "context": "Product Name: Hiking Pants\nCategory: Hiking Gear\nPrice: 59.99\nColor: Gray\nSize: Medium\nDescription: Durable and comfortable hiking pants with multiple pockets\nRating: 4.2\nReviews: Perfect fit and very functional.<<<<>>>>>Product Name: Insulated Jacket\nCategory: Outdoor Gear\nPrice: 119.99\nColor: Gray\nSize: Medium\nDescription: Insulated jacket for cold weather cond

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the category of the product mentioned in the document?",
  "context": "Product Name: Cycling Jersey\nCategory: Cycling Gear\nPrice: 49.99\nColor: Blue\nSize: Small\nDescription: Breathable and moisture-wicking cycling jersey for long rides\nRating: 4.4\nReviews: Love the design and fabric quality.<<<<>>>>>Product Name: Insulated Jacket\nCategory: Outdoor Gear\nPrice: 119.99\nColor: Gray\nSize: Medium\nDescription: Insulated jacket for cold weather conditions with a removable hood\nRating: 4.9\nReviews: Perfect for winter hiking and skiing.<<<<>>>>>Product Name: Camping Tent\nCategory: Camping Gear\nPrice: 199.99\nColor: Blue\nSize: 4-P

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the product name and category mentioned in the document?",
  "context": "Product Name: Cycling Jersey\nCategory: Cycling Gear\nPrice: 49.99\nColor: Blue\nSize: Small\nDescription: Breathable and moisture-wicking cycling jersey for long rides\nRating: 4.4\nReviews: Love the design and fabric quality.<<<<>>>>>Product Name: Insulated Jacket\nCategory: Outdoor Gear\nPrice: 119.99\nColor: Gray\nSize: Medium\nDescription: Insulated jacket for cold weather conditions with a removable hood\nRating: 4.9\nReviews: Perfect for winter hiking and skiing.<<<<>>>>>Product Name: Camping Tent\nCategory: Camping Gear\nPrice: 199.99\nColor: Blue\nSize: 4

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the price of the Trail Running Shoes?",
  "context": "Product Name: Trail Running Shoes\nCategory: Running Gear\nPrice: 119.99\nColor: Black\nSize: US 10\nDescription: Lightweight and supportive trail running shoes for all terrains\nRating: 4.6\nReviews: Excellent grip and comfortable for long runs.<<<<>>>>>Product Name: Hiking Jacket\nCategory: Hiking Gear\nPrice: 89.99\nColor: Green\nSize: Large\nDescription: Waterproof and windproof hiking jacket with breathable fabric\nRating: 4.5\nReviews: Great jacket! Keeps me dry in rainy hikes.<<<<>>>>>Product Name: Hiking Pants\nCategory: Hiking Gear\nPrice: 59.99\nColor: Gray\nSize: Medium\n

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the product name and category of the item described in the document?",
  "context": "Product Name: Cycling Jersey\nCategory: Cycling Gear\nPrice: 49.99\nColor: Blue\nSize: Small\nDescription: Breathable and moisture-wicking cycling jersey for long rides\nRating: 4.4\nReviews: Love the design and fabric quality.<<<<>>>>>Product Name: Insulated Jacket\nCategory: Outdoor Gear\nPrice: 119.99\nColor: Gray\nSize: Medium\nDescription: Insulated jacket for cold weather conditions with a removable hood\nRating: 4.9\nReviews: Perfect for winter hiking and skiing.<<<<>>>>>Product Name: Rain Poncho\nCategory: Outdoor Gear\nPrice: 24.99\nColor: Yel

In [20]:
from langchain.evaluation.qa import QAEvalChain

In [21]:
llm = ChatOpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

In [22]:
graded_outputs = eval_chain.evaluate(examples, predictions)

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:QAEvalChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "query": "Do the Cozy Comfort Pullover Set        have side pockets?",
      "answer": "Yes",
      "result": "I'm sorry, but I don't have any information about the \"Cozy Comfort Pullover Set\" in my database."
    },
    {
      "query": "What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?",
      "answer": "The DownTek collection",
      "result": "I'm sorry, but I don't have that information."
    },
    {
      "query": "What is the product name and category of the item described in the document?",
      "answer": "The product name is Hiking Jacket and the category is Hiking Gear.",
      "result": "The product name is Cycling Jersey and the category is Cycling Gear."
    },
    {
      "query": "What is the category of the product mentioned in the document?",
      "answer": "The category of the product mentioned in th

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-KMVwMLshTdfKEkN6IMIH3AJO on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

[36;1m[1;3m[llm/end][0m [1m[1:RunTypeEnum.chain:QAEvalChain > 2:RunTypeEnum.llm:ChatOpenAI] [83.84s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "INCORRECT",
        "generation_info": {
          "finish_reason": "stop"
        },
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "INCORRECT",
            "additional_kwargs": {}
          }
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "prompt_tokens": 192,
      "completion_tokens": 3,
      "total_tokens": 195
    },
    "model_name": "gpt-3.5-turbo"
  },
  "run": null
}
[36;1m[1;3m[llm/end][0m [1m[1:RunTypeEnum.chain:QAEvalChain > 2:RunTypeEnum.llm:ChatOpenAI] [83.84s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "INCORRECT",
   

In [24]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    # print("Predicted Grade: " + graded_outputs[i]['text'])
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set        have side pockets?
Real Answer: Yes
Predicted Answer: I'm sorry, but I don't have any information about the "Cozy Comfort Pullover Set" in my database.

Example 1:
Question: What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: I'm sorry, but I don't have that information.

Example 2:
Question: What is the product name and category of the item described in the document?
Real Answer: The product name is Hiking Jacket and the category is Hiking Gear.
Predicted Answer: The product name is Cycling Jersey and the category is Cycling Gear.

Example 3:
Question: What is the category of the product mentioned in the document?
Real Answer: The category of the product mentioned in the document is Hiking Gear.
Predicted Answer: The category of the product mentioned in the document is Cycling Gear.

Example 4:
Question: What is the product name and category me