## This notebook illustrates different approaches to evaluating LLM-generated outputs, using the LangChain framework:
- Manual Evaluation
- LLM Assisted Evaluation

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
import datetime
import warnings
# Silence every warning for the rest of the session to keep cell output short.
# NOTE(review): this also hides deprecation warnings raised by the libraries imported below.
warnings.filterwarnings('ignore')

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import CSVLoader

In [3]:
# Choose the chat model by date: strictly after 2024-06-12 use the
# "gpt-3.5-turbo" alias, otherwise the pinned "gpt-3.5-turbo-0301" snapshot.
# NOTE(review): presumably the cutoff tracks the retirement of the 0301 snapshot - confirm.
target_date = datetime.datetime(2024, 6, 12)
current_date = datetime.datetime.now()
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [4]:
# Locate a .env file and load its variables into the process environment.
# Assigning to `_` keeps the call's return value from being echoed as cell output.
# NOTE(review): presumably this is where the OpenAI API key comes from - confirm.
_ = load_dotenv(find_dotenv())

In [8]:
# Product catalog used as the knowledge base; the path is relative to the
# notebook's working directory.
file_path = 'OutdoorClothingCatalog_1000.csv'

# Loader that reads the catalog CSV into LangChain documents.
loader = CSVLoader(file_path)

In [9]:
# Read the catalog into a list of documents. As the rows displayed further down
# show, each document records its source file and row number in `metadata`.
data = loader.load()

In [11]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model used to vectorize the catalog documents.
embeddings = OpenAIEmbeddings()

# Configure an in-memory vector store index, then populate it from the CSV loader.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

In [13]:
# Chat model that answers the questions; temperature is set to 0.
llm = ChatOpenAI(temperature=0, model=llm_model)

# Retrieval-augmented QA chain over the catalog index. The "stuff" chain type
# appears as StuffDocumentsChain in the debug trace further down.
# verbose=True prints the "Entering new ... chain" / "Finished chain" banners.
retriever = index.vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)

In [14]:
# Inspect catalog row 10 (the pullover set). Its text mentions the side pockets
# that the hand-written "Pullover pants" example asks about.
data[10]

Document(page_content=": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Features\n- Relaxed fit top with raglan sleeves and rounded hem.\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\n\nImported.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 10})

In [15]:
# Inspect catalog row 11 (the hooded jacket). Its description mentions the
# interior media port that the hand-written "Hooded Jacket" example asks about.
data[11]

Document(page_content=': 11\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this jacket is suitable for light activity up to 20° and moderate activity up to -30°. The soft and durable 100% polyester shell offers complete windproof protection and is insulated with warm, lofty goose down. Other features include welded baffles for a no-stitch construction and excellent stretch, an adjustable hood, an interior media port and mesh stash pocket and a hem drawcord. Machine wash and dry. Imported.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 11})

In [29]:
# Hand-written evaluation examples. Each entry nests the question and its
# reference answer under 'qa_pairs', the same shape as the generated examples
# that are appended to this list later.
examples = [
    {'qa_pairs': {"query": query, "answer": answer}}
    for query, answer in (
        ("Does the Hooded Jacket have a media port?", "Yes"),
        ("Does the Pullover pants have side pockets?", "Yes"),
    )
]

In [17]:
from langchain.evaluation.qa import QAGenerateChain


In [18]:
# Chain that asks an LLM to write a question/answer pair from a document.
# No temperature is passed here, so ChatOpenAI's default applies.
example_gen_chain = QAGenerateChain.from_llm(llm=ChatOpenAI(model=llm_model))

In [30]:
# Generate question/answer pairs from the first 11 catalog rows; each row is
# passed to the chain under the "doc" key.
doc_inputs = [{"doc": row} for row in data[:11]]
new_examples = example_gen_chain.apply_and_parse(doc_inputs)

# Peek at the first generated pair.
new_examples[0]

{'qa_pairs': {'query': "What is the approximate weight of the Women's Campside Oxfords per pair?",
  'answer': "The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz."}}

In [31]:
# Append the generated examples after the hand-written ones.
# NOTE: this mutates `examples` in place, so re-running this cell adds the
# generated examples a second time; re-run the cell that defines `examples` first.
examples.extend(new_examples)

In [32]:
# Confirm the hand-written examples still come first after the merge.
examples[0]

{'qa_pairs': {'query': 'Does the Hooded Jacket have a media port?',
  'answer': 'Yes'}}

In [33]:
# Smoke-test the QA chain on the first hand-written question.
qa.run(examples[0]['qa_pairs']["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'Yes, the Hooded Jacket does have a media port.'

## Manual Evaluation

In [35]:
import langchain

# Enable LangChain's global debug tracing so each nested chain run prints its
# inputs (for example the retrieved context handed to the LLM).
# The flag is a boolean: a string such as 'True' only works because any
# non-empty string is truthy, and 'False' would then enable debugging as well.
# Set it back to False once manual inspection is done to stop the trace output.
langchain.debug = True

In [36]:
# Re-run the first question with debug tracing on, so the inputs to each nested
# chain (including the retrieved context) can be inspected by hand.
qa.run(examples[0]['qa_pairs']['query'])

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Does the Hooded Jacket have a media port?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Does the Hooded Jacket have a media port?",
  "context": "compatible with our Patroller Ski Pants. Pit-zips for quick ventilation and a helmet-compatible hood. Secure inner goggle pocket with media port. Imported.\n\nThe wind-and-water resistant shell has a durable polyester exterior and smooth nylon interior, while the 100% sweater fleece liner keeps you warm when the chill sets in. The 3-point zip-in system makes it easy to wear together or separate, and the adjustable noninsulated hood stows away into collar. Mesh lining increases breathability, and there's one c

'Yes, the Hooded Jacket does have a media port.'

## LLM-Assisted Evaluation

In [41]:
# Flatten each example's nested 'qa_pairs' dict into a top-level
# {"query", "answer"} dict, the shape passed to `qa.apply` and
# `eval_chain.evaluate` further down.
qa_pairs = [
    dict(query=pair['query'], answer=pair['answer'])
    for pair in (example['qa_pairs'] for example in examples)
]
    

In [42]:
# Show the flattened form of the first example.
print(qa_pairs[0])

{'query': 'Does the Hooded Jacket have a media port?', 'answer': 'Yes'}


In [43]:
# Run the QA chain over every example. Judging by how the results are read
# further down, each returned dict keeps the "query" and "answer" inputs and
# adds the chain's answer under "result".
predictions = qa.apply(qa_pairs)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Does the Hooded Jacket have a media port?",
  "answer": "Yes"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Does the Hooded Jacket have a media port?",
  "context": "compatible with our Patroller Ski Pants. Pit-zips for quick ventilation and a helmet-compatible hood. Secure inner goggle pocket with media port. Imported.\n\nThe wind-and-water resistant shell has a durable polyester exterior and smooth nylon interior, while the 100% sweater fleece liner keeps you warm when the chill sets in. The 3-point zip-in system makes it easy to wear together or separate, and the adjustable noninsulated hood stows away into collar. Mesh lining increases breathability

### Now we have the reference answers in `qa_pairs` and the predicted answers in `predictions`

In [44]:
from langchain.evaluation.qa import QAEvalChain

# Grader model. This rebinds the name `llm` to a new model with temperature 0.2;
# the object passed to RetrievalQA.from_chain_type earlier is not modified.
llm = ChatOpenAI(temperature=0.2, model=llm_model)

# Chain that asks the grader model to compare each predicted answer against
# its reference answer.
eval_chain = QAEvalChain.from_llm(llm=llm)

# Grade all predictions against the reference question/answer pairs.
graded_outputs = eval_chain.evaluate(examples=qa_pairs, predictions=predictions)

[32;1m[1;3m[chain/start][0m [1m[chain:QAEvalChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "query": "Does the Hooded Jacket have a media port?",
      "answer": "Yes",
      "result": "Yes, the Hooded Jacket does have a media port."
    },
    {
      "query": "Does the Pullover pants have side pockets?",
      "answer": "Yes",
      "result": "Yes, the Pull-on pants mentioned in the description have side seam pockets with unique self gussets that help items stay put."
    },
    {
      "query": "What is the approximate weight of the Women's Campside Oxfords per pair?",
      "answer": "The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.",
      "result": "The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz."
    },
    {
      "query": "What are the dimensions of the small and medium sizes for the Recycled Waterhog Dog Mat, Chevron Weave?",
      "answer": "The small size has dimensions of 18\" x 28\"

In [45]:
# Show the grader's verdicts; the report loop further down reads each one by
# position alongside the matching entry in `predictions`.
graded_outputs

[{'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'},
 {'results': 'CORRECT'}]

In [49]:
# Print, for every example, the question, the reference answer, the chain's
# answer and the grader's verdict so they can be compared side by side.
# The three lists are matched by position, so fail loudly if they ever drift
# apart rather than printing a misaligned or truncated report.
assert len(qa_pairs) == len(predictions) == len(graded_outputs), (
    "qa_pairs, predictions and graded_outputs must have the same length"
)
for i, (prediction, grade) in enumerate(zip(predictions, graded_outputs)):
    print(f"QA PAIR {i}:")
    print("Question: " + prediction['query'])
    print("Real Answer: " + prediction['answer'])
    print("Predicted Answer: " + prediction['result'])
    print("Predicted Grade: " + grade['results'])
    print()

QA PAIR 0:
Question: Does the Hooded Jacket have a media port?
Real Answer: Yes
Predicted Answer: Yes, the Hooded Jacket does have a media port.
Predicted Grade: CORRECT

QA PAIR 1:
Question: Does the Pullover pants have side pockets?
Real Answer: Yes
Predicted Answer: Yes, the Pull-on pants mentioned in the description have side seam pockets with unique self gussets that help items stay put.
Predicted Grade: CORRECT

QA PAIR 2:
Question: What is the approximate weight of the Women's Campside Oxfords per pair?
Real Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.
Predicted Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.
Predicted Grade: CORRECT

QA PAIR 3:
Question: What are the dimensions of the small and medium sizes for the Recycled Waterhog Dog Mat, Chevron Weave?
Real Answer: The small size has dimensions of 18" x 28" and the medium size has dimensions of 22.5" x 34.5".
Predicted Answer: The dimensions fo