In [1]:
import os
os.environ["NVIDIA_API_KEY"] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

In [2]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
llm_nvidia = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1")

In [3]:
from langchain_community.chat_models import ChatOllama

llm_ollama = ChatOllama(model="llama3")

In [4]:
llm = llm_nvidia

# LangChain Evaluation

## Create our QandA application

In [5]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [6]:
file = 'data/files/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding = embedding
).from_loaders([loader])

  from tqdm.autonotebook import tqdm, trange


In [8]:
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

In [9]:
data[10]

Document(page_content=": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Features\n- Relaxed fit top with raglan sleeves and rounded hem.\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\n\nImported.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 10})

### Hard-coded examples

In [10]:
examples = [
    {"query": "Do the Cozy Comfort Pullover Set\
    have side pockets?",
    "answer": "Yes"
    },
     {"query": "What collection is the Ultra-Lofty \
    850 Stretch Down Hooded Jacket from?",
    "answer": "The DownTek collection"
     }
]


### LLM-Generated examples

In [11]:
from langchain.evaluation.qa import QAGenerateChain

In [12]:
example_gen_chain = QAGenerateChain.from_llm(llm)

In [13]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)



In [14]:
new_examples = [ex.get("qa_pairs") for ex in new_examples]

In [15]:
new_examples[0]

{'query': "What is the material of the Women's Campside Oxfords and what is its special feature?",
 'answer': "The Women's Campside Oxfords are made of soft canvas material, which provides a broken-in feel and look. One special feature of these shoes is the Cleansport NXT® antimicrobial odor control in the innersole."}

In [16]:
data[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [17]:
examples += new_examples

In [18]:
examples[0]

{'query': 'Do the Cozy Comfort Pullover Set    have side pockets?',
 'answer': 'Yes'}

In [19]:
qa.run(examples[0]["query"])

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


' Yes, the Cozy Comfort Pullover Set, Stripe (named 10 and 73) has side pockets on the pull-on pants according to the provided descriptions.'

### Manual Evaluation¶

In [20]:
import langchain
langchain.debug = True

In [21]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set    have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set    have side pockets?",
  "context": ": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Featur

' Yes, the Cozy Comfort Pullover Set, Stripe (named 10 and 73) has side pockets on the pull-on pants according to the provided descriptions.'

In [22]:
# Turn off the debug mode
langchain.debug = False

### LLM assisted evaluation

In [23]:
examples[2]

{'query': "What is the material of the Women's Campside Oxfords and what is its special feature?",
 'answer': "The Women's Campside Oxfords are made of soft canvas material, which provides a broken-in feel and look. One special feature of these shoes is the Cleansport NXT® antimicrobial odor control in the innersole."}

In [24]:
predictions = qa.batch(examples)



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [25]:
predictions

[{'query': 'Do the Cozy Comfort Pullover Set    have side pockets?',
  'answer': 'Yes',
  'result': ' Yes, the Cozy Comfort Pullover Set, Stripe (named 10 and 73) has side pockets on the pull-on pants according to the provided descriptions.'},
 {'query': 'What collection is the Ultra-Lofty     850 Stretch Down Hooded Jacket from?',
  'answer': 'The DownTek collection',
  'result': ' The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection. I know this because the name "DownTek" is mentioned in the description of the jacket, and it is a collection mentioned in the context provided.'},
 {'query': "What is the material of the Women's Campside Oxfords and what is its special feature?",
  'answer': "The Women's Campside Oxfords are made of soft canvas material, which provides a broken-in feel and look. One special feature of these shoes is the Cleansport NXT® antimicrobial odor control in the innersole.",
  'result': " The Women's Campside Oxfords are made of a soft can

In [26]:
from langchain.evaluation.qa import QAEvalChain

In [27]:
eval_chain = QAEvalChain.from_llm(llm)

In [28]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [29]:
graded_outputs

[{'results': ' CORRECT\n\nThe student answer correctly states that the Cozy Comfort Pullover Set has side pockets, based on the provided descriptions. This matches the true answer.'},
 {'results': ' CORRECT\n\nThe student answer correctly identifies the Ultra-Lofty 850 Stretch Down Hooded Jacket as being from the DownTek collection. The fact that the student included additional information about how they determined this does not affect the accuracy of the answer.'},
 {'results': ' CORRECT'},
 {'results': ' GRADE: CORRECT\n\nThe student answer includes all the necessary and accurate information as stated in the true answer. The student answer expands on the true answer by mentioning that the mat can be vacuumed or hosed clean, and that it is constructed in the USA – both of which are correct and do not conflict with the true answer. Therefore, the student answer is factually accurate and can be considered correct.'},
 {'results': " CORRECT\n\nThe student answer correctly lists all the k

In [30]:
for i, eg in enumerate(examples):
    print("_"*50)
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['results'])
    print("_"*50)

__________________________________________________
Example 0:
Question: Do the Cozy Comfort Pullover Set    have side pockets?
Real Answer: Yes
Predicted Answer:  Yes, the Cozy Comfort Pullover Set, Stripe (named 10 and 73) has side pockets on the pull-on pants according to the provided descriptions.
Predicted Grade:  CORRECT

The student answer correctly states that the Cozy Comfort Pullover Set has side pockets, based on the provided descriptions. This matches the true answer.
__________________________________________________
__________________________________________________
Example 1:
Question: What collection is the Ultra-Lofty     850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer:  The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection. I know this because the name "DownTek" is mentioned in the description of the jacket, and it is a collection mentioned in the context provided.
Predicted Grade:  CORRECT

The student 