**Metrics to evaluate retrieval:** 
Ragas offers `context_relevancy` and `context_recall`, which measure the performance of your retrieval system.

**Metrics to evaluate generation:** 
Ragas offers `faithfulness`, which measures hallucinations, and `answer_relevancy`, which measures how directly the answers address the question.

In [2]:
import os, json
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [3]:
# --- OpenAI client settings: every value is read from the environment ---
# A missing variable raises KeyError here rather than failing later.
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]
openai.api_type = os.environ["OPENAI_API_TYPE"]
openai.api_version = os.environ["OPENAI_API_VERSION"]

# --- Model names used by later cells ---
chat_model = os.environ["GPT4_MODEL_NAME"]
embedding_model = os.environ["EMBEDDING_MODEL_NAME"]

# Embedding client; its `embed_query` method is handed to the vector store below.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    model=embedding_model,
    chunk_size=1,
)

# --- Azure Cognitive Search connection details ---
vector_store_endpoint: str = os.environ["AZURE_COGNITIVE_SEARCH_ENDPOINT"]
vector_store_password: str = os.environ["AZURE_COGNITIVE_SEARCH_KEY"]
index_name: str = "contoso-coffee-index"

# Vector store bound to the index named above.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_endpoint,
    azure_search_key=vector_store_password,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [5]:
# Chat model used to generate answers. temperature=0.0 is the lowest-randomness
# setting; max_tokens=400 caps the length of each answer.
llm = AzureChatOpenAI(
    temperature=0.0,
    max_tokens=400,
    openai_api_base=openai.api_base,
    openai_api_version=openai.api_version,
    # Reuse the deployment name loaded in the configuration cell instead of
    # reading the environment variable a second time.
    deployment_name=chat_model,
    openai_api_key=openai.api_key,
    openai_api_type=openai.api_type,
)

# Similarity search over the vector store, returning the top 15 matches.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 15})

# "stuff" chain type: the retrieved documents are placed into a single prompt.
# return_source_documents=True adds a "source_documents" entry to the result,
# which the Ragas evaluator chains need in order to score the retrieved context;
# the "result" entry is unchanged.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# Smoke-test the chain with a sample question and display the generated answer.
question = "what are your bakery items?"
result = qa_chain({"query": question})
result["result"]

'The bakery items available are:\n\n1. Cinnamon Roll: A sweet roll covered in delicious cinnamon and sugar. It costs $4.0.\n2. Blueberry Muffin: A soft and moist muffin packed with fresh blueberries and a crumb topping. It costs $2.5.\n3. Bagel: A delicious, dense and chewy bread product, perfect for a quick breakfast. It costs $1.5.\n4. Scones: Buttery and crumbly traditional English baked good. It costs $3.0.\n5. Croissant: A buttery, flaky pastry, freshly baked to golden perfection. It costs $2.0.\n6. Pecan Pie: A classic dessert filled with sweet, nutty pecans and a flaky crust. It costs $4.5.\n7. Brownie: A rich, fudgy brownie with a moist center and crispy edges. It costs $3.0.\n8. Banana Bread: A moist, sweet bread with a rich banana flavor. It costs $4.0.\n9. Cheese Bread: A soft, tender bread loaded with melted cheese. It costs $3.5.\n10. Peach Cobbler: A classic dessert filled with sweet, juicy peaches and a flaky crust. It costs $4.5.\n11. Strawberry Tart: A delightful tart 

In [11]:
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy, context_recall
from ragas.langchain import RagasEvaluatorChain

# Build one evaluator chain per Ragas metric, keyed by the metric's `name`.
# NOTE(review): the recorded output of this cell is an ImportError for
# `AzureOpenAIEmbeddings` from `langchain.embeddings`. This cell never imports
# that name itself, so it presumably comes from inside ragas and indicates a
# ragas/langchain version mismatch -- confirm the installed versions.
eval_chains = dict(
    (m.name, RagasEvaluatorChain(metric=m))
    for m in [faithfulness, answer_relevancy, context_relevancy, context_recall]
)
eval_chains

ImportError: cannot import name 'AzureOpenAIEmbeddings' from 'langchain.embeddings' (c:\Users\shchitt\AppData\Local\anaconda3\envs\food\lib\site-packages\langchain\embeddings\__init__.py)