In [5]:
import json
import logging
from typing import List

from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [6]:
loader = TextLoader("../state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [7]:
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': False}

hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [8]:
hf_embeddings_vector_db = FAISS.from_documents(docs, hf_embeddings)
hf_embeddings_vector_db.save_local("faiss_index")

In [9]:
question = "What did the president say about economics?"
docs = hf_embeddings_vector_db.similarity_search(question)
docs[0].page_content

'We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.  \n\nAnd tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \n\nBy the end of this year, the deficit will be down to less than half what it was before I took office.  \n\nThe only president ever to cut the deficit by more than one trillion dollars in a single year. \n\nLowering your costs also means demanding more competition. \n\nI’m a capitalist, but capitalism without competition isn’t capitalism. \n\nIt’s exploitation—and it drives up prices. \n\nWhen corporations don’t have to compete, their profits go up, your prices go up, and small businesses and family farmers and ranchers go under. \n\nWe see it happening with ocean carriers moving goods in and out of America. \n\nDuring the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits.'

In [10]:
gpt35_azure_llm = AzureChatOpenAI(
    temperature=0,
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-GPT35-TURBO',
    openai_api_version="2023-03-15-preview"
)

In [67]:
from langchain.embeddings.openai import OpenAIEmbeddings

openai_azure_embeddings = OpenAIEmbeddings(
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-Embedding'
)

In [12]:
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=hf_embeddings_vector_db.as_retriever(),
    llm=gpt35_azure_llm
)

unique_docs = retriever_from_llm.get_relevant_documents(query=question)

INFO:langchain.retrievers.multi_query:Generated queries: ["1. Can you provide any information on the president's statements regarding the field of economics?", "2. I'm interested in knowing the president's views and comments on the subject of economics. Could you share any relevant information?", '3. Could you please share any insights or remarks made by the president in relation to economics?']


In [13]:
from langchain.chains import RetrievalQA

qa_rag_chain = RetrievalQA.from_chain_type(
    llm=gpt35_azure_llm,
    retriever=hf_embeddings_vector_db.as_retriever()
)

question = "What did the president say about Zelensky?"
qa_rag_chain({"query": question})

{'query': 'What did the president say about Zelensky?',
 'result': 'The president mentioned President Zelenskyy of Ukraine and praised the fearlessness, courage, and determination of the Ukrainian people.'}

In [14]:
from langchain.chains import QAGenerationChain
from langchain import PromptTemplate

templ = """You are a smart assistant designed to help high school teachers come up with reading comprehension questions.
Given a piece of text, you must come up with a {k} different question and answer pairs that can be used to test a student's reading comprehension abilities.
When coming up with this question/answer pair, each pair must be respond in the following format:

{{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
}}

So in your final answer you should response with a list of {k} pairs in this format:

```
[{{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
}},
 {{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
}},
 {{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
    }}
]
```

Please come up with a list of {k} question/answer pairs, in the specified list of JSONS format, for the following text:
----------------
{text}
"""

multi_qa_prompt = PromptTemplate.from_template(template=templ, partial_variables={"k": 5})
qa_generation_chain = QAGenerationChain.from_llm(llm=gpt35_azure_llm, prompt=multi_qa_prompt)

In [15]:
qna_GT = qa_generation_chain.run(docs[0].page_content)[0]
qna_GT

[{'question': 'What is the purpose of the chief prosecutor for pandemic fraud?',
  'answer': 'To go after the criminals who stole relief money meant for small businesses and Americans.'},
 {'question': 'How much will the deficit be reduced to by the end of this year?',
  'answer': 'Less than half of what it was before the current president took office.'},
 {'question': "What does the speaker mean by 'capitalism without competition isn't capitalism'?",
  'answer': 'The speaker believes that true capitalism requires competition, and without it, it becomes exploitation.'},
 {'question': "What happens to prices when corporations don't have to compete?",
  'answer': 'Prices go up.'},
 {'question': 'What did foreign-owned ocean carriers do during the pandemic?',
  'answer': 'They raised prices by as much as 1,000% and made record profits.'}]

In [16]:
questions_and_answers_llm = []
for i, qa in enumerate(qna_GT):
    question, answer = qa["question"], qa["answer"]
    llm_answer = qa_rag_chain({"query": question})["result"]
    questions_and_answers_llm.append({"question": question, "result": llm_answer})
    print(f"QA number {i + 1} \n")
    print(f"Question: {question}\n")
    print(f"Answer: {answer}\n")
    print(f"LLM Answer: {llm_answer}\n")
    print("--------------------------------------------------\n\n")

QA number 1 

Question: What is the purpose of the chief prosecutor for pandemic fraud?

Answer: To go after the criminals who stole relief money meant for small businesses and Americans.

LLM Answer: The purpose of the chief prosecutor for pandemic fraud is to go after the criminals who stole billions in relief money meant for small businesses and millions of Americans. They will be responsible for investigating and prosecuting cases of fraud related to the misuse of funds intended for pandemic relief.

--------------------------------------------------


QA number 2 

Question: How much will the deficit be reduced to by the end of this year?

Answer: Less than half of what it was before the current president took office.

LLM Answer: The given context does not provide specific information about the projected reduction of the deficit by the end of this year. Therefore, I don't have the information to answer your question.

--------------------------------------------------


QA number

In [18]:
from langchain.evaluation import QAEvalChain

eval_prompt = GRADE_ANSWER_PROMPT

eval_chain = QAEvalChain.from_llm(
    llm=gpt35_azure_llm,
    prompt=eval_prompt
)

In [19]:
graded_outputs = eval_chain.evaluate(
    examples=qna_GT,
    predictions=questions_and_answers_llm,
    question_key="question",
    prediction_key="result"
)
graded_outputs

[{'results': "GRADE: Correct\n\nJUSTIFICATION: The student answer accurately states that the purpose of the chief prosecutor for pandemic fraud is to go after the criminals who stole relief money meant for small businesses and Americans. The student's answer also includes additional information about investigating and prosecuting cases of fraud related to the misuse of funds intended for pandemic relief, which does not conflict with the true answer."},
 {'results': 'GRADE: Incorrect\n\nJUSTIFICATION: The student answer states that there is no specific information provided in the context about the projected reduction of the deficit by the end of this year. However, the true answer does provide specific information, stating that the deficit will be reduced to less than half of what it was before the current president took office.'},
 {'results': 'GRADE: Correct\n\nJUSTIFICATION: The student answer accurately explains that the speaker believes capitalism without competition leads to explo

In [20]:
for qa_grade in graded_outputs:
    print(f"{qa_grade['results']}")
    print("--------------------------------------------------\n\n")

GRADE: Correct

JUSTIFICATION: The student answer accurately states that the purpose of the chief prosecutor for pandemic fraud is to go after the criminals who stole relief money meant for small businesses and Americans. The student's answer also includes additional information about investigating and prosecuting cases of fraud related to the misuse of funds intended for pandemic relief, which does not conflict with the true answer.
--------------------------------------------------


GRADE: Incorrect

JUSTIFICATION: The student answer states that there is no specific information provided in the context about the projected reduction of the deficit by the end of this year. However, the true answer does provide specific information, stating that the deficit will be reduced to less than half of what it was before the current president took office.
--------------------------------------------------


GRADE: Correct

JUSTIFICATION: The student answer accurately explains that the speaker be

In [68]:
openai_azure_embeddings = OpenAIEmbeddings(
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-Embedding'
)

In [69]:
from langchain.evaluation import load_evaluator, EvaluatorType
from langchain.evaluation import EmbeddingDistance

evaluator = load_evaluator(evaluator=EvaluatorType.PAIRWISE_EMBEDDING_DISTANCE,
                           distance_metric=EmbeddingDistance.COSINE,
                           embeddings=openai_azure_embeddings,
                           llm=gpt35_azure_llm)

evaluator.evaluate_string_pairs(
    prediction="Seattle is very hot in June", prediction_b="Seattle is cool in June."
)

InvalidRequestError: Resource not found

In [None]:
evaluator.l  QA

In [54]:
from langchain.evaluation import PairwiseEmbeddingDistanceEvalChain

PairwiseEmbeddingDistanceEvalChain

PairwiseEmbeddingDistanceEvalChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, embeddings=OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='https://llm-x-gpt.openai.azure.com/', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='02e3dbabaf334ccb959cbeadbd3f99c3', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={'deployment_name': 'LLM-X-Embedding'}), distance_metric=<EmbeddingDistance.COSINE: 'cosine'>)

In [58]:
def run_evaluation(qa_rag_chain, qna_GT):
    predictions_list = []

    for qna in qna_GT:
        question, answer = qna["question"], qna["answer"]
        qa_rag_chain_answer = qa_rag_chain({"query": question})["result"]
        predictions_list.append({"question": question, "answer": answer, "result": qa_rag_chain_answer})

    answers_grade = grade_model_answer(qna_GT, predictions_list)

    return answers_grade, predictions_list


def grade_model_answer(qna_GT: List, predictions: List) -> List:
    graded_outputs = []

    evaluator = load_evaluator(evaluator=EvaluatorType.LABELED_CRITERIA,
                           criteria="correctness",
                           llm=gpt35_azure_llm)

    for i, qna in enumerate(qna_GT):
        question, answer = qna["question"], qna["answer"]
        grade = evaluator.evaluate_strings(input=question,
                                         prediction=predictions[i],
                                         reference=answer)
        graded_outputs.append(grade['score'])

    return graded_outputs

In [56]:
answers_grade, predictions_list = run_evaluation(qa_rag_chain, qna_GT)

In [57]:
answers_grade

([1, 1, None, 1, None],
 [],
 [],
 [{'question': 'What is the purpose of the chief prosecutor for pandemic fraud?',
   'answer': 'To go after the criminals who stole relief money meant for small businesses and Americans.',
   'result': 'The purpose of the chief prosecutor for pandemic fraud is to go after the criminals who stole billions in relief money meant for small businesses and millions of Americans. They will be responsible for investigating and prosecuting cases of fraud related to the misuse of funds intended for pandemic relief.'},
  {'question': 'How much will the deficit be reduced to by the end of this year?',
   'answer': 'Less than half of what it was before the current president took office.',
   'result': "The given context does not provide specific information about the projected reduction of the deficit by the end of this year. Therefore, I don't have the information to answer your question."},
  {'question': "What does the speaker mean by 'capitalism without competi