# Document Question Answering: End-to-End Inference Pipeline

In [None]:
!pip install transformers accelerate xformers bitsandbytes openai pip install pinecone-client langchain tiktoken unstructured[pdf] datasets

In [1]:
import os
import openai
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [2]:
from openai import RateLimitError
import backoff

@backoff.on_exception(backoff.expo, RateLimitError)
def completions_with_backoff(**kwargs):
  """Create an OpenAI completion, retrying with exponential backoff on rate limits.

  All keyword arguments are forwarded unchanged to the completions endpoint and
  its response is returned as-is.

  `RateLimitError` is imported from the top-level `openai` package, which is the
  openai>=1.0 layout. In that layout the legacy `openai.Completion.create` entry
  point has been removed, so the module-level `openai.completions.create` is used.
  """
  return openai.completions.create(**kwargs)

In [3]:
import warnings
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

# LLM Pipelines

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
import transformers

from huggingface_hub import login

# Hugging Face access token, read from the HF_TOKEN environment variable so that no
# secret is stored in the notebook. TOKEN is None when the variable is unset.
# NOTE(review): any token that was ever committed with this notebook should be revoked.
import os
TOKEN = os.environ.get("HF_TOKEN")

# Hub ids that create_pipeline loads with AutoModelForCausalLM ("text-generation").
CAUSALLM_MODELS = [
    "meta-llama/Llama-2-7b-hf",
    "decapoda-research/llama-7b-hf",
    "ybelkada/falcon-7b-sharded-bf16",
    "facebook/galactica-1.3b",
    "gpt2",
    "microsoft/biogpt",
    "mistralai/Mistral-7B-v0.1",
    "bn22/Mistral-7B-Instruct-v0.1-sharded",
    "vilsonrodrigues/falcon-7b-sharded",
    "TinyPixel/Llama-2-7B-bf16-sharded",
    "stanford-crfm/BioMedLM",
]

# Hub ids that create_pipeline loads with AutoModelForSeq2SeqLM ("text2text-generation").
SEQ2SEQ_MODELS = [
    "THUDM/glm-2b",
    "google/flan-t5-small",
    "google/flan-t5-base",
    "razent/SciFive-base-Pubmed",
    "GanjinZero/biobart-v2-base",
]

def create_pipeline(model_name):
    """Build a Hugging Face generation pipeline for one of the supported checkpoints.

    Args:
        model_name: Hub id listed in ``CAUSALLM_MODELS`` or ``SEQ2SEQ_MODELS``.

    Returns:
        A ``transformers`` pipeline: ``"text-generation"`` for causal models,
        ``"text2text-generation"`` for seq2seq models.

    Raises:
        ValueError: If ``model_name`` is in neither list. This is checked first,
            so an unsupported name fails before any login or download happens.
    """
    if model_name in CAUSALLM_MODELS:
        model_class, task = AutoModelForCausalLM, "text-generation"
    elif model_name in SEQ2SEQ_MODELS:
        model_class, task = AutoModelForSeq2SeqLM, "text2text-generation"
    else:
        raise ValueError(f"Unsupported model_name: {model_name}")

    # 4-bit NF4 quantization is configured only when a CUDA device is available;
    # otherwise the model is loaded without a quantization config.
    if torch.cuda.is_available():
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )
    else:
        bnb_config = None

    login(token=TOKEN)

    # NOTE(review): trust_remote_code=True allows code shipped in the model
    # repository to run locally - confirm every listed checkpoint is trusted.
    model = model_class.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    if model_class is AutoModelForCausalLM:
        # Causal-only tweaks kept from the original branch.
        model.config.use_cache = False
        tokenizer.pad_token = tokenizer.eos_token

    return transformers.pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        device_map='auto',
    )


# Dataset

## Fall '22 ML (CS 559-A) Lectures

In [5]:
!unzip lectures.zip

Archive:  lectures.zip
  inflating: lectures/W1_Introduction_cs559_Fall22.pdf  
  inflating: lectures/W10_GraphicalModels_cs559_Fall22.pdf  
  inflating: lectures/W11_NeuralNetworks_cs559_Fall22.pdf  
  inflating: lectures/W12_NeuralNetworks2_cs559_Fall22.pdf  
  inflating: lectures/W13_FinalReview_cs559_Fall22.pdf  
  inflating: lectures/W2_LinearRegression_cs559_Fall22.pdf  
  inflating: lectures/W3_LDA_cs559_Fall22.pdf  
  inflating: lectures/W4_LogisticRegression_cs559_Fall22.pdf  
  inflating: lectures/W5_PCA_cs559_Fall22.pdf  
  inflating: lectures/W6_SVM_cs559_Fall22.pdf  
  inflating: lectures/W7_DecisionTrees_Ensemble_cs559_Fall22.pdf  
  inflating: lectures/W8_NonparametricMethods_cs559_Fall22.pdf  
  inflating: lectures/W9_Clustering_EM_cs559_Fall22.pdf  


In [6]:
directory = '/content/lectures'   # folder holding the source files (.txt, .pdf)

def load_docs(directory):
  """Return whatever LangChain's DirectoryLoader loads from `directory`."""
  return DirectoryLoader(directory).load()

ml_lectures_documents = load_docs(directory)
len(ml_lectures_documents)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


13

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
  """Split `documents` with a RecursiveCharacterTextSplitter.

  `chunk_size` and `chunk_overlap` are passed straight to the splitter; the
  result of its `split_documents` call is returned.
  """
  splitter = RecursiveCharacterTextSplitter(
      chunk_size=chunk_size,
      chunk_overlap=chunk_overlap,
  )
  return splitter.split_documents(documents)

ml_lectures_chunks = split_docs(ml_lectures_documents)
print(len(ml_lectures_chunks))

222


In [9]:
# Inspect the text of the first chunk to sanity-check extraction and splitting.
print(ml_lectures_chunks[0].page_content)

CS 559 Machine Learning Week 3: Linear Classification

Ping Wang Department of Computer Science Stevens Institute of Technology

Today’s Lecture

Generative vs Discriminative Classification

Linear Discriminant Analysis

Least Square Classification

Fisher's Linear Discriminant

The Perceptron Algorithm

2

Classification Task

Task: find a function 𝑓 that classifies examples into a given set of discrete classes {𝐶!, 𝐶", … , 𝐶#}.

Training image data

Monkey

Testing image data

Dog

Cat

Cat

Features/Attributes (𝑥)

Function f

𝑦 ∈ {𝐶!, 𝐶", … , 𝐶#}

3

Linear Classification

Decision boundaries/surfaces: divide the input space into decision regions. • For linear classification, the decision boundaries are linear functions of the input 𝑥. • Linear separable: datasets that can be exactly separated by linear decision boundaries are linear separable.

4

Generative and Discriminative Approach

5

Decision Theory for Classification


## Narrative QA

In [25]:
from datasets import load_dataset

# Load the NarrativeQA dataset (all available splits) from the Hugging Face hub.
narrative_qa = load_dataset("narrativeqa")

In [26]:
# Show the splits, their features and row counts.
narrative_qa

DatasetDict({
    train: Dataset({
        features: ['document', 'question', 'answers'],
        num_rows: 32747
    })
    test: Dataset({
        features: ['document', 'question', 'answers'],
        num_rows: 10557
    })
    validation: Dataset({
        features: ['document', 'question', 'answers'],
        num_rows: 3461
    })
})

# Sentence Embeddings

In [None]:
# Needed by SentenceTransformerEmbeddings below. NOTE(review): version is not pinned.
!pip install sentence-transformers

In [60]:
from langchain.embeddings import SentenceTransformerEmbeddings

# Sentence-embedding model used both for indexing chunks and for embedding queries.
# NOTE: `model_name` is a notebook-global that later cells rebind to an LLM name.
model_name = "all-MPNet-base-v2"
embeddings = SentenceTransformerEmbeddings(model_name=model_name)

# Smoke test: embed a short string and report the vector length.
query_result = embeddings.embed_query("Hello world You")
print(f'{len(query_result)} dimensional embedding')

768 dimensional embedding


# Vector Database

In [5]:
import pinecone
from langchain.vectorstores import Pinecone

# The API key is read from the PINECONE_API_KEY environment variable so that no
# secret is stored in the notebook; a missing variable raises KeyError here.
# NOTE(review): any key that was ever committed with this notebook should be rotated.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment="gcp-starter"
)

# Handle to the existing index shared by both corpora (separated by namespace below).
index_name = "doc-qa-tut"
index_config = pinecone.Index(index_name)

# Fall '22 ML (CS 559-A) Lectures to Vector DB

In [9]:
# LangChain vector-store wrapper over the shared index, scoped to the 'ml_lectures'
# namespace; 'text' is the text_key argument passed to the wrapper.
ml_lectures_index = Pinecone(index_config, embeddings.embed_query, 'text', namespace='ml_lectures')

In [None]:
# Deterministic ids make this upload idempotent: re-running the cell overwrites the
# same vectors instead of inserting a second copy of every chunk.
ml_lectures_index.add_documents(
    ml_lectures_chunks,
    ids=[f'ml_lectures-{chunk_no}' for chunk_no in range(len(ml_lectures_chunks))],
)

# Narrative QA to Vector DB

In [24]:
# Same shared index as the lecture corpus, but scoped to the 'narrative_qa' namespace.
narrative_qa_index = Pinecone(index_config, embeddings.embed_query, 'text', namespace='narrative_qa')

In [27]:
num_samples = 10

# Collect the text of the first `num_samples` distinct documents in the test split,
# in dataset order. Rows are read one at a time and the scan stops as soon as the
# quota is reached, instead of materialising the whole 'document' column and
# walking every row.
added = []
seen_texts = set()

for sample in narrative_qa['test']:
  if len(added) >= num_samples:
    break
  text = sample['document']['text']
  if text not in seen_texts:
    seen_texts.add(text)
    added.append(text)


In [None]:
# Display the first collected document. NOTE(review): this prints the whole text.
added[0]

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size = 1000
chunk_overlap = 20

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

# Split each collected document and upload its chunks to the 'narrative_qa' namespace.
for i, doc in enumerate(added):
  chunks = text_splitter.split_text(doc)
  print(f'{i}: {len(chunks)} chunks')
  # Deterministic ids (document position + chunk position) make the upload
  # idempotent: re-running the cell overwrites vectors instead of duplicating them.
  chunk_ids = [f'narrative_qa-{i}-{j}' for j in range(len(chunks))]
  narrative_qa_index.add_texts(chunks, ids=chunk_ids)

0: 70 chunks
1: 679 chunks
2: 275 chunks
3: 269 chunks
4: 221 chunks
5: 257 chunks
6: 1694 chunks
7: 95 chunks
8: 817 chunks
9: 89 chunks


# Question Answering

In [10]:
def get_similiar_docs(query, index, k=2, score=False):
  """Run a similarity search for `query` on `index`.

  `k` controls how many context documents are requested. When `score` is truthy
  the call goes to `index.similarity_search_with_score`, otherwise to
  `index.similarity_search`; the store's result is returned unchanged.
  """
  search = index.similarity_search_with_score if score else index.similarity_search
  return search(query, k=k)

In [11]:
def get_answer_chain(chain, query, index):
  """Answer `query` with a LangChain QA chain over documents retrieved from `index`.

  The retrieved documents are printed, then passed to `chain.run`. If that call
  raises, it is retried once with only the first retrieved document.
  NOTE(review): presumably the retry exists for contexts that are too long for
  the model - confirm.

  Returns the value returned by `chain.run`.
  Raises the original error when nothing was retrieved, since there is no
  document to fall back to.
  """
  similar_docs = get_similiar_docs(query, index)
  print(similar_docs)
  try:
    answer = chain.run(input_documents=similar_docs, question=query)
  except Exception:
    # `Exception` rather than a bare except, so KeyboardInterrupt still stops the cell.
    if not similar_docs:
      raise
    answer = chain.run(input_documents=[similar_docs[0]], question=query)

  return answer

In [12]:
def get_prompt(question, index):
  """Build the retrieval-augmented prompt for `question`.

  Retrieves documents via `get_similiar_docs`, displays them, and returns one
  string: the documents' `page_content` joined by newlines, a fixed instruction,
  the question, and a trailing "Answer:" cue.
  """
  retrieved = get_similiar_docs(question, index)
  display(retrieved)

  instruction = "Assume that all you have is the above information, based on and from which you have to provide an answer to the following question: (It's okay if you don't know you can state that you don't know)"
  comprehension = "\n".join(doc.page_content for doc in retrieved)

  return f"Information: \n{comprehension} \n{instruction} \n{question} \nAnswer:"

In [20]:
def get_answer_pipeline(pipeline, question, index, model='decoder'):
  """Answer `question` by prompting a Hugging Face pipeline with retrieved context.

  Args:
    pipeline: callable pipeline; it is called with the prompt plus generation
      keyword arguments and its first result's 'generated_text' is used.
    question: the user question.
    index: vector store handed to `get_prompt` for retrieval.
    model: 'encoder-decoder' returns the generated text as-is; any other value
      (default 'decoder') strips the echoed prompt from the output.

  Returns:
    The generated answer string.
  """
  prompt = get_prompt(question, index)

  outputs = pipeline(
      prompt,
      max_new_tokens=100,
      do_sample=True,
      penalty_alpha=0.1,
      top_k=1,
      use_cache=True,
      temperature=0.01,
      num_return_sequences=1,
      no_repeat_ngram_size=4,
  )
  generated_text = outputs[0]['generated_text']

  if model == 'encoder-decoder':
    return generated_text

  # Decoder-only output is expected to contain the prompt followed by the
  # completion. `partition` keeps everything after the first occurrence of the
  # prompt and, unlike `split(prompt)[1]`, does not raise IndexError when the
  # prompt is not reproduced verbatim - the full text is returned instead.
  _, matched, completion = generated_text.partition(prompt)
  return completion if matched else generated_text

In [14]:
def shrink_text(text):
    """Flatten multi-line text: strip each newline-separated line, join with single spaces."""
    stripped_lines = map(str.strip, text.split('\n'))
    return ' '.join(stripped_lines)

## LangChain's Internal QA Mechanism

## Open AI's text-davinci-003

In [42]:
from langchain.llms import OpenAI
model_name = "text-davinci-003"
# The API key is read from the OPENAI_API_KEY environment variable so that no
# secret is stored in the notebook; a missing variable raises KeyError here.
# NOTE(review): any key that was ever committed with this notebook should be revoked.
llm = OpenAI(model_name=model_name, openai_api_key=os.environ["OPENAI_API_KEY"])

# "stuff" chain type is passed to load_qa_chain, as before.
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
import pandas as pd

# Probe questions for the lecture corpus. NOTE(review): the India/US question is
# presumably a deliberate off-topic control - confirm.
ml_lecture_queries = ["What is the formula for PCA?",
           "How have relations between India and the US improved?",
           "Is PCA better or SVD?",
           "Is there any mention of Prompt Engineering?",
           "Is there any mention of PCA and in which pdf and page?"]

# One answer per query, produced by whichever `chain` is currently bound.
ml_lecture_answers = [get_answer_chain(chain, query, ml_lectures_index) for query in ml_lecture_queries]


In [55]:
# Show the full text of every cell instead of truncating long answers.
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({'Query':ml_lecture_queries, 'Answer': ml_lecture_answers})
df['Answer'] = df['Answer'].apply(shrink_text)
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is its replacement.
df.style.set_properties(**{'text-align':'left','white-space': 'pre-wrap'}).hide(axis="index")

Query,Answer
What is the formula for PCA?,"There is no single formula for PCA. PCA is a technique that involves several steps, including calculating the empirical mean, finding the eigenvectors and eigenvalues of the covariance matrix, rearranging the eigenvectors and eigenvalues, computing the cumulative explained variance for each eigenvector, and selecting a subset of the eigenvectors as basis vectors."
How have relations between India and the US improved?,I don't know.
Is PCA better or SVD?,It depends on the application. PCA is typically used to find the principal components of a data set and to represent them as a linear combination of the original features. SVD is typically used to reduce the dimensionality of a matrix.
Is there any mention of Prompt Engineering?,"No, there is no mention of Prompt Engineering."
Is there any mention of PCA and in which pdf and page?,"Yes, PCA is mentioned in PDF 10 on page 9."


## Open Source LLM - Mistral-7B

In [15]:
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# Rebind `llm` and `chain` (previously backed by OpenAI) to a local Mistral-7B
# pipeline wrapped for LangChain.
llm = HuggingFacePipeline(pipeline=create_pipeline("mistralai/Mistral-7B-v0.1"))

chain = load_qa_chain(llm, chain_type="stuff")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
import pandas as pd

# Same probe questions as in the OpenAI section, re-declared so this section can
# be run on its own.
ml_lecture_queries = ["What is the formula for PCA?",
           "How have relations between India and the US improved?",
           "Is PCA better or SVD?",
           "Is there any mention of Prompt Engineering?",
           "Is there any mention of PCA and in which pdf and page?"]

# One answer per query, produced by whichever `chain` is currently bound.
ml_lecture_answers = [get_answer_chain(chain, query, ml_lectures_index) for query in ml_lecture_queries]


In [17]:
# Show the full text of every cell instead of truncating long answers.
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({'Query':ml_lecture_queries, 'Answer': ml_lecture_answers})
df['Answer'] = df['Answer'].apply(shrink_text)
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is its replacement.
df.style.set_properties(**{'text-align':'left','white-space': 'pre-wrap'}).hide(axis="index")

Query,Answer
What is the formula for PCA?,
How have relations between India and the US improved?,
Is PCA better or SVD?,
Is there any mention of Prompt Engineering?,Yes
Is there any mention of PCA and in which pdf and page?,


## HuggingFace Pipeline for QA

In [18]:
# Raw transformers pipeline used directly (without LangChain's QA chain).
# NOTE(review): this loads the same checkpoint that the HuggingFacePipeline cell
# above already loaded - presumably a second copy in memory; confirm whether the
# existing pipeline object could be reused.
pipeline = create_pipeline("mistralai/Mistral-7B-v0.1")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## On Fall '22 ML (CS 559-A) Lectures

In [None]:
import pandas as pd

# Same probe questions as in the chain-based sections, re-declared so this section
# can be run on its own.
ml_lecture_queries = ["What is the formula for PCA?",
           "How have relations between India and the US improved?",
           "Is PCA better or SVD?",
           "Is there any mention of Prompt Engineering?",
           "Is there any mention of PCA and in which pdf and page?"]

# One answer per query, generated by calling the pipeline on the hand-built prompt.
ml_lecture_answers = [get_answer_pipeline(pipeline, query, ml_lectures_index) for query in ml_lecture_queries]

In [23]:
# Show the full text of every cell instead of truncating long answers.
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({'Query':ml_lecture_queries, 'Answer': ml_lecture_answers})
df['Answer'] = df['Answer'].apply(shrink_text)
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is its replacement.
df.style.set_properties(**{'text-align':'left','white-space': 'pre-wrap'}).hide(axis="index")

Query,Answer
What is the formula for PCA?,29 PC: Steps (cont.) Input data matrix • Steps • Output: transformed coordinates 30 PC A: Steps 1 Input data: 📁 31 PC : Steps 2 Compute the mean of the data: ℳ 32 PC 3: Steps A Compute the covariate matrix: 𐱁
How have relations between India and the US improved?,35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 ```
Is PCA better or SVD?,"25 PC: Steps (cont.) Input data matrix • Steps • Output: Assume you have the above information and you have to answer the following question (It' s okay if you do not know, you can state you do not have enough information to answer): Is PCR better or PCA? Answer 26 PCR: Steps • Input: data matrix, a set of explanatory"
Is there any mention of Prompt Engineering?,No 27 Evaluation / Objectives ØEvaluation function: a function that takes a model and a dataset and returns a number. 28 Evaluating a Model ØA model is evaluated by a function that measures how well the model fits the data. ØThe evaluation function is a function of the model and the data. The model is a function that maps inputs to outputs. ØEvaluation functions are often called objective functions
Is there any mention of PCA and in which pdf and page?,"29 PC: Advantagess PC is a linear dimensionality-reduction technique; PC can be used to reduce the dimensionality of the data; It is a linear transformation of the data, so it is easy to interpret; In the case of a large number of variables, it is possible to reduce the number of variables to a smaller number; If the data are correlated, it is also possible to reduce"


## On Narrative QA

In [39]:
from torch.utils.data import DataLoader


# batch_size=1 with the default collate wraps each field in a batch container;
# the cells below index answers as answer['text'][0] and rely on that shape.
narrative_dataloader = DataLoader(narrative_qa['test'], batch_size=1, shuffle=False)

naarative_qa_questions = []
naarative_qa_answers = []

# Keep only the first 305 samples and stop there, rather than iterating (and
# collating) the rest of the test split just to skip it.
# NOTE(review): 305 is presumably the number of questions that belong to the
# documents uploaded to the vector store - confirm.
for i, sample in enumerate(narrative_dataloader):
  if i >= 305:
    break
  query = sample['question']['text'][0]
  answers = sample['answers']

  naarative_qa_questions.append(query)
  naarative_qa_answers.append(answers)

In [40]:
# Show the first question together with its reference answers.
naarative_qa_questions[0], \
naarative_qa_answers[0]

('Who is Mark Hunter?',
 [{'text': ['He is a high school student in Phoenix.'],
   'tokens': [('He',),
    ('is',),
    ('a',),
    ('high',),
    ('school',),
    ('student',),
    ('in',),
    ('Phoenix',),
    ('.',)]},
  {'text': ['A loner and outsider student with a radio station.'],
   'tokens': [('A',),
    ('loner',),
    ('and',),
    ('outsider',),
    ('student',),
    ('with',),
    ('a',),
    ('radio',),
    ('station',),
    ('.',)]}])

## Open AI's text-davinci-003

In [44]:
from langchain.llms import OpenAI
model_name = "text-davinci-003"
# The API key is read from the OPENAI_API_KEY environment variable so that no
# secret is stored in the notebook; a missing variable raises KeyError here.
# NOTE(review): any key that was ever committed with this notebook should be revoked.
llm = OpenAI(model_name=model_name, openai_api_key=os.environ["OPENAI_API_KEY"])

# "stuff" chain type is passed to load_qa_chain, as before.
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
# Answer every collected question with the QA chain over the 'narrative_qa' index,
# printing question (Q), prediction (A) and reference answers (T) as it goes.
# NOTE: `predictions` is a notebook-global that the Mistral section rebinds later.
predictions = []

for query, ground_truths in zip(naarative_qa_questions, naarative_qa_answers):
    prediction = get_answer_chain(chain, query, narrative_qa_index)
    predictions.append(prediction)
    print(f"\nQ: {query} \nA: {prediction}\nT: {ground_truths} \n")


In [46]:
def extract_answers(answers):
  """Join the first 'text' entry of every answer record, one per line."""
  first_texts = [answer['text'][0] for answer in answers]
  return '\n'.join(first_texts)

In [47]:
# Show the full text of every cell instead of truncating long answers.
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({'Query':naarative_qa_questions, 'Prediction':predictions, 'Answer': [extract_answers(answers) for answers in naarative_qa_answers]})
# Flatten both free-text columns: in this table the model output is in
# 'Prediction', so it is flattened as well as the reference 'Answer'.
df['Prediction'] = df['Prediction'].apply(shrink_text)
df['Answer'] = df['Answer'].apply(shrink_text)
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is its replacement.
df.style.set_properties(**{'text-align':'left','white-space': 'pre-wrap'}).hide(axis="index")

Query,Prediction,Answer
Who is Mark Hunter?,Mark Hunter is a character in the context.,He is a high school student in Phoenix. A loner and outsider student with a radio station.
Where does this radio station take place?,It is not specified where the radio station takes place in this context.,"It takes place in Mark's parents basement. Phoenix, Arizona"
Why do more students tune into Mark's show?,"More students tune into Mark's show because it is unregulated and provides programming of the lowest common denominator, allowing for more freedom of expression.",Mark talks about what goes on at school and in the community. Because he has a thing to say about what is happening at his school and the community.
Who commits suicide?,The person in the context commits suicide.,Malcolm. Malcolm.
What does Paige jam into her microwave?,Joanna doesn't jam anything into her microwave.,She jams her medals and accolades. Her award medals
What does Mark do with his radio station?,"Mark listens to the show broadcast from Paradise Hills, Arizona, and burns his Happy Harry Hardon letters.",He dismantles it and attaches it to his mother's jeep. Dismantle it.
What does Mark tell the protesting students?,Mark does not tell the protesting students anything. He and Nora leave and an announcement comes over the P.A. system about an emergency PTA. meeting that evening.,He tells them to make their own future. That they should make their own future because the world belongs to them.
Who gets arrested?,The robbers.,Mark and Nora. Mark and Nora.
What does the radio show cause?,"The radio show is causing disruption in the community, which has led to vandalism.",It causes trouble. It causes much trouble in the community.
Where does Mark Broadcast his station from?,"Mark does not broadcast a station. He is listening to Shep Sheppard broadcasting from Paradise Hills, Arizona.",Parent's Basement At the basement of his home


### Evaluation

In [None]:
# Needed for the ROUGE evaluation below. NOTE(review): versions are not pinned.
!pip install evaluate rouge_score

In [53]:
import evaluate

# ROUGE of the OpenAI predictions. Only the first reference answer of each
# question (answers[0]) is used as the reference.
score = evaluate.load("rouge")
open_ai_score = score.compute(references=[answers[0]['text'][0] for answers in naarative_qa_answers], predictions=predictions)

open_ai_score


{'rouge1': 0.10212243184280043,
 'rouge2': 0.017970118727490703,
 'rougeL': 0.09781450362134025,
 'rougeLsum': 0.09767301241302143}

## HuggingFace Mistral-7B

In [None]:
# Answer the first 100 collected questions with the raw Mistral pipeline, printing
# question (Q), prediction (A) and reference answers (T) as it goes.
# NOTE: this rebinds the notebook-global `predictions` used by the OpenAI section.
predictions = []

for query, ground_truths in zip(naarative_qa_questions[:100], naarative_qa_answers[:100]):
    prediction = get_answer_pipeline(pipeline, query, narrative_qa_index)
    predictions.append(prediction)
    print(f"\nQ: {query} \nA: {prediction}\nT: {ground_truths} \n")


In [57]:
# Show the full text of every cell instead of truncating long answers.
pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({'Query':naarative_qa_questions[:100], 'Prediction':predictions, 'Answer': [extract_answers(answers) for answers in naarative_qa_answers[:100]]})
# Flatten both free-text columns: in this table the model output is in
# 'Prediction', so it is flattened as well as the reference 'Answer'.
df['Prediction'] = df['Prediction'].apply(shrink_text)
df['Answer'] = df['Answer'].apply(shrink_text)
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is its replacement.
df.style.set_properties(**{'text-align':'left','white-space': 'pre-wrap'}).hide(axis="index")

Query,Prediction,Answer
Who is Mark Hunter?,Mark Hunter is a 15 year old boy who is a student at the Hunter School. He is the son of Marla and Brian Hunter. He is a very intelligent boy who is very good at maths and science. He is also very good at English and is a very good writer. He is very good friends with his neighbour Nora. He is not very good at sports. He is an only child. He is currently in the 10th grade. He is,He is a high school student in Phoenix. A loner and outsider student with a radio station.
Where does this radio station take place?,```,"It takes place in Mark's parents basement. Phoenix, Arizona"
Why do more students tune into Mark's show?,"Mark's Show - This is Mark's Show, the show that's not afraid to say what you're thinking. Mark (Voice Over) - This is the show that says what you've always wanted to say. (Mark's voice over) - This show is the show you've been waiting for. This is the show for you. The show that'",Mark talks about what goes on at school and in the community. Because he has a thing to say about what is happening at his school and the community.
Who commits suicide?,1. A person who is depressed. 2. A person with a mental illness. 3. A person whose life is in danger. 4. A person in a state of despair. 5. A person under the influence of drugs. 6. A person suffering from a terminal illness. 7. A person having a nervous breakdown. 8. A person feeling lonely. 9. A person facing a difficult,Malcolm. Malcolm.
What does Paige jam into her microwave?,(The following information is for those who want to know the answer to the question) The answer is: Eggs. (This is the end of the quiz) The following is a list of the questions that were asked in the quiz. (The questions are in the order that they were asked in) (This list is for those of you who want to see the questions that you,She jams her medals and accolades. Her award medals
What does Mark do with his radio station?,Mark burns his radio station. Explanation: The answer is based on the following information: 1. Mark is a loner. 2. Mark is not a communicator. 3. Mark is angry. 4. Mark is depressed. 5. Mark is in a bad mood. 6. Mark is unhappy. 7. Mark is sad. 8. Mark is lonely. 9. Mark is isolated. 1,He dismantles it and attaches it to his mother's jeep. Dismantle it.
What does Mark tell the protesting students?,Murdoch - We're not going to let you do this. Loretta - We've got the right to protest. Paige - We'll be arrested. Maz - I'm not going to jail. Jenny - I'll go to jail. <,He tells them to make their own future. That they should make their own future because the world belongs to them.
Who gets arrested?,A. B. C. D. E. F. G. H. I. J. K. L. M. N. O. P. Q. R. S. T. U. V. W. X.,Mark and Nora. Mark and Nora.
What does the radio show cause?,"Bob - I'm sorry, I'm just not sure what to do. Mrs. Watts - I've been thinking about it and I think I've got a solution. Mr. Watts- What's that? Mother - I think we should move. Father - Move? Mr Watts - Where?  It causes trouble. It causes much trouble in the community.",
Where does Mark Broadcast his station from?,"Happy Harry - This is Happy Harry Hard-on, the man who's got the balls to say what you want to hear. Mike - I'm Mike, I'm a senior at Paradise Hills High School. Harry - What's your problem? Michael - I've got a problem with the school. Dave - I'",Parent's Basement At the basement of his home


In [58]:
import evaluate

# ROUGE of the Mistral predictions over the first 100 questions. Only the first
# reference answer of each question (answers[0]) is used as the reference.
score = evaluate.load("rouge")
mistral_score = score.compute(references=[answers[0]['text'][0] for answers in naarative_qa_answers[:100]], predictions=predictions)

mistral_score


{'rouge1': 0.03196249883588722,
 'rouge2': 0.003801010516952086,
 'rougeL': 0.0296914165128762,
 'rougeLsum': 0.030472087413351806}

# Conclusions

- LangChain's internal question-answering mechanism works very well with the OpenAI API; however, it doesn't perform as well with HuggingFace pipelines.
- OpenAI's text-davinci-003 scored about 3 times higher than Mistral-7B on ROUGE-1 (≈0.10 vs. ≈0.03).
- Both OpenAI and Mistral-7B have significant scope for improvement