# Question Answering over Resources
The notebook is based on https://python.langchain.com/en/latest/modules/chains/index_examples/qa_with_sources.html

## Prepare Data

In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
import numpy as np
import os
from langchain.llms import OpenAI

import getpass
from bardapi import Bard
import os

# Prompt for both service tokens so that neither is saved in the notebook source.
bard_token = getpass.getpass("Enter the token for Google Bard")
openai_token = getpass.getpass("Enter the token for OpenAI")

# Publish the OpenAI token through the OPENAI_API_KEY environment variable.
os.environ.update({"OPENAI_API_KEY": openai_token})

Enter the token for Google Bard········
Enter the token for OpenAI········


In [2]:
import json

# Load the evaluation set. The cells below index it as
# data[i]['paragraphs'][j]['context'] and data[i]['paragraphs'][j]['qas'][k]
# (this looks like the SQuAD 2.0 layout -- TODO confirm against the data file).
# The encoding is passed explicitly so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open('test_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
data = data['data']
num = len(data)
print(num)
# print(data[0]['paragraphs'][0]['qas'][0])
print(data[0]['paragraphs'][0]['context'])

35
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.


In [4]:
# ==========
# QUESTION: In what country is Normandy located?
# ANSWER1: France
# ANSWER2: Normandy is located in France.
# RESPONSE: Yes
# ==========
# QUESTION: From which countries did the Norse originate?
# ANSWER1: Denmark, Iceland and Norway 
# ANSWER2: The Norse who originated the Normans came from Denmark, Iceland, and Norway.
# RESPONSE: Yes
# ==========
# QUESTION: What is France a region of?
# ANSWER1: No relevant information found 
# ANSWER2: Sorry, I don't know.
# RESPONSE: Yes
# =========
# QUESTION: What is France a region of?
# ANSWER1: No relevant information found 
# ANSWER2: France is a region of Normandy.
# RESPONSE: No
# =========

#Along with the answer, please also give a confidence score from 1 to 10 about how confident you are about your answer.

# Prompt sent to the evaluator model. ANSWER1 is the reference answer and
# ANSWER2 is the answer under test; the evaluator is asked to reply with a
# single "Yes" or "No". Placeholders: {question}, {answer}, {res_text}.
# The "cannot be answered" rule lists both refusal wordings used in this
# notebook so it also applies when the reference answer is "I don't know".
eval_text_template = '''
You are not allowed to use any outside knowledge. You can only use what is given to you. Given one question and two answers, please respond whether ANSWER1 and ANSWER2 have the same meaning under the given question.
The only output you can give is either "Yes" or "No". Please output only one single word. No explanation needed.
If ANSWER1 indicates that the question cannot be answered (for example "I don't know" or "No relevant information found"), you can only say "Yes" when ANSWER2 also indicates the question cannot be answered.

QUESTION: {question}
ANSWER1: {answer}
ANSWER2: {res_text}
'''

# ask_template = '''
# Please answer the following question only based on the given context. Do NOT use any outside knowledge. 
# If you are unsure about the answer or you cannot find the information to answer your question, you must say "Sorry, I cannot answer this question"
# ==========
# QUESTION: In what country is Normandy located?
# RESPONSE: France
# ==========

# QUESTION: What is France a region of?
# RESPONSE: Sorry, I cannot answer this question
# =========
# QUESTION: {question}
# RESPONSE: 
# '''

from langchain import PromptTemplate

# Closed-context QA prompt: the model is told to answer {query} from the
# supplied {context} only, and to reply "I don't know" when it cannot.
template = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know".

Context: {context}

Question: {query}

Answer: """

# Wrap the raw string so it can be filled with
# prompt_template.format(context=..., query=...).
prompt_template = PromptTemplate(template=template, input_variables=["context", "query"])


In [8]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from bardapi import Bard

# Limits for a quick smoke run: only the first article and its first paragraph
# are evaluated. Set either limit to None to evaluate everything.
MAX_ARTICLES = 1
MAX_PARAGRAPHS = 1

# Reference answer used for questions flagged `is_impossible`; it is the same
# refusal wording that the QA prompt asks the model to use.
UNANSWERABLE = "I don't know"


def _is_yes(verdict):
    """Return True when the first word of the evaluator's reply is "yes".

    The evaluator tends to append an explanation after its verdict, so a
    substring search for "yes" anywhere in the reply is unreliable. Only the
    first word is inspected, after stripping punctuation and markdown markers.
    """
    words = verdict.strip().lower().split()
    if not words:
        return False
    return words[0].strip("*_\"'`().,!:;") == "yes"


def _mean_or_nan(flags):
    """Return the mean of `flags`, or NaN (without a NumPy warning) if empty."""
    return np.mean(flags) if flags else float("nan")


# 1 = the model gave an answer to a question flagged `is_impossible`
#     (the evaluator did NOT judge its reply equivalent to a refusal).
hallucination = []
# 1 = the model's answer was judged equivalent to the reference answer.
acc = []

# model = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
model = OpenAI(temperature=0, model_name="gpt-3.5-turbo",)
# `chain` is only needed by the retrieval-based variant that is commented out
# inside the loop; the active code path calls `model` directly.
chain = load_qa_with_sources_chain(model, chain_type="stuff")
# chain = load_qa_with_sources_chain(model, chain_type="map_reduce")
evaluator = Bard(token=bard_token)

for article in data[:MAX_ARTICLES]:
    # Iterate over THIS article's paragraphs (not the first article's count).
    for paragraph in article['paragraphs'][:MAX_PARAGRAPHS]:
        text = paragraph['context']
#         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
#         texts = text_splitter.split_text(text)
#         embeddings = OpenAIEmbeddings()
#         docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))])
        for qa in paragraph['qas']:
            question = qa['question']
            answerable = not qa['is_impossible']
            answer = qa['answers'][0]['text'] if answerable else UNANSWERABLE
#             docs = docsearch.similarity_search(question, n_results = 1)
#             response = chain({"input_documents": docs, "question": ask_template.format(question=question)}, return_only_outputs=True)
#             res_text = response['output_text'].split('\n')[0]
            in_text = prompt_template.format(context=text, query=question)
            res_text = model(in_text)
            result = evaluator.get_answer(eval_text_template.format(question=question, answer=answer, res_text=res_text))['content']
            matches_reference = _is_yes(result)
            if answerable:
                acc.append(int(matches_reference))
            else:
                # For unanswerable questions the reference is a refusal, so a
                # mismatch means the model produced an answer anyway.
                hallucination.append(int(not matches_reference))
            print(f"question: {question}\nanswer:{answer} \nresponse:{res_text}\n Correctness:{result}\n\n\n")
        print(f"acc: {_mean_or_nan(acc)}, hallucination: {_mean_or_nan(hallucination)}\n\n\n")

question: In what country is Normandy located?
answer:France 
response:France.
 Correctness:Yes.

ANSWER1 and ANSWER2 both say that Normandy is located in France. The period at the end of ANSWER2 does not change the meaning of the answer.



question: When were the Normans in Normandy?
answer:10th and 11th centuries 
response:The Normans were in Normandy in the 10th and 11th centuries.
 Correctness:Yes.

ANSWER1 and ANSWER2 both say that the Normans were in Normandy in the 10th and 11th centuries. The only difference is that ANSWER2 includes the word "were", which is not necessary to convey the meaning of the answer.



question: From which countries did the Norse originate?
answer:Denmark, Iceland and Norway 
response:Denmark, Iceland, and Norway.
 Correctness:Yes.

ANSWER1 and ANSWER2 both list Denmark, Iceland, and Norway as the countries from which the Norse originated. The only difference is that ANSWER2 includes a comma after each country name, which is not necessary to convey th

## Bard

In [None]:
from bardapi import Bard
import os



In [None]:
# Ad-hoc checks of Bard as an answer-equivalence judge.
# A single client is built from `bard_token` (collected in the first cell) and
# reused for both prompts, so this cell runs on a fresh kernel.
bard = Bard(token=bard_token)

# Check 1: a refusal compared with a real answer.
question = "When were the Normans in Normandy?"
response = "No relevant information found"
solution = "The Normans gave their name to Normandy in the 10th and 11th centuries."

res = bard.get_answer(f"Given the question:'{question}', do the following 2 statements have the same meaning? 1.{solution}, 2.{response}. Say yes if true. Otherwise, say no.")['content']
print(res)

# Check 2: same kind of comparison, asking for a single-word verdict.
question = "When was the French version of the word Norman first recorded?"
ans1 = "No relevant information found "
ans2 = "The French version of the word Norman was first recorded in Medieval Latin in the 9th century."

res = bard.get_answer(f"Answer the following question in a **single word**. Remember, **a single word**. Based on {question}, does {ans1} mean the same thing as {ans2}?")['content']
print(res)