In [1]:
import os
import openai
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
from nltk.tokenize import word_tokenize
from tqdm import tqdm
import pickle
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from operator import itemgetter
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import FAISS
import numpy as np

In [2]:
load_dotenv()

# Uncomment to use the default DaVinci Model 
# llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))
# model = 'text-davinci-003'

# Uncomment to use the gpt-3.5-turbo-instruct model 
llm = OpenAI(model_name='gpt-3.5-turbo-instruct', openai_api_key = os.getenv("OPENAI_API_KEY"))
model = 'gpt-3.5-turbo-instruct'

In [33]:
import ir_datasets

# Load the BEIR dataset
dataset = ir_datasets.load("beir/arguana")
rel_set = {}
for qrel in dataset.qrels_iter():
    if qrel.query_id not in rel_set:
        rel_set[qrel.query_id] = []
    if qrel.relevance > 0:  # Assuming you want to consider only positive relevance
        rel_set[qrel.query_id].append(qrel.doc_id)

In [3]:
docs_file_path = './backups/openai_embeddings/doc_embeddings_d1.pkl'

with open(docs_file_path, 'rb') as file:
    docs = pickle.load(file)

print("Document embeddings loaded successfully.")

Document embeddings loaded successfully.


In [4]:
queries_file_path = './backups/openai_embeddings/query_embeddings_d1.pkl'

with open(queries_file_path, 'rb') as file:
    queries = pickle.load(file)

print("Queries embeddings loaded successfully.")

Queries embeddings loaded successfully.


In [5]:
docs['test-environment-aeghhgwpe-pro02b']['text']

"You don’t have to be vegetarian to be green. Many special environments have been created by livestock farming – for example chalk down land in England and mountain pastures in many countries. Ending livestock farming would see these areas go back to woodland with a loss of many unique plants and animals. Growing crops can also be very bad for the planet, with fertilisers and pesticides polluting rivers, lakes and seas. Most tropical forests are now cut down for timber, or to allow oil palm trees to be grown in plantations, not to create space for meat production.  British farmer and former editor Simon Farrell also states: “Many vegans and vegetarians rely on one source from the U.N. calculation that livestock generates 18% of global carbon emissions, but this figure contains basic mistakes. It attributes all deforestation from ranching to cattle, rather than logging or development. It also muddles up one-off emissions from deforestation with on-going pollution.”  He also refutes the 

In [13]:
lsi_reduced_docs_file_path = './backups/documents_d1_lsi.pkl'
with open(lsi_reduced_docs_file_path, 'rb') as file:
    documents_reduced = pickle.load(file)

In [15]:
# documents_reduced

In [7]:
lsi_reduced_queries_file_path = './backups/queries_d1_lsi.pkl'
with open(lsi_reduced_queries_file_path, 'rb') as file:
    queries_reduced = pickle.load(file)

In [23]:
# queries_reduced

In [16]:
import numpy as np
from tqdm import tqdm

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Calculate cosine similarity for each query-document pair
similarity_scores = {}
for query_id, query_vector in tqdm(queries_reduced.items(), desc='Computing similarity scores'):
    scores = []
    for doc_id, doc_vector in documents_reduced.items():
        sim_score = cosine_similarity(query_vector, doc_vector)
        scores.append((doc_id, sim_score))
    similarity_scores[query_id] = sorted(scores, key=lambda x: x[1], reverse=True)

# Example usage
# print(similarity_scores['some_query_id'])  # Replace 'some_query_id' with an actual query id

Computing similarity scores: 100%|████████████████████████████████| 1406/1406 [00:07<00:00, 180.86it/s]


In [18]:
# Initialize predictions as a dictionary
predictions = {}

# Iterate over similarity scores
for query_id, scores in similarity_scores.items():
    # Flatten the scores to get only document IDs
    scores_flattened = [doc_id for doc_id, _ in scores]
    predictions[query_id] = scores_flattened

# Example usage
# print(predictions['some_query_id'])  # Replace 'some_query_id' with an actual query id


In [19]:
# # Load index from file
# loaded_faiss_vs = FAISS.load_local(
#     folder_path="./backups/faiss/",
#     embeddings=OpenAIEmbeddings())

# retriever = loaded_faiss_vs.as_retriever(search_kwargs={'k': 10})

# Define the RAG pipeline
template = """
Answer the question or Explain the topic given this additional context: {context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [20]:
def format_docs(_docs):
    ls = []
    for doc in _docs:
        if doc.page_content in docs:
            ls.append(docs[doc.page_content]["text"])
    return ls
    # return [docs[int(doc.page_content)]["body"] for doc in _docs]

In [21]:
chain = ({"context": RunnablePassthrough(), "question": RunnablePassthrough()} 
         | prompt 
         | llm 
         | StrOutputParser())

In [22]:
# predictions

In [31]:
mquery_id = 'test-environment-aeghhgwpe-pro02a'
context_str = '\n\n\n'.join([docs[doc_id]['text'] for doc_id in predictions[mquery_id]])[:4096]
input_data = {"context": context_str, "query": queries[mquery_id]['text']}
chain.invoke(input_data)

"Answer: The context and query provided discuss the impact of meat production on the environment. The main argument is that becoming a vegetarian is an environmentally friendly thing to do, as modern farming practices for meat and fish are causing pollution, deforestation, and depletion of resources. This topic ties into the larger issue of the public's right to know what is happening in their name, as meat production is often done in the name of the people and can have significant consequences on the environment. This information should not be filtered by journalists and broadcasters, as it is important for the public to be aware of the effects of their consumption choices."

In [35]:
#### API CALL WARNING #####

rag_responses = {}
loq = []
count = 0
# Run RAG pipeline for every question
for query_id in tqdm(rel_set.keys(), desc = 'Asking Queries to ChatGPT with RAG'):
    if count > 100:
        break
    query_text = queries[query_id]['text']
    context_str = '\n\n\n'.join([docs[doc_id]['text'] for doc_id in predictions[query_id]])[:4096]
    input_data = {"context": context_str, "query": query_text}
    response = chain.invoke(input_data)
    rag_responses[query_id] = response
    count+=1

Asking Queries to ChatGPT with RAG:   7%|█▉                         | 101/1406 [02:47<36:03,  1.66s/it]


In [40]:
print(queries['test-environment-aeghhgwpe-pro02a'])

{'text': "Being vegetarian helps the environment  Becoming a vegetarian is an environmentally friendly thing to do. Modern farming is one of the main sources of pollution in our rivers. Beef farming is one of the main causes of deforestation, and as long as people continue to buy fast food in their billions, there will be a financial incentive to continue cutting down trees to make room for cattle. Because of our desire to eat fish, our rivers and seas are being emptied of fish and many species are facing extinction. Energy resources are used up much more greedily by meat farming than my farming cereals, pulses etc. Eating meat and fish not only causes cruelty to animals, it causes serious harm to the environment and to biodiversity. For example consider Meat production related pollution and deforestation  At Toronto’s 1992 Royal Agricultural Winter Fair, Agriculture Canada displayed two contrasting statistics: “it takes four football fields of land (about 1.6 hectares) to feed each Ca

In [36]:
#### DUMP OVERWRITE WARNING ####

rag_responses_file_path = './backups/lsi_openai_with_rag_responses_d1_' + model + '.pkl'
with open(rag_responses_file_path, 'wb') as file:
    pickle.dump(rag_responses, file)

print(f"RAG responses saved to {rag_responses_file_path}")

RAG responses saved to ./backups/lsi_openai_with_rag_responses_d1_gpt-3.5-turbo-instruct.pkl


In [39]:
openai_with_rag_responses_file_path = './backups/lsi_openai_with_rag_responses_d1_' + model + '.pkl'
with open(openai_with_rag_responses_file_path, 'rb') as file:
    rag_responses = pickle.load(file)

In [41]:
 # Sanity
rag_responses['test-environment-aeghhgwpe-pro02a']

'Answer: The context provided discusses the importance of transparency and the right to information for citizens in a democracy. It argues that citizens have the right to know what actions are being taken in their name, and that journalists and broadcasters should not act as a filter in this process. The failure to provide this information assumes that the public is either too foolish or callous to understand, which is not the role of the media. The argument also touches on the issue of censorship and the dangers of restricting free speech. In the end, the author concludes that the public has the right to know about events that affect their country and its citizens, and that it is the role of the media to report on these events.'

In [42]:
docs['test-environment-aeghhgwpe-pro02b']['text']

"You don’t have to be vegetarian to be green. Many special environments have been created by livestock farming – for example chalk down land in England and mountain pastures in many countries. Ending livestock farming would see these areas go back to woodland with a loss of many unique plants and animals. Growing crops can also be very bad for the planet, with fertilisers and pesticides polluting rivers, lakes and seas. Most tropical forests are now cut down for timber, or to allow oil palm trees to be grown in plantations, not to create space for meat production.  British farmer and former editor Simon Farrell also states: “Many vegans and vegetarians rely on one source from the U.N. calculation that livestock generates 18% of global carbon emissions, but this figure contains basic mistakes. It attributes all deforestation from ranching to cattle, rather than logging or development. It also muddles up one-off emissions from deforestation with on-going pollution.”  He also refutes the 

In [43]:
# Implement BLEU evaluation function
def compute_bleu(references, candidate):
    smoothing = SmoothingFunction().method5
    return sentence_bleu(references, candidate, smoothing_function=smoothing)

# Implement ROUGE evaluation function
def compute_rouge(references, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge1'], use_stemmer=True)
    total_score = 0

    # Compute ROUGE for each reference
    for reference in references:
        scores = scorer.score(reference, candidate)
        total_score += scores['rouge1'].fmeasure

    # Calculate average score
    average_score = total_score / len(references)
    return average_score

# Evaluate BLEU and ROUGE for each query

K = 15 # Number of most relevant docs to consider for scoring performance
total_bleu_score = 0.0
total_rouge_score = 0.0
num_queries = 0

for query_id, relevant_docs in rel_set.items():
    query_text = queries[query_id]['text']
    response = rag_responses[query_id]

    # print(query_id, "\n\n", query_text, "\n\nResponse:\n", response, "\nTopmost relevant Doc:\n", docs[relevant_docs[0]]['text'], "\n======\n")
    
    # Evaluate using BLEU
    bleu_score = compute_bleu([docs[id]['text'] for id in relevant_docs[:K]], response)
    total_bleu_score += bleu_score

    # Evaluate using ROUGE
    rouge_score = compute_rouge([docs[id]['text'] for id in relevant_docs[:K]], response)
    total_rouge_score += rouge_score

    num_queries += 1
    if num_queries == 101:
        break

# Calculate mean scores
mean_bleu_score = total_bleu_score / num_queries
mean_rouge_score = total_rouge_score / num_queries

print(f"Mean BLEU Score: {mean_bleu_score:.4f}")
print(f"Mean ROUGE Score: {mean_rouge_score:.4f}")

Mean BLEU Score: 0.2721
Mean ROUGE Score: 0.2289


In [18]:
# da-vinci
# Mean BLEU Score: 0.8224
# Mean ROUGE Score: 0.2105

# Mean BLEU Score: 0.8377
# Mean ROUGE Score: 0.2226

# gpt-3.5-turbo-instruct

# Mean BLEU Score: 0.7869
# Mean ROUGE Score: 0.2407

# Mean BLEU Score: 0.7937
# Mean ROUGE Score: 0.2425
