In [1]:
# Imports
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import LlamaCpp, OpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import FAISS
from tqdm import tqdm

import os
import pickle

In [2]:
# Load OpenAI's API key
# load_dotenv() populates the process environment (presumably from a local
# .env file); the key is then read back from the environment. The value is
# None when the variable is not set.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
# Load documents' texts
# NOTE: pickle can execute arbitrary code on load; only use trusted dataset files.
with open("../dataset/documents.pkl", "rb") as docs_file:
    docs = pickle.load(docs_file)

In [4]:
# Load queries' text
# NOTE: pickle can execute arbitrary code on load; only use trusted dataset files.
with open("../dataset/queries.pkl", "rb") as queries_file:
    queries = pickle.load(queries_file)

In [5]:
# Load ground truth
# NOTE: pickle can execute arbitrary code on load; only use trusted dataset files.
with open("../dataset/rel_set.pkl", "rb") as rel_set_file:
    rel_set = pickle.load(rel_set_file)

### Common Setup

In [6]:
# Number of hits requested from the retriever for each query; the retrieved
# documents are what ends up in the prompt's context.
k = 10  # Choose the top-k most relevant documents

In [7]:
# Load FAISS index from file
# The embeddings object passed here is the one the store will use to embed
# incoming queries.
faiss_vs = FAISS.load_local(
    embeddings=OpenAIEmbeddings(),
    folder_path="../ir_techniques/faiss/",
)

# Wrap the vector store as a retriever that returns the top-k matches
retriever = faiss_vs.as_retriever(search_kwargs=dict(k=k))

In [8]:
def format_docs(_docs):
    """Map retrieved hits to the texts of the corresponding documents.

    Each item's ``page_content`` is converted to an ``int`` and used as a key
    into the module-level ``docs`` mapping (presumably the index stores
    document ids as page content -- confirm against the index builder).
    Hits whose id is not present in ``docs`` are skipped.

    Args:
        _docs: Iterable of objects exposing a ``page_content`` attribute
            that ``int()`` can parse.

    Returns:
        list: The ``"text"`` entries of the matched documents, in the same
        order as the incoming hits.
    """
    texts = []
    for hit in _docs:
        doc_id = int(hit.page_content)
        if doc_id not in docs:
            # Unknown id: skip it rather than fail
            continue
        texts.append(docs[doc_id]["text"])
    return texts

In [9]:
# Define the prompt for the LLM
# {context} is filled with the retrieved document texts and {question} with
# the raw query text.
template = (
    "Answer the question or Explain the topic given this additional context: {context}\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

### gpt-3.5-turbo-instruct

In [16]:
# Define the LLM
# Completion-style OpenAI model, authenticated with the key read from the
# environment.
gpt_llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    openai_api_key=OPENAI_API_KEY,
)

In [17]:
# Define the RAG pipeline
# The input query is fanned out into the prompt variables: "context" is the
# retriever output run through format_docs, "question" is the query passed
# through unchanged. The filled prompt goes to the LLM and the result is
# parsed to a plain string.
gpt_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | gpt_llm
    | StrOutputParser()
)

In [18]:
# Test a query: uncomment the line below to send a single query through the
# full RAG chain. Note that doing so invokes the OpenAI-backed LLM.
# gpt_chain.invoke(queries[1]["text"])

In [19]:
#### API CALL WARNING #####

# Run RAG pipeline for every query
# One chain invocation is made per query id in the ground-truth set. Results
# are stored as they arrive, so answers gathered before a failure remain
# available in answers_gpt.
answers_gpt = {}
for query_id in tqdm(rel_set.keys(), desc="Asking Queries to ChatGPT with RAG"):
    query_text = queries[query_id]["text"]
    response = gpt_chain.invoke(query_text)
    answers_gpt[query_id] = dict(response=response)

Asking Queries to ChatGPT with RAG:   0%|                             | 0/76 [00:04<?, ?it/s]


In [20]:
# Sanity check
# Display the text of query 1 next to the response generated for it, so the
# pairing can be inspected by eye.
queries[1]["text"], answers_gpt[1]["response"]

(' What problems and concerns are there in making up descriptive titles? What difficulties are involved in automatically retrieving articles from approximate titles? What is the usual relevance of the content of articles to their titles?',
 '\nAnswer: There are several problems and concerns in making up descriptive titles for articles in engineering literature. One issue is the proportion of information that is contained in the complete document but cannot be deduced from the title alone. This means that even if a title is descriptive and informative, there may still be important information missing that is only found in the full document. Additionally, there are challenges in dealing with synonyms and syntactical variants in titles when searching indexes.\n\nAnother difficulty is the effectiveness of titles in conveying the relevance of an article to potential users. While titles are important for alerting and information services, they may not accurately represent the content of the 

In [21]:
# Save all responses
# Create the output directory first: without it, open() raises
# FileNotFoundError and the answers gathered by the API loop are never
# written to disk.
responses_path = "../responses/gpt-3.5-turbo-instruct/llm_w_rag_faiss.pkl"
os.makedirs(os.path.dirname(responses_path), exist_ok=True)
with open(responses_path, "wb") as f:
    pickle.dump(answers_gpt, f)