In [1]:
# Imports
from dotenv import load_dotenv
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp, OpenAI
from tqdm import tqdm

import os
import pickle

In [2]:
# Populate the environment via python-dotenv, then read OpenAI's API key from it
# (the value is None when the variable is not set)
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
# Read the pickled queries from disk and deserialize them.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open("../dataset/queries.pkl", "rb") as queries_file:
    queries = pickle.loads(queries_file.read())

### da-vinci-0.0.3 (OpenAI `text-davinci-003`)

In [4]:
# Define the LLM for the da-vinci-0.0.3 section.
# The model is pinned explicitly instead of relying on the library default:
# LangChain's default completion model has changed between releases, so an
# implicit default could silently produce responses from a different model
# than the one this section (and its output directory) is named after.
# NOTE(review): "text-davinci-003" is assumed to be the API id meant by
# "da-vinci-0.0.3" — confirm, and note OpenAI has since deprecated this model.
da_vinci_llm = OpenAI(openai_api_key=OPENAI_API_KEY, model="text-davinci-003")

In [5]:
#### API CALL WARNING ####

# Send every query's text to the LLM, storing each completion under "response"
# keyed by the query's id. Results are added one at a time so that whatever was
# collected so far remains available if a call fails midway.
answers_da_vinci = {}
da_vinci_progress = tqdm(queries.items(), desc="Collecting ChatGPT Responses")
for query_id, entry in da_vinci_progress:
    completion = da_vinci_llm.invoke(entry["text"])
    answers_da_vinci[query_id] = {"response": completion}

Collecting ChatGPT Responses:   0%|                                  | 0/112 [00:04<?, ?it/s]


In [6]:
# Sanity check: display one query's text next to the response collected for it
sample_id = 1
(queries[sample_id]["text"], answers_da_vinci[sample_id]["response"])

(' What problems and concerns are there in making up descriptive titles? What difficulties are involved in automatically retrieving articles from approximate titles? What is the usual relevance of the content of articles to their titles?',
 '\n\n1. Problems and concerns in making up descriptive titles include trying to capture the key points of the article accurately and concisely, as well as ensuring that the title is interesting and attention-grabbing enough for readers. Additionally, descriptive titles should be unique and not easily confused with titles of other articles.\n\n2. Difficulties involved in automatically retrieving articles from approximate titles include ambiguity in the language used in the titles, as well as dealing with spelling and grammar errors in the titles. Additionally, the retrieval algorithm must be able to differentiate between similar titles and identify the correct article.\n\n3. The relevance of the content of articles to their titles typically varies, d

In [7]:
# Save all responses as a pickle file.
da_vinci_out_path = "../responses/da-vinci-0.0.3/llm_wo_rag.pkl"
# open(..., "wb") does not create missing parent directories; create them first
# so the (paid-for) API responses are not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(da_vinci_out_path), exist_ok=True)
with open(da_vinci_out_path, "wb") as f:
    pickle.dump(answers_da_vinci, f)

### gpt-3.5-turbo-instruct

In [8]:
# Define the LLM 
# Instantiate the OpenAI wrapper with the model given explicitly
gpt_llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    openai_api_key=OPENAI_API_KEY,
)

In [9]:
#### API CALL WARNING ####

# Send every query's text to the LLM, storing each completion under "response"
# keyed by the query's id. Results are added one at a time so that whatever was
# collected so far remains available if a call fails midway.
answers_gpt = {}
gpt_progress = tqdm(queries.items(), desc="Collecting ChatGPT Responses")
for query_id, entry in gpt_progress:
    completion = gpt_llm.invoke(entry["text"])
    answers_gpt[query_id] = {"response": completion}

Collecting ChatGPT Responses:   0%|                                  | 0/112 [00:03<?, ?it/s]


In [10]:
# Sanity check: display one query's text next to the response collected for it
sample_id = 1
(queries[sample_id]["text"], answers_gpt[sample_id]["response"])

(' What problems and concerns are there in making up descriptive titles? What difficulties are involved in automatically retrieving articles from approximate titles? What is the usual relevance of the content of articles to their titles?',
 '\n\nSome potential problems and concerns in making up descriptive titles include:\n\n1. Balancing accuracy and brevity: Descriptive titles should accurately reflect the content of the article, but also need to be concise and attention-grabbing. This can be a difficult balance to achieve.\n\n2. Subjectivity: Different people may have different interpretations of what constitutes a "descriptive" title. This can lead to confusion and inconsistency in titles.\n\n3. Language barriers: Descriptive titles may not always accurately convey the content of an article to non-native speakers or those unfamiliar with the subject matter. This can make it difficult for them to understand the relevance of the article.\n\n4. Cultural considerations: Certain phrases 

In [11]:
# Save all responses as a pickle file.
gpt_out_path = "../responses/gpt-3.5-turbo-instruct/llm_wo_rag.pkl"
# open(..., "wb") does not create missing parent directories; create them first
# so the (paid-for) API responses are not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(gpt_out_path), exist_ok=True)
with open(gpt_out_path, "wb") as f:
    pickle.dump(answers_gpt, f)

### llama-7b (Llama 2 7B Chat, `q4_K_S` quantization, via llama.cpp)

In [12]:
# Build the llama.cpp-backed LLM from the local model file
llama_model_file = "../../experiments/backups/llama.cpp/models/llama-2-7b-chat.ggmlv3.q4_K_S.gguf.bin"
llama_llm = LlamaCpp(model_path=llama_model_file, verbose=False, n_ctx=4096)
# Also turn off verbosity on the wrapper's `client` attribute
llama_llm.client.verbose = False

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../../experiments/backups/llama.cpp/models/llama-2-7b-chat.ggmlv3.q4_K_S.gguf.bin (version unknown)
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:               output_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:                    output.weight q6_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_q.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:              blk.0.attn_v.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    6:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:           blk.0.attn_norm.w

In [13]:
# Run every query's text through the local LLM, storing each completion under
# "response" keyed by the query's id. Results are added one at a time so that
# whatever was collected so far remains available if a call fails midway.
answers_llama = {}
llama_progress = tqdm(queries.items(), desc="Collecting LLaMa Responses")
for query_id, entry in llama_progress:
    completion = llama_llm.invoke(entry["text"])
    answers_llama[query_id] = {"response": completion}

Collecting LLaMa Responses:   0%|                                    | 0/112 [00:16<?, ?it/s]


In [14]:
# Sanity check: display one query's text next to the response collected for it
sample_id = 1
(queries[sample_id]["text"], answers_llama[sample_id]["response"])

(' What problems and concerns are there in making up descriptive titles? What difficulties are involved in automatically retrieving articles from approximate titles? What is the usual relevance of the content of articles to their titles?',
 "\n\n1. What problems and concerns are there in making up descriptive titles?\nDescriptive titles can be tricky to create as they need to accurately convey the main idea or focus of an article without being too long or wordy. Some common issues with creating descriptive titles include:\n* Fitting complex information into a limited number of words: Titles are often shorter than the content of an article, so it can be challenging to summarize a lot of information into just a few words.\n* Choosing the right keywords: It's important to use relevant and specific keywords in a title to help readers quickly understand what the article is about. However, choosing the right keywords can be difficult, especially if you're not sure what searchers will use whe

In [15]:
# Save all responses as a pickle file.
llama_out_path = "../responses/llama-7b/llm_wo_rag.pkl"
# open(..., "wb") does not create missing parent directories; create them first
# so the collected responses are not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(llama_out_path), exist_ok=True)
with open(llama_out_path, "wb") as f:
    pickle.dump(answers_llama, f)