# Package Installation, Library Imports, and Data Imports


In [None]:
!nvidia-smi

In [None]:
%pip install faiss-gpu
!pip install pandas
!pip install nltk
!pip install gensim
!pip install sentence-transformers
!pip install rank_bm25
!pip install accelerate
!pip install bitsandbytes
!pip install torch
!pip install SentencePiece
!pip install evaluate
!pip install bert_score
!pip install chromadb
!pip install transformers
!pip install ctransformers ctransformers[cuda]

In [None]:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
import accelerate
import nltk
import pandas as pd
import faiss
import numpy as np
import chromadb
import random
import evaluate
import math
from google.colab import drive
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
from gensim.models.doc2vec import Doc2Vec,TaggedDocument
from sklearn.feature_extraction.text import TfidfVectorizer
from chromadb.utils import embedding_functions
from transformers import LlamaTokenizer,GenerationConfig
from sentence_transformers import SentenceTransformer

In [None]:
# Fetch the NLTK resources this notebook relies on: 'punkt' backs word_tokenize
# (used for Doc2Vec tokenisation) and 'stopwords' backs remove_stopwords.
nltk.download('punkt')
nltk.download('stopwords')

In [None]:
# Mount Google Drive under /content/drive; the CSV datasets and the heuristic
# checkpoint are read from drive/MyDrive/IR_Project later in the notebook.
drive.mount('/content/drive', force_remount=True)

In [None]:
class Document:
    """Base record wrapper that remembers the raw row it was created from."""

    def __init__(self, row):
        # Keep the untouched source record; subclasses pull named fields out of it.
        self.details = row

    @property
    def info(self):
        """Read-only view of the raw row handed to the constructor."""
        raw_row = self.details
        return raw_row

In [None]:
# Lazily-built cache of the English stop-word list. Filled on first use so the
# corpus is only read after nltk.download('stopwords') has run.
_ENGLISH_STOPWORDS = None


def remove_stopwords(sentence):
    """Return `sentence` with English stop words removed.

    The sentence is split on whitespace, every token whose lower-cased form is
    an NLTK English stop word is dropped, and the remaining tokens are joined
    with single spaces (so runs of whitespace/newlines are collapsed).

    The stop-word set is built once and reused; the previous implementation
    rebuilt it from the corpus for every single word, which made preprocessing
    the dataset take minutes.
    """
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))

    kept_tokens = [word for word in sentence.split() if word.lower() not in _ENGLISH_STOPWORDS]
    return ' '.join(kept_tokens)

In [None]:
class Job(Document):
  """A job posting built from one row of the jobs dataset.

  `row` must support string-key lookup for the columns read below
  ("open_job-href", "job_title_simple", "company_name", "address",
  "salary_lb", "salary_hb", "job_desc"). The raw row stays available through
  the Document base class.
  """

  def __init__(self, row):
    super().__init__(row)
    self.open_job_href = row["open_job-href"]
    self.job_title = row["job_title_simple"]
    self.company_name = row["company_name"]
    self.address = row["address"]
    self.salary_lb = row["salary_lb"]
    self.salary_hb = row["salary_hb"]
    self. job_desc = row["job_desc"]

  @property
  def job_text(self):
    """Render the posting as one labelled multi-line string.

    The "Job link: " and "Company name: " labels are parsed back out by
    getRelevantDoc, so they must not be renamed. The continuation lines of the
    f-string carry the source indentation as part of the returned text.
    """
    return (
        f"""Job title: {self.job_title}
        Job link: {self.open_job_href}
        Company name: {self.company_name}
        Address: {self.address}
        Salary: {self.salary_lb} - {self.salary_hb}
        Job Description: {self.job_desc}
        """
        )

In [None]:
class Course(Document):
    """A course entry built from one row of the courses dataset.

    `row` must support string-key lookup for the "url" and "description"
    columns. The raw row stays available through the Document base class.
    """

    def __init__(self, row):
      super().__init__(row)
      self.url = row["url"]
      self.course_desc = row["description"]

    @property
    def course_text(self):
      """Render the course as one labelled multi-line string.

      The "Course link: " and "Course Description" labels are parsed back out
      by getRelevantDoc, so they must not be renamed. The continuation lines of
      the f-string carry the source indentation as part of the returned text.
      """
      return (
          f"""Course link: {self.url}
          Course Description: {self.course_desc}
          """
          )

# LLM Models

In [None]:
def load_vicuna():
    """Load the `lmsys/vicuna-7b-v1.5` causal language model.

    The model is requested with float16 weights, 8-bit loading enabled and
    `device_map="auto"`. No tokenizer is created here.

    Returns:
        The model object returned by `AutoModelForCausalLM.from_pretrained`.
    """
    # Imported locally because load_openhermes imports a different class with
    # the same name from ctransformers. Only the model class is needed here.
    from transformers import AutoModelForCausalLM

    return AutoModelForCausalLM.from_pretrained(
        "lmsys/vicuna-7b-v1.5",
        torch_dtype=torch.float16,
        load_in_8bit=True,
        device_map="auto",
    )

In [None]:
def response_gen(input_ids):
    """Sample a continuation for `input_ids` and return it as decoded text.

    Uses the notebook-level `model_l` for generation and `tokenizer_l` for
    decoding. Only the tokens generated after the prompt are decoded; special
    tokens are not stripped by this function.
    """
    sampling_options = dict(
        temperature=0.7,
        top_p=1.0,
        do_sample=True,
        return_dict_in_generate=True,
        max_new_tokens=300,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model_l.generate(input_ids=input_ids, **sampling_options)

    # The returned sequence starts with the prompt; drop it to keep the answer only.
    prompt_length = len(input_ids[0])
    answer_tokens = generated.sequences[0][prompt_length:]
    return tokenizer_l.decode(answer_tokens)

In [None]:
def load_openhermes():
  """Load the quantised OpenHermes 2.5 neural-chat GGUF model through ctransformers.

  Returns:
      The model object returned by ctransformers'
      `AutoModelForCausalLM.from_pretrained`.
  """
  # Imported locally: transformers exposes a class with the same name, which
  # load_vicuna uses.
  from ctransformers import AutoModelForCausalLM

  repository = "TheBloke/OpenHermes-2.5-neural-chat-7B-v3-1-7B-GGUF"
  load_options = {
      "model_file": "openhermes-2.5-neural-chat-7b-v3-1-7b.Q4_K_M.gguf",
      "model_type": "mistral",
      "gpu_layers": 100,
      "max_new_tokens": 300,
      "context_length": 2048,
  }
  return AutoModelForCausalLM.from_pretrained(repository, **load_options)

# Search Indexing and Similarity Matrix


In [None]:
class IRModel:
  """Embedding, indexing and similarity helpers for document retrieval.

  Typical flow: `init_model(mode)` creates the encoder and records the mode on
  the instance; `doc_to_vector` embeds a corpus and returns both the vectors and
  a `{tuple(vector): document}` lookup; one of the `*_search` methods returns
  the top-k `(vectors, documents)` for a query.
  """

  # Encoding modes backed by a SentenceTransformer checkpoint.
  # BERT vectors are (n, 768), MiniLM vectors are (n, 384).
  _TRANSFORMER_MODELS = {
      'BERT': 'bert-base-nli-mean-tokens',
      'MINILM': 'all-MiniLM-L6-v2',
  }

  def init_model(self, mode):
    """Create the encoder for `mode` ('BERT', 'MINILM' or 'D2V') and remember the mode.

    Raises:
        ValueError: if `mode` is not one of the supported encoders (previously
            this surfaced as an UnboundLocalError).
    """
    if mode in self._TRANSFORMER_MODELS:
      model = SentenceTransformer(self._TRANSFORMER_MODELS[mode])
    elif mode == 'D2V':  # vector dim --> (n, 768)
      model = Doc2Vec(vector_size=768, min_count=2, epochs=50)
    else:
      raise ValueError(f"Unsupported encoding mode: {mode!r}")

    self.mode = mode
    return model

  def doc_to_vector(self, model, arr):
    """Embed every document in `arr`; returns `(vectors, vector_doc_map)`.

    Raises:
        ValueError: if the recorded mode is not a supported encoder.
    """
    if self.mode in self._TRANSFORMER_MODELS:
      return self.transformer_vector(model, arr)
    if self.mode == 'D2V':
      return self.doc2vec_vector(model, arr)
    raise ValueError(f"Unsupported encoding mode: {self.mode!r}")

  def doc2vec_vector(self, model, arr):
    """Train the Doc2Vec `model` on `arr` and infer one vector per document."""
    tagged_data = [
        TaggedDocument(words=word_tokenize(doc.lower()), tags=[str(i)])
        for i, doc in enumerate(arr)
    ]

    model.build_vocab(tagged_data)
    model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)

    # get the document vectors
    vectors = np.array([model.infer_vector(word_tokenize(doc.lower())) for doc in arr])
    # Documents with identical vectors collapse to a single entry in this map.
    vector_doc_map = {tuple(vec): doc for vec, doc in zip(vectors, arr)}

    return vectors, vector_doc_map

  def transformer_vector(self, model, arr):
    """Encode each document with `model.encode` into a float32 matrix."""
    # create vector embeddings for the documents
    vectors = np.array([model.encode(s) for s in arr], dtype=np.float32)
    # Create a dictionary mapping vector to document
    vector_doc_map = {tuple(vec): doc for vec, doc in zip(vectors, arr)}
    return vectors, vector_doc_map

  def word_to_vector(self, model, word):
    """Embed a single query/text with the encoder matching the recorded mode.

    Raises:
        ValueError: if the recorded mode is not a supported encoder.
    """
    if self.mode in self._TRANSFORMER_MODELS:
      return model.encode(word)
    if self.mode == 'D2V':
      return model.infer_vector(word_tokenize(word.lower()))
    raise ValueError(f"Unsupported encoding mode: {self.mode!r}")

  def lsh_index_search(self, wb, xq, vector_doc_map, k):
    """Return the top-`k` `(vectors, documents)` for `xq` using a FAISS LSH index.

    The index is rebuilt from `wb` on every call. Returns an empty result when
    `k <= 0`, and ignores the -1 labels FAISS uses to pad missing neighbours.
    """
    if k <= 0:
      return wb[:0], []

    d = wb.shape[1]
    nbits = 1000
    # initialize the index using the dimensionality of wb and nbits
    index = faiss.IndexLSH(d, nbits)
    # then add the data
    index.add(wb)
    xq0 = xq.reshape(1, -1)

    D, I = index.search(xq0, k=k)

    # -1 marks "no neighbour found"; indexing wb with it would silently return
    # the last document instead.
    hits = I[0][I[0] >= 0]
    matched_vectors = wb[hits]
    rel_doc = self.get_relevant_doc(matched_vectors, vector_doc_map)
    return matched_vectors, rel_doc

  def bm25_search(self, arr, query_string, vector_doc_map, k):
    """Return the top-`k` `(vectors, documents)` for `query_string` ranked by BM25.

    Both corpus and query are tokenised by splitting on single spaces.
    """
    # initialize bm25 object and add in documents
    tokenized_corpus = [doc.split(" ") for doc in arr]
    bm25 = BM25Okapi(tokenized_corpus)

    tokenized_query = query_string.split(" ")

    # get top n relevant jobs
    relevant_docs = bm25.get_top_n(tokenized_query, arr, n=k)

    # get vector of top n relevant jobs
    relevant_docs_vectors = self._vectors_for_docs(relevant_docs, vector_doc_map)
    return relevant_docs_vectors, relevant_docs

  def setup_chromadb(self,arr,vectors,course_or_job):
    """Create a cosine-space Chroma collection holding `arr` with precomputed `vectors`.

    The collection name is `course_or_job` plus a random number below 1000.
    NOTE(review): names can collide with an existing collection in the
    persistent store, in which case `create_collection` is expected to fail.
    """
    CHROMA_DATA_PATH = "chroma_data/"
    COLLECTION_NAME = course_or_job + str(random.randrange(1000))

    client = chromadb.PersistentClient(path=CHROMA_DATA_PATH)

    # No embedding function is configured: embeddings are supplied explicitly.
    collection = client.create_collection(
        name=COLLECTION_NAME,
        metadata={"hnsw:space": "cosine"},
    )
    collection.add(embeddings=vectors,documents=arr,ids=[f"id{i}" for i in range(len(arr))])
    return collection

  def chromadb_search(self, collection, query_vector, vector_doc_map, k):
    """Return the top-`k` `(vectors, documents)` for `query_vector` from a Chroma collection.

    Returns two empty lists when `k <= 0`.
    """
    if k <= 0:
      return [], []

    query_results = collection.query(query_embeddings=query_vector,n_results=k)
    relevant_docs = query_results["documents"][0]
    # get vector of top n relevant jobs
    relevant_docs_vectors = self._vectors_for_docs(relevant_docs, vector_doc_map)
    return relevant_docs_vectors, relevant_docs

  def _vectors_for_docs(self, docs, vector_doc_map):
    """Collect, in `docs` order, every vector key of `vector_doc_map` that maps to each doc."""
    wanted = set(docs)
    vectors_by_doc = {}
    # Single pass over the map instead of one full scan per retrieved document.
    for vec, doc in vector_doc_map.items():
      if doc in wanted:
        vectors_by_doc.setdefault(doc, []).append(vec)

    collected = []
    for doc in docs:
      collected.extend(vectors_by_doc.get(doc, []))
    return collected

  def get_relevant_doc(self, vector_docs, vector_doc_map):
    """Map each vector back to its document ("Document not found" when unknown)."""
    # Retrieve the original document based on a vector
    return [
        vector_doc_map.get(tuple(vec), "Document not found")
        for vec in vector_docs
    ]

  def cos_sim(self, relevant_doc_vector, xq):
    """Cosine similarity between each row of `relevant_doc_vector` and `xq`.

    Returns an explanatory string instead of an array when there is nothing to
    compare; callers only print that value.
    """
    if len(relevant_doc_vector) == 0 :
      return "No document to compare for cosine similarity"
    return cosine_similarity(relevant_doc_vector, [xq])

  def dot_produdct_sim(self, relevant_doc_vector, xq):
    """Dot product between `relevant_doc_vector` and `xq`.

    Raises:
        ValueError: if `relevant_doc_vector` is empty.
    """
    if len(relevant_doc_vector) == 0 :
      raise ValueError("No document to compare")
    return np.dot(np.array(relevant_doc_vector), np.array(xq))

  # Correctly spelled alias; the misspelled name is kept for existing callers.
  dot_product_sim = dot_produdct_sim

In [None]:
def getAnswer(prompt, rel_doc, llm_mode):
    """Generate an answer for `prompt` and score it against `rel_doc`.

    Args:
        prompt: token ids for 'VICUNA' (passed to response_gen) or the prompt
            text for 'OPENHERMES' (passed straight to `model_l`).
        rel_doc: reference vector the answer embedding is compared with.
        llm_mode: 'VICUNA' or 'OPENHERMES'.

    Returns:
        `(output, similarity)` where `similarity` is whatever
        `irModel.cos_sim` returns for the answer embedding and `rel_doc`.

    Raises:
        ValueError: if `llm_mode` is not supported (previously this surfaced
            as an UnboundLocalError on `output`).
    """
    if llm_mode == 'VICUNA':
      output = response_gen(prompt)
    elif llm_mode == 'OPENHERMES':
      output = model_l(prompt, stream=False)
    else:
      raise ValueError(f"Unsupported llm_mode: {llm_mode!r}")

    # Embed the answer with the retrieval encoder so it is comparable to rel_doc.
    pred_vector = irModel.word_to_vector(model,output)
    similarity = irModel.cos_sim(pred_vector.reshape(1,-1), rel_doc)

    return output, similarity

# Heuristics

In [None]:
class LinearModel(nn.Module):
    """Small sigmoid MLP that maps a `dim_model`-wide feature row to one probability.

    A single `hidden` layer is applied `num_layers` times (the weights are
    shared across repetitions), followed by a one-unit classification head.
    """

    def __init__(self, dim_model=50, num_layers=2):
        super().__init__()
        self.num_layers = num_layers
        self.hidden = nn.Linear(dim_model, dim_model)
        self.classification_head = nn.Linear(dim_model, 1)

    def forward(self, src: torch.tensor) -> torch.tensor:
        """Return sigmoid outputs with the trailing singleton dimension removed."""
        activations = src
        # Same layer reused on every pass, not a stack of distinct layers.
        for _ in range(self.num_layers):
            activations = torch.sigmoid(self.hidden(activations))

        probabilities = torch.sigmoid(self.classification_head(activations))
        return probabilities.squeeze(-1)

In [None]:
class Heuristic():
  """Wraps the trained LinearModel that scores a tokenised question.

  The score is used by the caller to split the retrieval budget between job
  and course documents.
  """

  def __init__(self):
    """Load the checkpoint from Drive onto the GPU when available, else the CPU.

    Raises:
        ValueError: if the checkpoint file does not exist.
    """
    super().__init__()
    checkpoint_path = 'drive/MyDrive/IR_Project/heuristic_checkpoint.pt'
    if not os.path.exists(checkpoint_path):
      raise ValueError("Cannot find checkpoint path")

    # Fall back to CPU instead of failing on machines without CUDA.
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.model = LinearModel()
    # map_location lets a GPU-saved checkpoint load on whichever device we use.
    checkpoint = torch.load(checkpoint_path, map_location=self.device)
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.model = self.model.to(self.device)
    self.model.eval()

  def process(self, label_features, weight=0.5):
    """Blend the model probability with a neutral 0.5 prior.

    Args:
        label_features: mapping with an "input_ids" tensor of shape
            (1, num_tokens); the result is read with `.item()`, so a single
            row is expected.
        weight: share given to the model probability; the remainder goes to
            the 0.5 prior.

    Returns:
        `weight * p + 0.5 * (1 - weight)` as a Python float.
    """
    the_feat = self.__process_tokenized(label_features)
    # Pure inference: no autograd graph needed.
    with torch.no_grad():
      theProb = self.model(the_feat).item()
    outputprob = weight*theProb + 0.5*(1-weight)
    return outputprob

  def __process_tokenized(self, label_features, max_tokens=50):
    """Return the token ids as a float tensor of exactly `max_tokens` columns.

    Shorter inputs are right-padded with zeros, longer ones are truncated.
    The ids are always cast to float: previously only the padded case became
    float (through concatenation with the float padding), so inputs of
    `max_tokens` or more tokens reached the linear layer as integers.
    """
    label_feature = label_features["input_ids"].to(self.device).float()
    num_frames = label_feature.shape[1]
    if num_frames < max_tokens:
        padding = torch.zeros(
            label_feature.shape[0],
            max_tokens - num_frames,
            dtype=label_feature.dtype,
            device=label_feature.device,
        )
        label_feature = torch.cat((label_feature, padding), dim=1)
    elif num_frames > max_tokens:
        label_feature = label_feature[:, :max_tokens]
    return label_feature

# Self Consistency

In [None]:
def getPredictionSC(context, question,n):
  """Self-consistency: sample `n` answers and return the most frequent one.

  Reads the notebook-level `llm_mode`, `model_l` and `tokenizer_l`. Answers
  equal to the end-of-sequence marker '</s>' are not counted. With `n <= 1`
  the first sampled answer is returned as-is; an empty string is returned when
  no answer was counted.

  Raises:
      ValueError: if the notebook-level `llm_mode` is not supported
          (previously this surfaced as a NameError on `output`).
  """
  if llm_mode not in ('VICUNA', 'OPENHERMES'):
    raise ValueError(f"Unsupported llm_mode: {llm_mode!r}")

  ignored_answer = '</s>'
  # The prompt (and its tokenisation) is identical for every sample, so build it once.
  prompt = f"Please answer the question based on the context.{context}\n Question: {question}"
  input_ids = None
  if llm_mode == 'VICUNA':
    input_ids = tokenizer_l(prompt, return_tensors='pt')["input_ids"].to("cuda")

  vote_counts = {}
  for i in range(n):
    if llm_mode == 'VICUNA':
      output = response_gen(input_ids)
    else:
      output = model_l(prompt, stream=False)

    print(f"Predicted answer at i: {i} ==> {output}")

    if n <= 1:
      return output

    if output != ignored_answer:
      # One vote per occurrence (the first occurrence used to count twice).
      vote_counts[output] = vote_counts.get(output, 0) + 1

  if vote_counts:
    # Ties resolve to the answer that was seen first.
    return max(vote_counts, key=vote_counts.get)
  return ""

# Iterative Refinement

In [None]:
def getPredictionLOOOP(context, question,rel_doc,n, llm_mode):
  """Sample up to `n` answers for the same prompt and keep the best-scoring one.

  Each answer is produced and scored by getAnswer against `rel_doc`. Answers
  equal to '</s>' are skipped. With `n <= 1` the first non-skipped answer is
  returned immediately. Returns `("", 0)` when nothing was kept.
  """
  end_marker = '</s>'
  query = f"Please answer the question based on the context.{context}\n Question: {question}"
  if llm_mode == 'VICUNA':
      # Vicuna is driven with token ids rather than raw text.
      encoded = tokenizer_l(query, return_tensors='pt')
      query = encoded["input_ids"].to("cuda")

  scored_answers = {}
  for _ in range(n):
    answer, score = getAnswer(query, rel_doc, llm_mode)

    print(f"Predicted Looop answer ==> {answer}")
    print(f"Similarity Looop ==> {score}")

    if answer == end_marker:
      continue
    scored_answers[answer] = score
    if n <= 1:
      return answer, score

  if not scored_answers:
    return "",0

  print("////////// Predicted Looop Answers : ", scored_answers)
  best_answer = max(scored_answers, key=scored_answers.get)
  return best_answer, scored_answers[best_answer]

# Emotional Prompt

In [None]:
def getPredictionEP(context, question,rel_doc, n,llm_mode):
  """Emotional-prompt prediction: ask once, then challenge the answer and keep the best.

  The first prompt is the question plus the first follow-up phrase; the model's
  answer is then fed back with the same phrase appended. Both answers are
  scored by getAnswer against `rel_doc`, answers equal to '</s>' are skipped,
  and the highest-scoring answer is returned as `(answer, similarity)`.
  Returns `("", 0)` when nothing was kept. `n` is accepted for signature
  parity with the other prediction helpers but is not used.
  """
  EPS1 = ["This is very important to me.",
          "You'd better be sure.",
          "Embrace challenges as opportunities for growth. Each obstacle you overcome brings you closer to success.",
          "Stay focused and dedicated to your goals. Your consistent efforts will lead to outstanding achievements.",
          "Take pride in your work and give it your best. Your commitment to excellence sets you apart.",
          "Remember that progress is made one step at a time. Stay determined and keep moving forward."]

  EPS2 = [
      "Are you sure?",
      "Are you sure that is your final answer? It might be worth taking another look.",
      "Are you sure that's your final answer? Believe in your abilities and strive for excellence. Your hard work will yield remarkable results.",
  ]

  ignored_answer = '</s>'
  predicted_answers = {}

  prompt = f"Please answer the question based on the context.{context}\n Question: {question}\n {EPS2[0]}"
  if llm_mode == 'VICUNA':
      # Index the tokenizer output; the previous code indexed the builtin
      # `input`, which raised a TypeError on this path.
      tokenized_prompt = tokenizer_l(prompt, return_tensors='pt')
      prompt = tokenized_prompt["input_ids"].to("cuda")

  # NOTE(review): the loop variable is never inserted into the prompt, so the
  # EPS1 phrases are currently unused apart from running the body once.
  # Confirm whether the stimulus was meant to be part of the prompt.
  for _stimulus in EPS1[:1]:
    output, similarity = getAnswer(prompt, rel_doc,llm_mode)

    print(f"Predicted EP answer ==> {output}")
    print(f"Similarity EP ==> {similarity}")
    if output != ignored_answer:
      predicted_answers[output] = similarity

    # Challenge the first answer with the follow-up phrase.
    sec_input = output + EPS2[0]
    if llm_mode == 'VICUNA':
      sec_input = tokenizer_l(sec_input, return_tensors='pt')
      sec_input = sec_input["input_ids"].to("cuda")
    output, similarity = getAnswer(sec_input, rel_doc,llm_mode)
    print(f"Follow up EP answer ==> {output}")
    print(f"Follow up Similarity EP ==> {similarity}")
    if output != ignored_answer:
      predicted_answers[output] = similarity

  if len(predicted_answers.keys()) != 0:
    print("////////// Predicted Answers : ", predicted_answers)
    theKey = max(predicted_answers, key=predicted_answers.get)
    return theKey, predicted_answers[theKey]
  else:
    return "", 0


# Helper functions

In [None]:
# Loaded on first use and then reused: loading the metric on every call repeats
# the metric set-up work for each evaluation.
_BERTSCORE_METRIC = None


def getBERTScore(predictions, ground_truth):
  """Compute, print and return the English BERTScore of `predictions` vs `ground_truth`.

  Both arguments are lists of strings of equal length. The return value is
  whatever the metric's `compute` returns; callers read its 'precision',
  'recall' and 'f1' entries.
  """
  global _BERTSCORE_METRIC
  if _BERTSCORE_METRIC is None:
    _BERTSCORE_METRIC = evaluate.load("bertscore")

  bert_score = _BERTSCORE_METRIC.compute(predictions=predictions, references=ground_truth, lang="en")
  print(f"BERTScore: {bert_score}")
  return bert_score

In [None]:
def getRelevantDoc(model, ind_mode, arr,  query_string, wb, map_wb , xq, k, s_mode, collection=None):
  """Retrieve the top-`k` documents with the chosen backend and extract their links.

  Args:
      model: retrieval helper exposing `lsh_index_search`, `bm25_search`,
          `chromadb_search` and `cos_sim`.
      ind_mode: 'LSH', 'BM25' or 'CHROMA'.
      arr: corpus texts (used by BM25).
      query_string: raw query text (used by BM25).
      wb: corpus vectors (used by LSH).
      map_wb: `{tuple(vector): document}` lookup.
      xq: query vector.
      k: number of documents to retrieve.
      s_mode: 'job' or 'course'; selects which link marker is parsed.
      collection: Chroma collection (used by CHROMA).

  Returns:
      `(rel_docs, links)` where `links` is one string with a "\\n "-prefixed
      entry per document in which a link marker was found.

  Raises:
      ValueError: if `ind_mode` is not supported (previously this surfaced as
          an UnboundLocalError).
  """
  if ind_mode == 'LSH':
    # LSH indexing and get top k relevant documents
    print(f"\n================== {s_mode.upper()} LSH SEARCH ====================\n")
    I, rel_docs = model.lsh_index_search(wb, xq, map_wb, k)
  elif ind_mode == 'BM25':
    # BM25 indexing and get top k relevant documents
    print(f"\n================== {s_mode.upper()} BM25 SEARCH ====================\n")
    I, rel_docs = model.bm25_search(arr, query_string, map_wb, k)
  elif ind_mode == 'CHROMA':
    # CHROMADB indexing and get top k relevant documents
    print(f"\n================== {s_mode.upper()} CHROMADB SEARCH ====================\n")
    I , rel_docs = model.chromadb_search(collection, xq.tolist(), map_wb, k)
  else:
    raise ValueError(f"Unsupported search mode: {ind_mode!r}")

  links = ""
  if s_mode == 'course':
    for d in rel_docs:
      # The link sits between "Course link: " and "Course Description".
      header = d.split("Course Description", 1)[0]
      pieces = header.split('Course link: ')
      # Skip documents without the marker (e.g. "Document not found").
      if len(pieces) > 1:
        links += f"\n {pieces[1]}"
  if s_mode == 'job':
    for d in rel_docs:
      # The link sits between "Job link: " and "Company name: ".
      pieces = d.split('Job link: ', 1)
      if len(pieces) > 1:
        links += f"\n {pieces[1].split('Company name: ', 1)[0]}"

  doc_sim = model.cos_sim(I,xq)

  print(f" {ind_mode} search similarity : {doc_sim}")
  print(f" {ind_mode} search relevant docs :{rel_docs}")

  print(f"\n================== END OF {s_mode.upper()} SEARCH ====================\n")

  return rel_docs, links

In [None]:
def downsize_context(query,rel_docs,no_docs_retrieved,context_size,model_l):
  """Fit the retrieved documents into a word budget and concatenate them.

  Documents are processed shortest first. Each one may use an equal share of
  the words still available (`remaining budget // documents still expected`),
  so words left unused by short documents roll over to longer ones. Documents
  over their share are truncated to it and re-joined with single spaces. Every
  document is prefixed with "Document: " and the pieces are concatenated
  without any separator.

  At most `no_docs_retrieved` documents are used. Supplying fewer documents
  than that (or none) no longer raises; the available ones are returned.

  `query` and `model_l` are currently unused; they are kept for the
  LLM-summarisation variant this function was written around.
  """
  ordered_docs = sorted(rel_docs, key=lambda text: len(text.split()))  # smallest to largest
  docs_to_use = min(no_docs_retrieved, len(ordered_docs))
  if docs_to_use <= 0:
    return ""

  pieces = []
  remaining_budget = context_size
  for position, doc in enumerate(ordered_docs[:docs_to_use]):
    # Share is computed against the number of documents originally expected,
    # exactly as the recursive version did.
    allocation = max(0, remaining_budget // (no_docs_retrieved - position))
    words = doc.split()
    if len(words) > allocation:
      words = words[:allocation]
      doc = ' '.join(words)
    pieces.append("Document: " + doc)
    remaining_budget -= len(words)

  return ''.join(pieces)

In [None]:
def dataset_setup(shuffle=True,seed=42):
  """Load the job and course CSVs from Drive and return stop-word-free texts.

  Args:
      shuffle: when True (default) both lists are shuffled; when False they
          keep the CSV row order. The flag used to be ignored.
      seed: seed for the shuffle.

  Returns:
      `(courses_arr, jobs_arr)`: lists of preprocessed document strings.

  The shuffle uses a private `random.Random(seed)`, which produces the same
  order as seeding the global generator did but leaves the global `random`
  state untouched, so the random collection names generated later do not
  repeat on every run.
  """
  jobs = pd.read_csv("drive/MyDrive/IR_Project/cleaned_data/jobs_cleaned.csv")
  courses = pd.read_csv("drive/MyDrive/IR_Project/sch_data/all.csv")

  jobs_arr = [remove_stopwords(Job(jobs.iloc[i]).job_text) for i in range(jobs.shape[0])]
  courses_arr = [remove_stopwords(Course(courses.iloc[i]).course_text) for i in range(courses.shape[0])]

  if shuffle:
    rng = random.Random(seed)
    # Jobs first, then courses, from the same stream: matches the original order.
    rng.shuffle(jobs_arr)
    rng.shuffle(courses_arr)
  return courses_arr, jobs_arr

In [None]:
def slice_dataset(courses_arr, jobs_arr, slicing=None):
  """Return the first `slicing` courses and jobs (everything when `slicing` is None).

  New lists are returned as `(courses, jobs)`; the inputs are not modified.
  """
  head = slice(None, slicing)
  return courses_arr[head], jobs_arr[head]

# LLM Prompt

In [None]:
# Experiment configuration: choose the LLM, the text encoder and the search backend here.
llm_modes = ['VICUNA', 'OPENHERMES']
enc_modes = ['BERT','MINILM','D2V']
search_modes = ['LSH', 'BM25', 'CHROMA']

llm_mode = llm_modes[0]
encoding_mode = enc_modes[0]
s_mode = search_modes[2]

courses_arrs, jobs_arrs = dataset_setup(shuffle=True,seed=42) # Slow: stop-word removal took about 9 minutes for the original author

In [None]:
# Data setup: work on the first 3000 (already shuffled) documents of each corpus
courses_arr, jobs_arr = slice_dataset(courses_arrs, jobs_arrs, slicing=3000)
####################################
# LLM setup: model_l and tokenizer_l are read as globals by the generation helpers
if llm_mode == 'VICUNA':
  model_l = load_vicuna()
elif llm_mode == 'OPENHERMES':
  model_l = load_openhermes()
# The Vicuna tokenizer is loaded for both LLM modes; it also tokenises the
# question for the heuristic model in the prediction loop.
tokenizer_l = LlamaTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
####################################
# IR model setup: embed both corpora once with the selected encoder
irModel = IRModel()
model = irModel.init_model(encoding_mode)
# Despite the *_wb_vector names, the second value is the {tuple(vector): document} lookup
course_wb, course_wb_vector = irModel.doc_to_vector(model,courses_arr)
job_wb, job_wb_vector = irModel.doc_to_vector(model,jobs_arr)

# Chroma collections are only needed (and only built) for the CHROMA backend
if s_mode == 'CHROMA':
  jobCol = irModel.setup_chromadb(jobs_arr,job_wb.tolist(),"job")
  courseCol = irModel.setup_chromadb(courses_arr,course_wb.tolist(),"courses")
else:
  jobCol = None
  courseCol = None
####################################
# Heuristic model that decides how the retrieval budget is split between jobs and courses
heuristical = Heuristic()

In [None]:
# Load the evaluation questions and their reference ("Good") answers
sample_qna = pd.read_csv("drive/MyDrive/IR_Project/Sample qna.csv")
questions = list(sample_qna["Question"].values)
good_ans = list(sample_qna["Good"].values)

# Evaluate a single question: `index` selects which row is kept.
# NOTE: `questions` is overwritten with a one-element list, so re-running this
# cell alone no longer sees the full question list.
index = 0
questions = [questions[index]]
good_answer = [good_ans[index]]
questions

In [None]:
# Hyperparameters
# Total number of documents retrieved per question (split between jobs and courses)
total_docs_retrieved = 7
# Similarity above which the context-free answer is accepted as-is
threshold = 0.8
# Weight given to the heuristic model's probability (the rest goes to a 0.5 prior)
heuristic_weight = 0.7

In [None]:
predictions = []
answers = []
print('\n ///////////////////////////// START OF PREDICTION //////////////////////////////////////// \n')

# Each question is paired with its own reference answer so the per-question
# BERTScore compares matching texts.
for question, reference_answer in zip(questions, good_answer):
  reference = [reference_answer]
  print("Questions:", question)

  # The heuristic decides how many of the retrieved documents are jobs vs courses.
  tokenised_query = tokenizer_l(question, return_tensors='pt')
  probability_of_token = heuristical.process(tokenised_query,heuristic_weight)
  j_k =  math.floor(total_docs_retrieved*probability_of_token)
  c_k =  total_docs_retrieved-j_k

  query_vector = irModel.word_to_vector(model, question)

  rel_doc_j, links_j = getRelevantDoc(irModel, s_mode, jobs_arr, question, job_wb, job_wb_vector, query_vector, j_k, "job",collection=jobCol)
  rel_doc_c , links_c = getRelevantDoc(irModel, s_mode, courses_arr,  question, course_wb, course_wb_vector, query_vector, c_k, 'course',collection=courseCol)
  rel_doc = rel_doc_j + rel_doc_c
  if llm_mode == "OPENHERMES":
    rel_doc = downsize_context(question,rel_doc,total_docs_retrieved,1536-len(question.split()),model_l) # 0.75 typical token to word ratio
    rel_doc_text = rel_doc
  else:
    # Join the documents into one text. Embedding the list itself produced a
    # 2-D array (or failed for D2V), which the similarity computation in
    # getAnswer cannot handle.
    rel_doc_text = ''.join(rel_doc)
  context = "\n Context: " + rel_doc_text
  links = links_j + links_c

  rel_doc_vector = irModel.word_to_vector(model,rel_doc_text)

  # First attempt: answer without any retrieved context.
  prediction, similarity = getPredictionLOOOP("", question, rel_doc_vector, 1, llm_mode)
  print("\n==== Before Context Prediction : ", prediction ,"====\n")
  print("\n==== Before Context Similarity : ", similarity, "====\n")

  bert_pred = [prediction]
  current_acc = getBERTScore(bert_pred, reference)
  print(f"Before precision: {np.mean(current_acc['precision'])}")
  print(f"Before recall: {np.mean(current_acc['recall'])}")
  print(f"Before f1: {np.mean(current_acc['f1'])}")

  if(similarity > threshold):
      predictions.append([prediction, similarity])
      print(f"\n==== Prediction for {question} is {prediction}", "====\n")
  else:
    print('\n ///////////////////////////// STARTING SELF CONSISTENCY WITH CONTEXT //////////////////////////////////////// \n')

    # Second attempt: sample several answers with the retrieved context.
    prediction_with_context, similarity_with_context = getPredictionLOOOP(context, question,rel_doc_vector, 5, llm_mode)
    print("\n==== After Context Prediction : ", prediction_with_context,"====\n")
    print("\n==== After Context Similarity : ", similarity_with_context, "====\n")

    # Keep the contextual answer (with its source links) only when it scores higher.
    if(similarity_with_context > similarity):
      prediction = prediction_with_context + links
      similarity = similarity_with_context
    print(f"\n==== Prediction for {question} is {prediction}", "====\n")
    predictions.append([prediction, similarity])
    bert_pred = [prediction]
    current_acc = getBERTScore(bert_pred, reference)
    print(f"After precision: {np.mean(current_acc['precision'])}")
    print(f"After recall: {np.mean(current_acc['recall'])}")
    print(f"After f1: {np.mean(current_acc['f1'])}")


print('\n ///////////////////////////// END OF PREDICTION //////////////////////////////////////// \n')

print("FINAL Prediction " , predictions)
bert_pred = [predictions[i][0] for i in range(len(predictions))]
current_acc = getBERTScore(bert_pred, good_answer)
print(f"Overall mean precision: {np.mean(current_acc['precision'])}")
print(f"Overall mean recall: {np.mean(current_acc['recall'])}")
print(f"Overall mean f1: {np.mean(current_acc['f1'])}")
print("Done")

In [None]:
# Display the prediction texts that were last passed to BERTScore
bert_pred

In [None]:
# Each entry of `predictions` is a [prediction_text, similarity] pair
print(predictions[0])

In [None]:
# Recompute BERTScore over all stored predictions without re-running generation
bert_pred = [predictions[i][0] for i in range(len(predictions))]
current_acc = getBERTScore(bert_pred, good_answer)

In [None]:
# Summary: number of answers, then every prediction with its similarity score
print(len(predictions))
for question_no, stored in enumerate(predictions):
  print(f'=== Answer for Question {question_no} ===')
  print(f"Prediction: {stored[0]}")
  print(f"Similarity: {stored[1]}")