In [1]:
# https://python.langchain.com/docs/modules/data_connection/vectorstores/
# https://python.langchain.com/docs/integrations/vectorstores/annoy

In [2]:
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Annoy, FAISS
from langchain.vectorstores.utils import DistanceStrategy
from tqdm import tqdm

import numpy as np
import os
import pickle
import time

In [3]:
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [4]:
docs_file_path = './backups/openai_embeddings/doc_embeddings.pkl'

with open(docs_file_path, 'rb') as file:
    loaded_docs = pickle.load(file)

print("Document embeddings loaded successfully.")

query_file_path = './backups/openai_embeddings/query_embeddings.pkl'

# Load the query embeddings from the file
with open(query_file_path, 'rb') as file:
    loaded_queries = pickle.load(file)

print("Query embeddings loaded successfully.")

Document embeddings loaded successfully.
Query embeddings loaded successfully.


In [5]:
# Load the ground truth from the file
with open("./backups/ground_truth.pkl", "rb") as f:
    ground_truth = pickle.load(f)

print("Grouth truth loaded succesfully.")

Grouth truth loaded succesfully.


In [6]:
# def get_text(doc):
#     combined_text = doc["title"] + " " + doc["body"]
#     combined_text = combined_text.replace("\n", " ")
#     return combined_text

### Metrics

In [7]:
def precision_at_k(ranked_docs, relevant_docs, k=10):
    retrieved_relevant = 0
    for doc_id in ranked_docs[:k]:
        if doc_id in relevant_docs:
            retrieved_relevant += 1
    return retrieved_relevant / k

def recall_at_k(ranked_docs, relevant_docs, k=10):
    retrieved_relevant = sum(1 for doc_id in ranked_docs[:k] if doc_id in relevant_docs)
    return retrieved_relevant / len(relevant_docs) if relevant_docs else 0

def dcg_at_k(scores, k=10):
    return sum(score / np.log2(idx + 2) for idx, score in enumerate(scores[:k]))

def ndcg_at_k(ranked_docs, relevant_docs, k=5):
    ideal_scores = [1 if doc_id in relevant_docs else 0 for doc_id in ranked_docs]
    actual_scores = [1 if doc_id in relevant_docs else 0 for doc_id in ranked_docs[:k]]
    idcg = dcg_at_k(ideal_scores, k)
    dcg = dcg_at_k(actual_scores, k)
    return dcg / idcg if idcg > 0 else 0

### Annoy

In [8]:
annoy_data = []
for doc in loaded_docs:
    annoy_data.append((doc["id"], doc["embedding"]))

In [9]:
%time

annoy_vs = Annoy.from_embeddings(
    text_embeddings=annoy_data, 
    embedding=OpenAIEmbeddings(), 
    metric="dot")

CPU times: user 1 µs, sys: 1 µs, total: 2 µs
Wall time: 2.15 µs


In [10]:
# !mkdir ./backups/annoy/
annoy_vs.save_local("./backups/annoy/")

In [11]:
# Load index from file
loaded_annoy_vs = Annoy.load_local(
    folder_path="./backups/annoy/", 
    embeddings=OpenAIEmbeddings())

In [12]:
# Calculate cosine similarity for each query-document pair
annoy_similarity_scores = {}
execution_times = [] 
for query_id, query in tqdm(loaded_queries.items(), desc="Computing similarity scores"):
    start_time = time.time()
    scores = loaded_annoy_vs.similarity_search_with_score_by_vector(query["embedding"], k=100)
    end_time = time.time()  # Record end time
    execution_time = (end_time - start_time) * 1000  # Convert to milliseconds
    execution_times.append(execution_time)

    new_scores = []
    for score in scores:
        new_scores.append((int(score[0].page_content), score[1]))
    annoy_similarity_scores[query_id] = sorted(new_scores, key=lambda x: x[1], reverse=True)

mean_execution_time = sum(execution_times) / len(execution_times)
print(f"Mean execution time for all queries: {mean_execution_time:.2f} ms")

Computing similarity scores: 100%|████████████████████████| 112/112 [00:00<00:00, 410.71it/s]

Mean execution time for all queries: 2.39 ms





In [13]:
with open("./backups/annoy/annoy_similarity_scores.pkl", "wb") as f:
    pickle.dump(annoy_similarity_scores, f)

In [14]:
annoy_predictions = [0]*(len(annoy_similarity_scores)+1)
for idx, scores in annoy_similarity_scores.items():
    scores_flattened = [doc for doc, score in scores]
    annoy_predictions[idx] = scores_flattened

In [15]:
annoy_mean_precision_at_k = np.mean(
    [precision_at_k(preds, label) for preds, label in zip(annoy_predictions[1:], ground_truth)])
annoy_mean_recall_at_k = np.mean(
    [recall_at_k(preds, label) for preds, label in zip(annoy_predictions[1:], ground_truth)])
annoy_mean_ndcg_at_k = np.mean(
    [ndcg_at_k(preds, label) for preds, label in zip(annoy_predictions[1:], ground_truth)])
annoy_mean_sps = np.mean(
    [precision_at_k(preds, label, 1) for preds, label in zip(annoy_predictions[1:], ground_truth)])

In [16]:
annoy_mean_precision_at_k, annoy_mean_recall_at_k, annoy_mean_ndcg_at_k, annoy_mean_sps

(0.9705357142857144, 0.003116685016974034, 1.0, 0.9821428571428571)

### FAISS

In [17]:
%time

faiss_vs = FAISS.from_embeddings(
    text_embeddings=annoy_data, 
    embedding=OpenAIEmbeddings(),
    distance_strategy=DistanceStrategy.DOT_PRODUCT)

CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 2.15 µs


In [18]:
# !mkdir ./backups/faiss/
faiss_vs.save_local("./backups/faiss/")

In [19]:
# Load index from file
loaded_faiss_vs = FAISS.load_local(
    folder_path="./backups/faiss/",
    embeddings=OpenAIEmbeddings())

In [20]:
# Calculate cosine similarity for each query-document pair
faiss_similarity_scores = {}
execution_times = [] 
for query_id, query in tqdm(loaded_queries.items(), desc="Computing similarity scores"):
    start_time = time.time()
    scores = loaded_faiss_vs.similarity_search_with_score_by_vector(query["embedding"], k=100)
    end_time = time.time()  # Record end time
    execution_time = (end_time - start_time) * 1000  # Convert to milliseconds
    execution_times.append(execution_time)

    new_scores = []
    for score in scores:
        new_scores.append((int(score[0].page_content), 1 - score[1]))
    faiss_similarity_scores[query_id] = sorted(new_scores, key=lambda x: x[1], reverse=True)

mean_execution_time = sum(execution_times) / len(execution_times)
print(f"Mean execution time for all queries: {mean_execution_time:.2f} ms")

Computing similarity scores: 100%|███████████████████████| 112/112 [00:00<00:00, 1248.71it/s]

Mean execution time for all queries: 0.68 ms





In [21]:
with open("./backups/faiss/faiss_similarity_scores.pkl", "wb") as f:
    pickle.dump(faiss_similarity_scores, f)

In [22]:
faiss_predictions = [0]*(len(faiss_similarity_scores)+1)
for idx, scores in annoy_similarity_scores.items():
    scores_flattened = [doc for doc, score in scores]
    faiss_predictions[idx] = scores_flattened

In [23]:
faiss_mean_precision_at_k = np.mean(
    [precision_at_k(preds, label) for preds, label in zip(faiss_predictions[1:], ground_truth)])
faiss_mean_recall_at_k = np.mean(
    [recall_at_k(preds, label) for preds, label in zip(faiss_predictions[1:], ground_truth)])
faiss_mean_ndcg_at_k = np.mean(
    [ndcg_at_k(preds, label) for preds, label in zip(faiss_predictions[1:], ground_truth)])
faiss_mean_sps = np.mean(
    [precision_at_k(preds, label, 1) for preds, label in zip(faiss_predictions[1:], ground_truth)])

In [24]:
faiss_mean_precision_at_k, faiss_mean_recall_at_k, faiss_mean_ndcg_at_k, faiss_mean_sps

(0.9705357142857144, 0.003116685016974034, 1.0, 0.9821428571428571)