In [2]:
!pip install behave==1.2.6

[0m

In [38]:
import json
import os
import pickle
from abc import ABC, abstractmethod
from typing import Tuple, List

import numpy as np
import torch
from sentence_transformers import SentenceTransformer

from behave import given

from tqdm.notebook import tqdm

In [18]:
# Version tag of the artefact under evaluation; it is formatted into the
# artefact folder name as `v<ARTEFACT_VERSION>`.
ARTEFACT_VERSION = '01'

In [19]:
# Root directory of all artefacts: taken from the environment, defaulting to '/artefact'.
ARTEFACT_ROOT_FOLDER = os.getenv('ARTEFACT_ROOT_FOLDER', '/artefact')
# Versioned location of the 'eberron' artefact: <root>/eberron/v<version>.
ARTEFACT_FOLDER = os.path.join(
    ARTEFACT_ROOT_FOLDER,
    'eberron',
    'v{}'.format(ARTEFACT_VERSION),
)

# Load the Artefact

In [20]:
# Load the pre-computed document embeddings stored with the artefact.
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# ARTEFACT_FOLDER must only ever point at artefacts from a trusted source.
with open(os.path.join(ARTEFACT_FOLDER, 'embeddings.pkl'), 'rb') as f:
    embeddings = pickle.load(f)


In [21]:
# Read the description of the embedding model that was saved with the artefact.
with open(os.path.join(ARTEFACT_FOLDER, 'model_metadata.json')) as f:
    model_metadata = json.loads(f.read())
# Stop here unless the recorded model description starts with 'SentenceTransformer',
# since the rest of the notebook loads the model through that class.
assert model_metadata['embedding_model']['str'].startswith('SentenceTransformer')

In [22]:
# Read the per-chunk metadata saved with the artefact.
with open(os.path.join(ARTEFACT_FOLDER, 'chunk_metadata.json')) as f:
    chunk_metadata = json.loads(f.read())


In [25]:
# Collect the Markdown chunk files of the artefact, in sorted name order.
file_names = sorted(
    entry
    for entry in os.listdir(os.path.join(ARTEFACT_FOLDER, 'chunks'))
    if entry.endswith('.md')
)
# Each chunk is stored at the position given by the integer that precedes the
# first '.' in its file name, so the list is pre-sized and filled by index.
chunks = [None] * len(file_names)
for file_name in tqdm(file_names):
    file_path = os.path.join(ARTEFACT_FOLDER, 'chunks', file_name)
    with open(file_path) as f:
        chunks[int(file_name.partition('.')[0])] = f.read()

  0%|          | 0/7639 [00:00<?, ?it/s]

# Load the Embedding Model

In [28]:
# Instantiate the encoder named in the artefact metadata at the recorded
# revision, then place it on the CPU.
# NOTE(review): trust_remote_code=True presumably lets code shipped in the model
# repository run locally; confirm the pinned revision is one you have vetted.
embedding_model = SentenceTransformer(
    model_metadata['embedding_model']['name'],
    trust_remote_code=True,
    revision=model_metadata['embedding_model']['revision'],
).to("cpu")

2025-01-14 02:21:42.080578: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-14 02:21:42.095865: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-14 02:21:42.114739: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-14 02:21:42.120431: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-14 02:21:42.134174: I tensorflow/core/platform/cpu_feature_guar

# Evaluate

## Define the Test

In [47]:
class Retriever(ABC):
    """Base class describing the retrieval interface used by the evaluation."""

    def retrieve(self, query: str, k: int) -> Tuple[list, list]:
        """Look up documents for ``query``.

        The base implementation does nothing and returns ``None``.

        NOTE(review): this method is not decorated with ``@abstractmethod``, so
        subclasses are not forced to override it.

        Args:
            query: Query text.
            k: Number of documents requested.
        """

In [48]:
class EmbeddingsRetreiver(Retriever):
    """Retriever that scores pre-computed document embeddings against a query.

    The query is encoded with ``model`` and compared with every entry of
    ``embeddings`` through a dot product; the ``k`` highest-scoring entries win.
    """

    def __init__(self, model: SentenceTransformer, embeddings, metadata: List[dict]):
        """Keep the objects needed at query time.

        Args:
            model: Encoder used to embed incoming queries.
            embeddings: Pre-computed document embeddings. Must be something
                ``np.dot`` turns into a NumPy array (assumed to hold one row
                per document; confirm against the artefact).
            metadata: Per-document metadata, indexed by the same position as
                the corresponding row of ``embeddings``.
        """
        self.model = model
        self.embeddings = embeddings
        self.metadata = metadata

    def retrieve(self, query: str, k: int) -> Tuple[list, list]:
        """Return the indices and metadata of the ``k`` best matches for ``query``.

        Args:
            query: Query text to embed.
            k: Number of documents to return; clamped to the number of
                available documents.

        Returns:
            ``(document_indices, document_metadata)``, both ordered from the
            highest to the lowest similarity.
        """
        # Use the instance's own model and embeddings rather than notebook globals,
        # so that every instance works with the objects it was constructed with.
        query_embed = self.model.encode(query, normalize_embeddings=True)
        # The query is normalised; the dot product is a cosine similarity only if
        # the stored embeddings are normalised too (presumably they are; confirm).
        similarities = torch.from_numpy(np.dot(self.embeddings, query_embed.T))
        # topk raises when k exceeds the number of scored documents.
        k = min(k, similarities.shape[-1])
        document_indices = similarities.topk(k).indices.tolist()
        # Look the metadata up by retrieved index (not by iterating the metadata itself).
        document_metadata = [self.metadata[i] for i in document_indices]

        return document_indices, document_metadata

    def retreive(self, query: str, k: int):
        """Backward-compatible alias for :meth:`retrieve` (original misspelling)."""
        return self.retrieve(query, k)

# Notebook-level retriever wired to the loaded model, embeddings and chunk metadata.
retreiver = EmbeddingsRetreiver(
    model=embedding_model,
    embeddings=embeddings,
    metadata=chunk_metadata,
)

In [29]:
from behave import given, when, then


@given('an embedding model and document embeddings')
def step_impl(context):
    """Attach a fresh embeddings-based retriever to the scenario context."""
    context.retriever = EmbeddingsRetreiver(
        model=embedding_model,
        embeddings=embeddings,
        metadata=chunk_metadata,
    )

@when('I query {query}')
def step_impl(context, query):
    """Run ``query`` through the retriever stored on the scenario context.

    Saves the query on ``context.query`` and the retrieved indices and metadata
    on ``context.indices`` and ``context.metadata``.

    Args:
        context: behave scenario context; must carry ``retriever``.
        query: Query text captured from the step.
    """
    context.query = query
    # The 'given' step stores the retriever as `context.retriever`; the previous
    # spelling (`reteriever`) was never set and raised AttributeError.
    # `retreive` is the method name the retriever class exposes in this notebook.
    context.indices, context.metadata = context.retriever.retreive(query, 10)

@then('one of the top {k:d} retrieved documents should include {keyword} in their title')
def step_impl(context, k, keyword):
    """Check that ``keyword`` occurs in the title of one of the top ``k`` results.

    The parameter names match the fields captured by the step pattern
    (``k`` as an integer, ``keyword`` as text), because behave passes captured
    fields to the step function by name.

    Args:
        context: behave scenario context; must carry ``metadata`` (and
            ``query``) as set by the 'when' step.
        k: Number of leading results to inspect. The 'when' step retrieves 10
            results, so larger values inspect at most those 10.
        keyword: Text that must appear in at least one inspected title.
    """
    # NOTE(review): assumes each metadata dict exposes its title under the key
    # 'title' — confirm against chunk_metadata.json.
    titles = [str(meta.get('title', '')) for meta in context.metadata[:k]]
    assert any(keyword in title for title in titles), (
        f'{keyword!r} not found in the titles of the top {k} documents '
        f'for query {context.query!r}: {titles}'
    )