In [None]:
!pip install deasy-client

In [None]:
import os

import numpy as np
from dotenv import load_dotenv
from fastembed import TextEmbedding

from deasy_client import Deasy

# Load variables from a local .env file (if present) into the process environment;
# DEASY_API_KEY is read from os.environ in the configuration cell below.
load_dotenv()

### Deasy Meta-Filter Demo

##### 1. Define tags you want to filter on and instantiate the Deasy Client

In [None]:
# API token, read from the environment (you can get one in the Deasy Tokens Dashboard).
deasy_api_key = os.environ["DEASY_API_KEY"]

# Your Deasy username.
username = "<username>"

# Name of the VDB profile / data source the queries should run against.
vdb_profile_name = "<vdb_profile_name>"

# Client used by every SDK call in the rest of this notebook.
client = Deasy(x_user=username, x_token=deasy_api_key)

##### 2. Use the Deasy SDK to fetch your tag schemas

In [None]:
# Ask the SDK for the tag listing, then keep only its `tags` attribute;
# it is passed to the query below as `tag_schemas`.
tag_listing = client.tags.list()
tag_schemas = tag_listing.tags

##### 3. Ask your question. Use the `columns` argument to choose which fields are returned for each result.
###### This example requests `id`, `filename`, `text`, and `dense` (the dense embedding)

In [None]:
question = "How can I use Deasy's tag schemas and metadata filtering to improve the relevance of my retrieval results?"

# Fields requested for every result; "dense" is read by the optional reranking step.
result_columns = ["id", "filename", "text", "dense"]

response = client.metadata.deasy_select.query(
    query=question,
    vdb_profile_name=vdb_profile_name,
    tag_schemas=tag_schemas,
    columns=result_columns,
)

In [None]:
# Display the raw response of the query above.
response

##### 4. (OPTIONAL) Rerank the results. The question must be embedded with fastembed using the model defined below, which produces 384-dimensional vectors out of the box.

In [None]:
# Number of best-matching results kept after reranking.
top_k = 5

# fastembed model used to embed the question for reranking.
embedder = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")

def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Args:
        v1: Array-like of numbers; input with more than one dimension is flattened.
        v2: Array-like of numbers with the same number of elements as ``v1``.

    Returns:
        The dot product of the flattened inputs divided by the product of their
        Euclidean norms, or ``0.0`` when either input has zero norm (where the
        ratio is undefined).
    """
    # np.asarray also accepts plain lists/tuples, which have no `.shape`.
    a = np.asarray(v1).ravel()
    b = np.asarray(v2).ravel()

    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning) for zero vectors.
        return 0.0

    return np.dot(a, b) / denominator

def _parse_dense(dense):
    """Convert a result's 'dense' field into a 1-D float array."""
    if isinstance(dense, str):
        # String form such as "[0.1, 0.2, ...]": drop the brackets and split on commas.
        dense = [float(x) for x in dense.strip().strip('[]').split(',')]
    return np.asarray(dense, dtype=float)


def rerank_results(question, results, k=None):
    """Rerank results by cosine similarity between the question and each result's dense vector.

    Args:
        question: Text to embed with the module-level ``embedder``.
        results: Sequence of results; each item is indexed with ``'dense'``, which may be
            a string such as ``"[0.1, 0.2]"`` or an already-parsed sequence of numbers.
        k: Maximum number of results to return. Defaults to the module-level ``top_k``.

    Returns:
        A list of at most ``k`` items from ``results``, ordered from most to least similar.
    """
    limit = top_k if k is None else k
    if limit <= 0 or len(results) == 0:
        # A `[-0:]` slice would select every result, so handle "nothing requested" explicitly.
        return []

    question_vector = next(iter(embedder.embed(question)))

    similarities = np.array(
        [
            cosine_similarity(question_vector, _parse_dense(doc['dense']))
            for doc in results
        ],
        dtype=float,
    )
    # np.argsort places NaN last, which would rank an undefined similarity first;
    # push such entries to the bottom instead.
    similarities[np.isnan(similarities)] = -np.inf

    top_indices = np.argsort(similarities)[::-1][:limit]
    return [results[i] for i in top_indices]


# Rerank the results returned by the query above, then display the best matches.
query_results = response["results"]
reranked_response = rerank_results(question, query_results)

reranked_response