In [None]:
import sys

import torch
from sentence_transformers import SentenceTransformer, util

sys.path.append("..")

from src.auth.aurora import get_sales_client, get_portal_client

In [None]:
# Sales side: run a full-table read of t_client through the sales client.
# The result is used further down as a DataFrame with a "name" column.
sales_client = get_sales_client()
sales_client_df = sales_client.query_to_pandas(
    "SELECT * FROM t_client"
)

In [None]:
# Portal side: read the api_clients rows whose parent_id is 1.
# NOTE(review): what parent_id = 1 identifies is not visible in this
# notebook -- confirm the filter selects the intended set of clients.
portal_client = get_portal_client()
portal_client_df = portal_client.query_to_pandas(
    "SELECT * FROM api_clients WHERE parent_id = 1"
)

In [None]:
# Notebook-wide sentence-embedding model; get_embedding_dict below encodes
# client names with it.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

def get_embedding_dict(client_df, encoder=None):
    """Map each client name in ``client_df`` to its embedding.

    Args:
        client_df: DataFrame with a ``name`` column holding the client names.
        encoder: Object exposing ``encode(names, convert_to_tensor=True)``.
            When omitted, the notebook-level ``model`` is used.

    Returns:
        dict: ``{name: embedding}`` with one entry per distinct, non-null
        name, in order of first appearance. The embedding is the row of the
        encoder output at that name's position. An empty dict is returned
        (without calling the encoder) when there are no usable names.
    """
    # Missing names (None/NaN) are not strings, so they are dropped rather
    # than handed to the encoder. Duplicates would only be encoded to then
    # overwrite each other in the dict, so they are removed up front too.
    client_names = client_df["name"].dropna().drop_duplicates().to_list()
    if not client_names:
        return {}

    if encoder is None:
        encoder = model

    embeddings = encoder.encode(client_names, convert_to_tensor=True)
    return dict(zip(client_names, embeddings))

In [None]:
# Build the name -> embedding lookup for each side with the same helper
# (sales first, then portal).
sales_embeddings, portal_embeddings = (
    get_embedding_dict(sales_client_df),
    get_embedding_dict(portal_client_df),
)

In [None]:
# How many of the best-scoring portal names to print per sales client.
TOP_K = 5

# Stack the portal embeddings once so each sales client is scored against
# every portal client with a single cos_sim call, instead of one call (and one
# .item() transfer) per sales/portal pair.
portal_names = list(portal_embeddings)
portal_matrix = (
    torch.stack(list(portal_embeddings.values()))
    if portal_names
    else None  # torch.stack rejects an empty list; nothing to score against
)

for sales_name, sales_embedding in sales_embeddings.items():
    print(sales_name)
    if portal_matrix is None:
        similarity_dict = {}
    else:
        # A single query vector against the stacked matrix yields one row of
        # scores, aligned with portal_names.
        scores = util.cos_sim(sales_embedding, portal_matrix)[0].tolist()
        similarity_dict = dict(zip(portal_names, scores))
    # sorted() is stable, so ties keep the portal dict's insertion order.
    for name, score in sorted(
        similarity_dict.items(), key=lambda item: item[1], reverse=True
    )[:TOP_K]:
        print(f"------> {name} = {score}")