In [None]:
# Name of the Chroma collection used by this notebook.
COLLECTION_NAME = "products"
# Relative path to the CSV file holding the sample product rows.
DATA_PATH = "../../data/sample/products.csv"

In [None]:
import os
from dotenv import load_dotenv
import chromadb
from chromadb.config import Settings

# Pull variables from the local .env file into the process environment.
load_dotenv(".env")

CHROMA_SERVER_AUTH_CREDENTIALS = os.getenv("CHROMA_SERVER_AUTH_CREDENTIALS")
# Fail fast with an actionable message rather than handing a None/empty token
# to the client and getting an opaque auth error later.
if not CHROMA_SERVER_AUTH_CREDENTIALS:
    raise RuntimeError(
        "CHROMA_SERVER_AUTH_CREDENTIALS is not set; define it in .env or in the "
        "environment before running this notebook."
    )

# HTTP client for the hosted Chroma server, using token-based client auth.
client = chromadb.HttpClient(
    host="https://chroma.liara.run",
    settings=Settings(
        chroma_client_auth_provider="chromadb.auth.token_authn.TokenAuthClientProvider",
        chroma_client_auth_credentials=CHROMA_SERVER_AUTH_CREDENTIALS,
    ),
)

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to encode product titles.
# NOTE(review): this checkpoint id is not the `sentence-transformers/LaBSE` one;
# confirm the pooling setup sentence-transformers applies to it is the intended one.
LaBSE_model = SentenceTransformer(model_name_or_path="setu4993/LaBSE")

In [None]:
# Idempotent: reuses the collection when it already exists, so re-running this
# cell (or Restart & Run All) does not fail the way a bare create_collection would.
client.get_or_create_collection(name=COLLECTION_NAME)

In [None]:
import pandas as pd

# Load the sample products and open the collection that will receive them.
sample_products = pd.read_csv(DATA_PATH)
product_collection = client.get_collection(name=COLLECTION_NAME)

# The title is both the stored document and the text that gets embedded.
documents = sample_products["title_fa"].tolist()
# Chroma ids are strings, so every id value is stringified.
ids = [str(product_id) for product_id in sample_products["id"].tolist()]
# One metadata dict per row, carrying every CSV column.
metadatas = sample_products.to_dict(orient="records")
# Encode all titles in a single call so the model can batch them, instead of
# running one forward pass per row.
embeddings = LaBSE_model.encode(documents).tolist()

# upsert (rather than add) keeps this cell re-runnable: existing ids are updated.
product_collection.upsert(
    documents=documents, metadatas=metadatas, ids=ids, embeddings=embeddings,
)

# Quick visual check of the first few stored records.
print(product_collection.peek(limit=5))