# Création

In [1]:
import chromadb

# Open the persistent Chroma client rooted at ./chroma_db
client = chromadb.PersistentClient(path="./chroma_db")

# Fetch the two collections used by this notebook (get-or-create by name)
detection = client.get_or_create_collection(name="detection")
Image = client.get_or_create_collection(name="image")

# Fonctions

In [2]:
# Add records (vectors + metadata) to the detection collection
def add_detection(ids, embeddings, metadatas):
    """Forward a batch of records to ``detection.add``.

    Args:
        ids: Identifiers of the records.
        embeddings: Vectors, passed through unchanged.
        metadatas: Metadata entries, passed through unchanged.
    """
    payload = {
        "ids": ids,
        "embeddings": embeddings,
        "metadatas": metadatas,
    }
    detection.add(**payload)

In [3]:
def del_detection(ids):
    """Delete the records with the given ids from the ``detection`` collection.

    Args:
        ids: Identifiers forwarded unchanged to ``detection.delete``.
    """
    detection.delete(ids=ids)

In [4]:
# Génération de données de test (10 vecteurs de dimension 384)
import numpy as np
import random

def generate_test_data(n_vectors=10, dim=384, seed=None, categories=("cat", "dog", "bird")):
    """Build random vectors and matching category metadata for testing.

    Args:
        n_vectors: Number of vectors (and metadata entries) to generate.
        dim: Dimension of each vector.
        seed: Optional seed. When ``None`` (default) the global NumPy and
            ``random`` generators are used, as before. Otherwise dedicated
            generators seeded with ``seed`` are used so the output is
            reproducible across runs.
        categories: Non-empty sequence of labels the ``"category"`` value is
            drawn from.

    Returns:
        A ``(vectors, metadatas)`` tuple: ``vectors`` is an array of shape
        ``(n_vectors, dim)`` with values in ``[0, 1)`` and ``metadatas`` is a
        list of ``n_vectors`` dicts of the form ``{"category": label}``.

    Raises:
        ValueError: If ``categories`` is empty.
    """
    labels = list(categories)
    if not labels:
        raise ValueError("categories must contain at least one label")

    if seed is None:
        # Legacy path: rely on the process-wide generators.
        vectors = np.random.rand(n_vectors, dim)
        chooser = random
    else:
        # Reproducible path: private generators, global state is untouched.
        vectors = np.random.default_rng(seed).random((n_vectors, dim))
        chooser = random.Random(seed)

    metadatas = [{"category": chooser.choice(labels)} for _ in range(n_vectors)]
    return vectors, metadatas

In [None]:
# Recherche par proximité de vecteurs
from scipy.spatial.distance import cosine
import numpy as np

def ask_query(collection, query_vector, query, max_dist):   # max_dist is expected in [0, 2]
    """Query ``collection`` with a metadata filter and keep hits within ``max_dist``.

    The metadata filter is delegated to ``collection.query`` through its
    ``where`` argument; the cosine distance between every returned embedding
    and ``query_vector`` is then computed locally and used as the cut-off.

    Args:
        collection: Collection to search. It must provide ``count()`` and
            ``query(query_embeddings=..., n_results=..., where=..., include=...)``.
        query_vector: Query embedding; flattened to 1-D before use.
        query: Metadata filter passed as ``where``. An empty or ``None``
            filter is sent as ``None`` (no filtering).
        max_dist: Largest cosine distance a hit may have to be kept.

    Returns:
        A dict with the parallel lists ``"ids"``, ``"embeddings"``,
        ``"metadatas"`` and ``"distances"``; ``"distances"`` holds the cosine
        distances that were compared against ``max_dist``.
    """
    filtered_results = {
        "ids": [],
        "embeddings": [],
        "metadatas": [],
        "distances": [],
    }

    # Nothing to search: avoid asking the collection for zero results.
    num_documents = collection.count()
    if num_documents == 0:
        return filtered_results

    target = np.asarray(query_vector).ravel()

    raw = collection.query(
        query_embeddings=[target.tolist()],
        n_results=num_documents,  # ask for every candidate, the cut-off is applied below
        where=query or None,
        include=["embeddings", "metadatas"],
    )

    # Results are grouped per query embedding; a single one was sent, so the
    # hits live at index 0 of every field.
    hits = zip(raw["ids"][0], raw["embeddings"][0], raw["metadatas"][0])

    for doc_id, embedding, metadata in hits:
        distance = cosine(np.asarray(embedding).ravel(), target)
        if distance <= max_dist:
            filtered_results["ids"].append(doc_id)
            filtered_results["embeddings"].append(embedding)
            filtered_results["metadatas"].append(metadata)
            filtered_results["distances"].append(distance)

    return filtered_results


In [20]:
# Recherche par proximité de vecteurs
from scipy.spatial.distance import cosine
import numpy as np

def ask_query(collection, query_vector, query, max_dist):   # max_dist is expected in [0, 2]
    """Return the records matching ``query`` whose cosine distance is within ``max_dist``.

    Every record of ``collection`` is loaded with ``collection.get`` and
    filtered locally: a record is kept when its metadata equals ``query`` on
    every key of ``query`` and the cosine distance between its embedding and
    ``query_vector`` is at most ``max_dist``.

    Args:
        collection: Collection providing ``get(include=[...])`` that returns a
            mapping with ``"ids"``, ``"embeddings"`` and ``"metadatas"``.
        query_vector: Query embedding; flattened to 1-D before use.
        query: Dict of ``metadata key -> required value``. An empty or
            ``None`` query disables the metadata filter.
        max_dist: Largest cosine distance a record may have to be kept.

    Returns:
        A dict with the parallel lists ``"ids"``, ``"embeddings"``,
        ``"metadatas"`` and ``"distances"`` (cosine distances), in the order
        the collection returned the records.
    """
    all_data = collection.get(include=["embeddings", "metadatas"])

    ids = all_data["ids"]
    embeddings = all_data["embeddings"]
    metadatas = all_data["metadatas"]
    # Explicit None checks: truth-testing a NumPy array would raise.
    if embeddings is None:
        embeddings = []
    if metadatas is None:
        metadatas = [None] * len(ids)

    target = np.asarray(query_vector).ravel()
    criteria = dict(query) if query else {}

    results = {
        "ids": [],
        "embeddings": [],
        "metadatas": [],
        "distances": []
    }

    for doc_id, embedding, metadata in zip(ids, embeddings, metadatas):
        # A record without metadata can only match an empty query.
        fields = metadata if metadata is not None else {}
        if any(key not in fields or fields[key] != value for key, value in criteria.items()):
            continue

        # The distance is only computed for records that passed the metadata filter.
        distance = cosine(target, np.asarray(embedding).ravel())
        if distance <= max_dist:
            results["ids"].append(doc_id)
            results["embeddings"].append(embedding)
            results["metadatas"].append(metadata)
            results["distances"].append(distance)

    return results

    

# Exécution

In [5]:
# Generate the test data

# Insert 10 vectors of dimension 384, using their positions as string ids
vectors, metadatas = generate_test_data(n_vectors=10, dim=384)
add_detection(
    ids=[str(position) for position in range(len(vectors))],
    embeddings=vectors,
    metadatas=metadatas,
)

In [10]:
# Fetch every record (ids, embeddings, metadatas)
all_data = detection.get(include=["embeddings", "metadatas"])

# Print the whole content, one record at a time
has_embeddings = all_data['embeddings'] is not None
for i, doc_id in enumerate(all_data["ids"]):
    print(f"🔹 ID: {doc_id}")
    if has_embeddings:
        print(f"🧠 Embedding: {all_data['embeddings'][i]}")
    print(f"📌 Métadonnées: {all_data['metadatas'][i]}")
    print("-" * 40)

# Record count, then the shape of the first embedding
print(len(all_data['ids']))
print(all_data['embeddings'][0].shape)



🔹 ID: 0
🧠 Embedding: [4.20689360e-01 6.65608731e-01 6.88242652e-01 1.07389350e-01
 8.25969975e-01 8.34605625e-01 9.07591531e-01 6.10553756e-01
 4.33776758e-01 6.46125128e-02 2.94450452e-01 2.65345091e-01
 6.07924492e-01 9.31004741e-01 2.87431157e-01 6.21124319e-01
 5.69217502e-01 1.29633561e-01 8.99026765e-01 3.19665148e-01
 5.20852021e-01 1.64124640e-01 1.57231193e-01 5.66328033e-01
 2.84178799e-01 6.23025604e-01 2.57900008e-01 7.06783412e-01
 5.30249537e-01 8.58998410e-01 8.45940352e-02 7.89973402e-01
 1.05879244e-01 2.73048541e-02 9.92994490e-02 6.38203315e-01
 9.70619666e-01 4.88070982e-01 8.53528904e-01 4.13090321e-01
 9.63321102e-01 8.66409451e-01 8.96133563e-01 4.84728344e-01
 3.93009963e-01 6.48832351e-02 5.67146438e-01 9.98340864e-02
 8.98653167e-01 6.69959717e-02 9.97304368e-02 4.66064611e-02
 6.25485312e-01 3.66494256e-01 8.86465888e-02 2.39447169e-01
 6.07931650e-01 1.24731367e-01 9.66137442e-01 3.03407531e-01
 8.09358173e-01 6.34647015e-01 6.78389446e-01 6.38032377e-01
 1.

In [23]:
# Query-based search: records tagged "cat" within distance 0.8 of the first test vector

research = ask_query(detection, vectors[0], query={"category": "cat"}, max_dist=0.8)

In [24]:
# Display the search results

embeddings_present = research['embeddings'] is not None
for i, doc_id in enumerate(research["ids"]):
    print(f"🔹 ID: {doc_id}")
    if embeddings_present:
        print(f"🧠 Embedding: {research['embeddings'][i]}")
    print(f"📌 Métadonnées: {research['metadatas'][i]}")
    print(f"📏 Distance: {research['distances'][i]}")
    print("-" * 40)

🔹 ID: 5
🧠 Embedding: [0.16501514 0.43102511 0.74864862 0.6914652  0.06673695 0.18245603
 0.65888978 0.85647495 0.21840625 0.21164063 0.79739436 0.90714835
 0.17796686 0.20462782 0.49020753 0.24111085 0.13503134 0.90303488
 0.92616206 0.1855838  0.20166236 0.58065091 0.01438745 0.78612911
 0.91387314 0.68796424 0.73462764 0.56113077 0.83777066 0.92407569
 0.76992944 0.21945147 0.60039603 0.4345997  0.84477241 0.1000421
 0.36254459 0.75698316 0.85323171 0.06705854 0.31771276 0.0792671
 0.34102848 0.79042852 0.53134298 0.46995683 0.84930738 0.9365682
 0.59891501 0.2929078  0.54295335 0.43831957 0.15694937 0.88578569
 0.66250366 0.8226372  0.8887353  0.9439627  0.05526363 0.04321218
 0.64106407 0.87680733 0.06526874 0.52161609 0.34908036 0.58412399
 0.46504051 0.10408514 0.5856201  0.32591973 0.46126905 0.10695279
 0.68992961 0.91044718 0.39966637 0.78104795 0.7822425  0.51657304
 0.69214412 0.85709648 0.57989927 0.39918899 0.91645247 0.16602606
 0.88449415 0.15580085 0.34291384 0.37757293