# TODO

- Création d'une collection portant le nom de la vidéo à chaque nouvelle exécution          OK
- Ajout d'un vecteur de chaque frame d'une vidéo accompagné de frame_name, encoding_name    (faire le lien avec le fichier d'encodage)


# Imports

In [1]:
import chromadb
import torch
import clip
import os

from embedding import compute_similarity,extract_embedding

# Création

In [2]:
# Création du client et de la base locale
client = chromadb.PersistentClient(path="./chroma_db")

# Fonctions

In [3]:
def create_collection(collection_name, embedding_function):
    """
    Return the ChromaDB collection named ``collection_name``, creating it if needed.

    The call is delegated to ``client.get_or_create_collection``, so calling
    this twice with the same name does not fail.

    Args:
        collection_name (str): Name of the collection to fetch or create.
        embedding_function: Embedding function handed to ChromaDB for the collection.

    Returns:
        Collection: The object returned by ``client.get_or_create_collection``.
    """
    collection_options = {
        "name": collection_name,
        "embedding_function": embedding_function,
    }
    return client.get_or_create_collection(**collection_options)

In [4]:
# Add entries (vectors + metadata) to a collection
def add_detection(collection, ids, embeddings, metadatas):
    """
    Forward ``ids``, ``embeddings`` and ``metadatas`` to ``collection.add``.

    Args:
        collection: Object exposing an ``add(ids=..., embeddings=..., metadatas=...)`` method.
        ids: Identifiers of the entries to add.
        embeddings: Vectors to store.
        metadatas: Metadata to store alongside the vectors.

    Returns:
        None
    """
    payload = {
        "ids": ids,
        "embeddings": embeddings,
        "metadatas": metadatas,
    }
    collection.add(**payload)

In [5]:
def del_detection(collection, ids):
    """
    Delete the entries identified by ``ids`` from ``collection``.

    Args:
        collection: Object exposing a ``delete(ids=...)`` method.
        ids: Identifiers of the entries to remove.
    """
    collection.delete(ids=ids)

# Execution

### Chargement du modèle CLIP

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP ViT-B/32 model on that device, together with the callable
# used later to preprocess images before encoding
clip_model, preprocess = clip.load("ViT-B/32", device=device)

### Définition des paths

In [7]:
# Folder that holds the test videos
path = "C:/Users/cleme/Desktop/Ecole/M1/Projet/Projet_S2/V2/Video_test"
# Name of the video to analyse, without its extension
video_name = "This Video Is 3 Seconds"
# Full path of the corresponding .mp4 file
video_path = "{}.mp4".format(os.path.join(path, video_name))

### Création de la collection associée à la vidéo à analyser

In [8]:
# Sanitize the video_name to make it a valid collection name:
# spaces, hyphens and dots are each replaced by an underscore in a single pass
sanitized_video_name = video_name.translate(
    str.maketrans({" ": "_", "-": "_", ".": "_"})
)

# Define a custom embedding function that conforms to the required signature
class ClipEmbeddingFunction:
    """Callable that encodes an image tensor with the module-level CLIP model."""

    def __call__(self, input):
        """
        Encode ``input`` with ``clip_model.encode_image``.

        Args:
            input (torch.Tensor): Tensor handed as-is to ``clip_model.encode_image``.

        Returns:
            The value returned by ``clip_model.encode_image(input)``.

        Raises:
            ValueError: If ``input`` is not a ``torch.Tensor``.
        """
        if isinstance(input, torch.Tensor):
            return clip_model.encode_image(input)
        raise ValueError("Input to embedding function must be a torch.Tensor")

# Create the collection with the custom embedding function.
# The collection is named after the sanitized video name; because
# create_collection is the last expression of the cell, the notebook
# displays the returned collection.
embedding_function = ClipEmbeddingFunction()
create_collection(sanitized_video_name, embedding_function=embedding_function)

Collection(name=This_Video_Is_3_Seconds)

### Extraction des frames

In [9]:
import cv2
import os
import datetime
from ultralytics import YOLO
from sentence_transformers import SentenceTransformer
import shutil

In [10]:


FPS_TARGET = 12  # Target frame rate (frames per second)
TARGET_SIZE = (640, 360)  # New size of the frames


def create_unique_folder(base_dir, prefix="frames"):
    """
    Create a timestamped folder inside ``base_dir`` and return its path.

    The folder is named ``<prefix>_<DDMM_HHMMSS>`` from the current local time.
    An already existing folder with that name is reused without error.

    Args:
        base_dir (str): Directory in which the folder is created.
        prefix (str): Leading part of the folder name.

    Returns:
        str: Path of the folder.
    """
    stamp = datetime.datetime.now().strftime("%d%m_%H%M%S")
    folder_name = "{}_{}".format(prefix, stamp)
    target = os.path.join(base_dir, folder_name)
    os.makedirs(target, exist_ok=True)
    return target

def resize_frame(frame, size):
    """
    Resize ``frame`` to ``size`` with OpenCV's ``INTER_AREA`` interpolation.

    Args:
        frame: Image passed to ``cv2.resize`` as the source.
        size: Target size passed to ``cv2.resize``.

    Returns:
        The resized image returned by ``cv2.resize``.
    """
    interpolation_mode = cv2.INTER_AREA
    resized = cv2.resize(frame, size, interpolation=interpolation_mode)
    return resized

def extract_and_resize_frames(video_path, output_dir, target_fps, target_size, start_frame=0, custom_output_dir=None):
    """
    Read a video, resize its frames and save them as numbered PNG files.

    Every frame read from the capture, starting at the computed start
    position, is resized to ``target_size`` and written as
    ``frame_<index>.png`` (index on 6 digits, starting at ``start_frame``).

    NOTE(review): ``target_fps`` is only used to convert ``start_frame`` into a
    position in the original video; the frames themselves are not subsampled
    to that rate. Confirm whether subsampling was intended.

    Args:
        video_path (str): Path of the video file to read.
        output_dir (str): Base directory in which a new timestamped folder is
            created when ``custom_output_dir`` is empty or None.
        target_fps (float): Target frame rate in which ``start_frame`` is expressed.
        target_size (tuple): Size passed to ``resize_frame`` for every frame.
        start_frame (int): First frame to extract, expressed in target-rate
            frames. It is clamped to the total number of target frames and is
            also the number given to the first file written.
        custom_output_dir (str | None): Folder to write the frames into. When
            empty or None, a new folder is created under ``output_dir``.

    Returns:
        str | None: The folder containing the frames, or None when the video
        does not exist, cannot be opened, or reports a non-positive frame rate.
    """
    if not os.path.exists(video_path):
        print(f"Erreur: Le fichier vidéo '{video_path}' n'existe pas.")
        return None

    cap = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture is released on every exit path,
    # including early returns and unexpected exceptions.
    try:
        if not cap.isOpened():
            print(f"Erreur: Impossible d'ouvrir la vidéo '{video_path}'")
            return None

        original_fps = cap.get(cv2.CAP_PROP_FPS)
        # A frame rate of 0 (or NaN) would make the conversion below divide by zero.
        if not original_fps > 0:
            print(f"Erreur: FPS invalide ({original_fps}) pour la vidéo '{video_path}'")
            return None

        total_frames_original = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Never let a negative reported frame count produce a negative target count.
        total_frames_target = max(0, int((total_frames_original / original_fps) * target_fps))

        print(f"Total frames original: {total_frames_original}")
        print(f"Total frames cible: {total_frames_target}")

        # Check and clamp start_frame
        if start_frame > total_frames_target:
            print(f"Erreur: La frame de départ ({start_frame}) est supérieure au nombre total de frames cibles ({total_frames_target}).")
            start_frame = total_frames_target  # Clamp so it never exceeds total_frames_target
            print(f"start_frame ajusté à {start_frame}")

        # Rule of three: convert the start frame from target-rate units to
        # original-rate units.
        if total_frames_target > 0:
            start_time = (start_frame * total_frames_original) / total_frames_target
            adjusted_start_frame = round(start_time)
        else:
            # No target frames (very short video or unknown frame count):
            # the ratio is undefined, so start from the beginning.
            adjusted_start_frame = 0
        print(f"Start frame ajusté: {adjusted_start_frame}")

        # Use the custom folder when given, otherwise create a new one
        frames_dir = custom_output_dir if custom_output_dir else create_unique_folder(output_dir, "frames_resized")
        os.makedirs(frames_dir, exist_ok=True)

        # Extract and resize the frames starting at the adjusted position
        extracted_count = start_frame  # number used in the next file name
        saved_count = 0  # number of images actually written
        cap.set(cv2.CAP_PROP_POS_FRAMES, adjusted_start_frame)  # Seek to the adjusted frame

        while True:
            success, frame = cap.read()
            if not success:
                break

            resized_frame = resize_frame(frame, target_size)
            frame_filename = os.path.join(frames_dir, f"frame_{extracted_count:06d}.png")
            # cv2.imwrite reports failure through its return value
            if cv2.imwrite(frame_filename, resized_frame):
                saved_count += 1
            else:
                print(f"Erreur: Impossible d'enregistrer l'image '{frame_filename}'")
            extracted_count += 1
    finally:
        cap.release()

    # Report the number of images written; the previous computation subtracted
    # an original-rate index from a target-rate index.
    print(f"Extraction et redimensionnement terminés. {saved_count} images enregistrées dans '{frames_dir}'")
    return frames_dir



In [None]:
# ============================
#           EXECUTION
# ============================

# Location of the video to process
path = "C:/Users/cleme/Desktop/Ecole/M1/Projet/Projet_S2/V2/Video_test"
video_name = "This Video Is 3 Seconds"
video_path = f"{os.path.join(path, video_name)}.mp4"

# Base folder that receives the extracted frames
frames_base_dir = f"{path}/extraction"
timestamp = datetime.datetime.now().strftime("%d%m_%H%M%S")

# An empty custom folder makes extract_and_resize_frames create a new
# timestamped folder under frames_base_dir
custom_folder = ""
frames_dir = extract_and_resize_frames(
    video_path,
    frames_base_dir,
    FPS_TARGET,
    TARGET_SIZE,
    start_frame=0,
    custom_output_dir=custom_folder,
)


### Embedding des frames une par une

In [15]:
from PIL import Image

# Look the collection up once: it does not change between images
frames_collection = client.get_collection(sanitized_video_name)

# Walk every image found under frames_base_dir (sub-folders included).
# NOTE(review): the id of an entry is the bare file name, so frames with the
# same name in different sub-folders of frames_base_dir share one id — confirm
# whether only the folder of the latest extraction should be walked.
for root, dirs, files in os.walk(frames_base_dir):
    for file in files:
        # Skip anything that is not an image file
        if not file.endswith(('.png', '.jpg', '.jpeg')):
            continue
        image_path = os.path.join(root, file)

        # Load the image; the context manager closes the underlying file as
        # soon as the RGB copy has been made
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        # Preprocess and add a leading batch dimension before moving to the device
        preprocessed_image = preprocess(image).unsqueeze(0).to(device)

        # Extract the embedding and convert it to numpy once for both uses below
        embedding = extract_embedding(preprocessed_image)
        embedding_array = embedding.cpu().detach().numpy()

        print(f"Embedding extrait pour {file}: {embedding_array}")

        # Add the embedding to the ChromaDB collection
        add_detection(
            collection=frames_collection,
            ids=[file],
            embeddings=embedding_array.tolist(),
            metadatas=[{"filename": file}]
        )

Embedding extrait pour frame_000000.png: [[  0.0049387     0.00324    0.015294    0.055829   -0.028008   -0.016487   -0.013419    0.026423    0.054692  -0.0092766    0.022828   -0.024648  -0.0077069   -0.027879    0.021474   -0.026995    0.023537   -0.018817   0.0015015   -0.018712    0.074844     0.02333   -0.044996   -0.026041    0.022688   -0.002147
   -0.0089531   0.0087432    0.022889   -0.025067    0.020696    0.013755   0.0022144   0.0080456   -0.019971   0.0074423    -0.01943   0.0031837   0.0019857    -0.13622   -0.027761 -0.00017972  -0.0069522   0.0066197     0.02497     0.37996   -0.024129    0.015418    0.060189   0.0030632     0.01346    0.012474
     0.051564   -0.035485    0.003145  -0.0072282   -0.043588    0.013794    0.026281    0.018437    0.053402    -0.03686   -0.011146   -0.011012   -0.015395    0.012756   -0.001107    0.045339    0.021598    0.030747   -0.013838   0.0028758    0.037735   -0.019416   -0.033112   -0.024044   -0.011598   0.0063583
    0.0059306    

Insert of existing embedding ID: frame_000059.png
Add of existing embedding ID: frame_000059.png


Embedding extrait pour frame_000059.png: [[ -0.0013096   0.0037897    0.020175    0.044104   -0.030155  -0.0020159   -0.026408    0.036118    0.031658  -0.0079712    0.018063   -0.023135    0.028329  -0.0021836    0.019775   -0.032496    0.026831   -0.017053  -0.0046796   -0.021113    0.078104    0.043557   -0.020443   -0.013188    0.031931    0.033946
    0.0033226   0.0068512    0.041085   -0.028342    0.053279    0.005208  -0.0073794    0.021196  -0.0088818     0.03607   -0.039586   -0.013958  -0.0039419    -0.18812   -0.013834   -0.022929   -0.028052    0.036074   -0.014075     0.31414   -0.047333    0.016325     0.06113   -0.019954  -0.0044814   0.0057414
     0.045712   -0.035648     0.01781  -0.0053565   -0.033291    0.052915  -0.0020281    0.013666    0.032661   -0.045374    0.026237    0.011487   -0.011147    0.008185    0.012311    0.044546    0.016134    0.030982 -0.00055676  -0.0086417    0.030914   -0.034018   -0.016854   -0.036781  -0.0011912   0.0071824
     0.012664   -

# Test ajout

In [22]:
# Génération de données de test (10 vecteurs de dimension 384)
import numpy as np
import random

def generate_test_data(n_vectors=10, dim=384):
    """
    Build random vectors and random metadata for insertion tests.

    Args:
        n_vectors (int): Number of vectors (and metadata dicts) to generate.
        dim (int): Dimension of each vector.

    Returns:
        tuple: ``(vectors, metadatas)`` where ``vectors`` is the array returned
        by ``np.random.rand(n_vectors, dim)`` and ``metadatas`` is a list of
        ``n_vectors`` dicts with the keys ``"frame_name"`` and
        ``"encoding_model"``, each value picked at random.
    """
    frame_names = ["cat", "dog", "bird"]
    encoding_models = ["CLIP", "FLORENCE2"]

    vectors = np.random.rand(n_vectors, dim)

    metadatas = []
    for _ in range(n_vectors):
        # Draw the frame name first, then the model, for each entry
        frame_name = random.choice(frame_names)
        encoding_model = random.choice(encoding_models)
        metadatas.append({"frame_name": frame_name, "encoding_model": encoding_model})

    return vectors, metadatas

In [23]:
# Build 10 random test vectors of dimension 384 together with their metadata.
# NOTE(review): confirm that 384 matches the dimension of the embeddings
# already stored in the target collection.
random_data = generate_test_data(10, 384)

random_metadatas = random_data[1]
random_vectors = random_data[0].tolist()
random_ids = [f"vec_{i}" for i, _ in enumerate(random_vectors)]

# Add the test data to the collection
add_detection(
    collection=client.get_collection(sanitized_video_name),
    ids=random_ids,
    embeddings=random_vectors,
    metadatas=random_metadatas,
)

# Display

In [16]:
# Print every entry returned by client.list_collections(), one per line
collections = client.list_collections()
print("Collections existantes :")
for collection in collections:
    print(f"- {collection}")



Collections existantes :
- This_Video_Is_3_Seconds


In [17]:
# Fetch all the data (ids, embeddings, metadatas) of every collection

collections = client.list_collections()  # List of the collections

for collection in collections:
    print(f"Collection: {collection}")
    detection = client.get_collection(collection)  # Fetch the collection
    all_data = detection.get(include=["embeddings", "metadatas"])  # Fetch everything it stores

    # Print the whole content, one entry at a time
    stored_embeddings = all_data['embeddings']
    for position, entry_id in enumerate(all_data["ids"]):
        print(f"🔹 ID: {entry_id}")
        if stored_embeddings is not None:
            print(f"🧠 Embedding: {stored_embeddings[position]}")
        print(f"📌 Métadonnées: {all_data['metadatas'][position]}")
        print("-" * 40)

"""
print(len(all_data['ids']))
print(all_data['embeddings'][0].shape)"""


Collection: This_Video_Is_3_Seconds
🔹 ID: frame_000059.png
🧠 Embedding: [ -0.0013096   0.0037897    0.020175    0.044104   -0.030155  -0.0020159   -0.026408    0.036118    0.031658  -0.0079712    0.018063   -0.023135    0.028329  -0.0021836    0.019775   -0.032496    0.026831   -0.017053  -0.0046796   -0.021113    0.078104    0.043557   -0.020443   -0.013188    0.031931    0.033946
   0.0033226   0.0068512    0.041085   -0.028342    0.053279    0.005208  -0.0073794    0.021196  -0.0088818     0.03607   -0.039586   -0.013958  -0.0039419    -0.18812   -0.013834   -0.022929   -0.028052    0.036074   -0.014075     0.31414   -0.047333    0.016325     0.06113   -0.019954  -0.0044814   0.0057414
    0.045712   -0.035648     0.01781  -0.0053565   -0.033291    0.052915  -0.0020281    0.013666    0.032661   -0.045374    0.026237    0.011487   -0.011147    0.008185    0.012311    0.044546    0.016134    0.030982 -0.00055676  -0.0086417    0.030914   -0.034018   -0.016854   -0.036781  -0.0011912  

"\nprint(len(all_data['ids']))\nprint(all_data['embeddings'][0].shape)"