* Carga el video utilizando cv2.VideoCapture.
* Extrae los frames del video uno por uno.
* Preprocesa cada frame para que sea compatible con el modelo de PyTorch (tamaño, normalización, etc.).
* Usa un modelo preentrenado de PyTorch (como ResNet) para extraer características de cada frame.
* Aplica un algoritmo de clustering (como K-Means) a las características extraídas.
* Asigna cada frame al cluster correspondiente.
* Visualiza los resultados mostrando los frames y su cluster correspondiente.

In [1]:
import torch
from sklearn.cluster import AgglomerativeClustering
import warnings
from models.resnet18 import get_model as get_resnet18
from preprocessing.transforms import BASELINE
from clustering.metrics import eval_massive_cluster
from clustering.model import clustering_function
from experiment_framework import experiment

# Install an 'ignore' filter so warnings raised from here on are not shown.
warnings.filterwarnings('ignore')

# Use the first CUDA device when torch reports one; otherwise stay on the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

# Settings handed unchanged to eval_massive_cluster by the wrapper below.
epochs = 10
steps = 1


def clustering_eval_function(cluster_labels, tags):
    """Evaluate ``cluster_labels`` against ``tags`` using the module settings.

    Thin wrapper around ``eval_massive_cluster`` that supplies the
    module-level ``steps`` and ``epochs`` as the third and fourth arguments.
    Both globals are looked up when the wrapper is called, not when it is
    defined.

    Returns:
        Whatever ``eval_massive_cluster`` returns.
    """
    return eval_massive_cluster(cluster_labels, tags, steps, epochs)


def _build_experiment(experiment_name, video_path):
    """Return the keyword arguments for one ``experiment`` call.

    Every call creates its own model via ``get_resnet18(device)`` and its own
    ``AgglomerativeClustering`` instance, so experiments do not share those
    objects. The clusterer receives ``None`` as its first positional argument
    together with ``distance_threshold=50``.

    Args:
        experiment_name: Value stored under the ``"name"`` key.
        video_path: Value stored under the ``"path"`` key.
    """
    return {
        "name": experiment_name,
        "model": get_resnet18(device),
        "preprocessing": BASELINE,
        "path": video_path,
        "grouper_function": clustering_function(
            AgglomerativeClustering(None, distance_threshold=50)
        ),
        "evaluation_function": clustering_eval_function,
        "show": True,
        "device": device,
    }


# One entry per video; the settings are identical apart from name and path.
EXPERIMENT_DEFINITIONS = [
    _build_experiment(experiment_name, video_path)
    for experiment_name, video_path in (
        ("ResnetAglomerative video.mp4", "video.mp4"),
        ("ResnetAglomerative video2.mp4", "video2.mp4"),
    )
]

# Run the experiments in definition order, expanding each dict into keywords.
for exp_definition in EXPERIMENT_DEFINITIONS:
    experiment(**exp_definition)


Experiment name: ResnetAglomerative video.mp4
Precision - Mean: 0.77, std: 0.0295348935
Recall - Mean: 0.66, std: 0.0369854593
F1-Score - Mean: 0.66, std: 0.0325276812
Accuracy - Mean: 0.66, std: 0.0369854593

Experiment name: ResnetAglomerative video2.mp4
Precision - Mean: 0.75, std: 0.0333574902
Recall - Mean: 0.75, std: 0.0266345592
F1-Score - Mean: 0.73, std: 0.0309757219
Accuracy - Mean: 0.75, std: 0.0266345592
