In [1]:
import torchvision

In [2]:
# Disabled setup step, kept for reference: extracts the project ZIP archive
# '/content/tesis-main.zip' into '/content/'. Uncomment every line below to run it.
# import zipfile

# # Path to the ZIP archive
# zip_file_path = '/content/tesis-main.zip'

# # Directory where the files should be extracted
# extract_to_path = '/content/'

# # Extract the archive
# with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
#     zip_ref.extractall(extract_to_path)

# print("Archivos extraídos con éxito.")

* Carga el video utilizando cv2.VideoCapture.
* Extrae los frames del video uno por uno.
* Preprocesa cada frame para que sean compatibles con el modelo de PyTorch (tamaño, normalización, etc.).
* Usa un modelo preentrenado de PyTorch (como ResNet) para extraer características de cada frame.
* Aplica un algoritmo de clustering (como K-Means) a las características extraídas.
* Asigna cada frame al cluster correspondiente.
* Visualiza los resultados mostrando los frames y su cluster correspondiente.

In [3]:
import numpy as np

class SimilarityClustering:
    """Threshold-based clustering of an ordered sequence of feature vectors.

    Each vector is compared with its immediate predecessor using cosine
    similarity. Whenever that similarity drops below ``threshold`` a new
    cluster is opened, so the resulting labels are non-decreasing integers
    starting at 0.

    The class exposes ``fit`` and ``labels_`` so it can be wrapped by
    ``clustering_function`` in the same way as a scikit-learn estimator.
    """

    def __init__(self, threshold):
        """Store the similarity cut-off.

        Args:
            threshold: Cosine-similarity value below which two consecutive
                vectors are placed in different clusters.
        """
        self.threshold = threshold
        self.labels_ = None

    def fit(self, features):
        """Compute ``labels_`` for ``features``.

        Args:
            features: Ordered, indexable collection of feature vectors
                (``len(features)`` and ``features[i]`` must work).

        Returns:
            ``self``, with ``labels_`` set to an integer array holding one
            cluster id per input vector (empty when ``features`` is empty).
        """
        n_samples = len(features)
        if n_samples == 0:
            # Nothing to compare; the original code failed on features[0] here.
            self.labels_ = np.zeros(0, dtype=int)
            return self

        adjacent_similarities = np.array(
            [cosine_similarity(features[i], features[i + 1])
             for i in range(n_samples - 1)]
        )

        # The first vector has no predecessor. label_clusters never reads index 0,
        # so a constant placeholder is enough; it avoids the needless self-similarity
        # computation (which is NaN for an all-zero vector). np.insert keeps the
        # dtype of the adjacent-similarity array.
        similarities = np.insert(adjacent_similarities, 0, 1.0)

        self.labels_ = label_clusters(similarities, self.threshold)
        return self

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1: First vector (anything accepted by ``np.dot`` / ``np.linalg.norm``).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``. When either vector has zero
        norm the similarity is undefined; ``0.0`` is returned instead of NaN so
        that threshold comparisons downstream behave predictably.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning).
        return 0.0
    return np.dot(vec1, vec2) / denominator

def label_clusters(cosine_similarities, threshold):
    """Turn a sequence of consecutive similarities into cluster labels.

    Args:
        cosine_similarities: Sequence in which entry ``i`` (for ``i >= 1``) is the
            similarity between item ``i`` and item ``i - 1``. Entry 0 is ignored.
        threshold: A similarity strictly below this value opens a new cluster.

    Returns:
        Integer NumPy array with the same length as ``cosine_similarities``.
        The first item is always in cluster 0 and labels never decrease.
    """
    labels = np.zeros(len(cosine_similarities), dtype=int)
    boundaries_seen = 0
    # Skip position 0: the first item has no predecessor and anchors cluster 0.
    for position, similarity in enumerate(cosine_similarities[1:], start=1):
        if similarity < threshold:
            boundaries_seen += 1
        labels[position] = boundaries_seen
    return labels

def clustering_function(model):
    """Wrap a clustering estimator as a plain ``features -> labels`` callable.

    Args:
        model: Object exposing ``fit(features)`` and, after fitting, a
            ``labels_`` attribute.

    Returns:
        A function that fits ``model`` on the given features and returns the
        resulting ``model.labels_``. The same ``model`` instance is reused (and
        refitted) on every call.
    """
    def get_labels(features):
        # Fitting is done purely for its side effect of populating labels_.
        model.fit(features)
        return model.labels_

    return get_labels


In [4]:
import torch
from sklearn.cluster import AgglomerativeClustering
import warnings
from models.resnet18 import get_model as get_resnet18
from preprocessing.transforms import BASELINE
from clustering.metrics import eval_massive_cluster
from clustering.model import clustering_function
from experiment_framework import experiment
from validation import VALIDATION_DATASET
from utils import load_dataset
# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Use the first CUDA device when PyTorch can see one, otherwise fall back to the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

# Extra arguments forwarded to eval_massive_cluster on every evaluation.
epochs = 10
steps = 1


def clustering_eval_function(cluster_labels, tags):
    """Evaluate cluster labels against ``tags`` with ``eval_massive_cluster``.

    ``steps`` and ``epochs`` are looked up in the notebook globals at call time.
    """
    return eval_massive_cluster(cluster_labels, tags, steps, epochs)


dataset = load_dataset(VALIDATION_DATASET.keys())

# NOTE(review): `clustering_function` is imported from clustering.model in this
# cell, which rebinds the name defined earlier in the notebook - confirm both
# implementations are equivalent.
# The two experiments share model factory, preprocessing, dataset and evaluation;
# they differ only in the grouping strategy.
EXPERIMENT_DEFINITIONS = [
    {
        "name": "ResnetAglomerative",
        "model": get_resnet18(device),
        "preprocessing": BASELINE,
        "dataset": dataset,
        # n_clusters=None lets distance_threshold decide how many clusters to form.
        "grouper_function": clustering_function(
            AgglomerativeClustering(n_clusters=None, distance_threshold=50)
        ),
        "evaluation_function": clustering_eval_function,
        "show": True,
        "device": device,
    },
    {
        "name": "SimilarityClustering",
        "model": get_resnet18(device),
        "preprocessing": BASELINE,
        "dataset": dataset,
        "grouper_function": clustering_function(
            SimilarityClustering(threshold=0.999)
        ),
        "evaluation_function": clustering_eval_function,
        "show": True,
        "device": device,
    },
]

# Run every configured experiment in order.
for exp_definition in EXPERIMENT_DEFINITIONS:
    experiment(**exp_definition)


Experiment name: ResnetAglomerative CNN_posts.mp4
Precision - Mean: 0.75, std: 0.1124711093
Recall - Mean: 0.59, std: 0.1011511262
F1-Score - Mean: 0.64, std: 0.1054856045
Accuracy - Mean: 0.59, std: 0.1011511262

Experiment name: ResnetAglomerative PS_unistall.mp4
Precision - Mean: 0.64, std: 0.1100941510
Recall - Mean: 0.55, std: 0.1072706508
F1-Score - Mean: 0.58, std: 0.0959396456
Accuracy - Mean: 0.55, std: 0.1072706508

Experiment name: ResnetAglomerative FIFA_home_results.mp4
Precision - Mean: 0.61, std: 0.0231082076
Recall - Mean: 0.47, std: 0.0031606319
F1-Score - Mean: 0.51, std: 0.0010098722
Accuracy - Mean: 0.47, std: 0.0031606319

Experiment name: ResnetAglomerative Pinterest_view_save.mp4
Precision - Mean: 0.98, std: 0.0000000000
Recall - Mean: 0.29, std: 0.0000000000
F1-Score - Mean: 0.43, std: 0.0000000000
Accuracy - Mean: 0.29, std: 0.0000000000

Experiment name: ResnetAglomerative X_twitter_comment.mp4
Precision - Mean: 0.61, std: 0.0619669217
Recall - Mean: 0.57, st