In [1]:
import os
from pathlib import Path
from PIL import Image
import numpy as np
import pandas as pd 
import cv2

# Home-relative dataset locations: frame images, per-object masks, per-class masks.
video_folder = "~/swifts_project/UnlabeledVideosSegmentation/Classes/JPEGImages/"
object_masks_folder = "~/swifts_project/UnlabeledVideosSegmentation/output/SwiftObjects/"
class_masks_folder = "~/swifts_project/UnlabeledVideosSegmentation/output/SwiftClasses/"

# Label map stored one level above the frame folder; its "color_rgb" column
# holds one "R,G,B" string per label.
labels = pd.read_csv(Path(video_folder) / "../labelmap.txt", sep=':')
split_rgb = [color.split(',') for color in labels["color_rgb"]]
rgb_array = np.array(split_rgb, dtype=int).flatten()

# Pad the label colours with black up to a full palette, so that no entry of
# Pillow's default palette for P images can interfere with the conversion.
NUM_ENTRIES_IN_PILLOW_PALETTE = 256
num_bands = len("RGB")
palettedata = list(rgb_array)
num_entries_in_palettedata = len(palettedata) // num_bands
palettedata += [0, 0, 0] * (NUM_ENTRIES_IN_PILLOW_PALETTE - num_entries_in_palettedata)

# This image exists only to carry the palette; its size does not matter.
arbitrary_size = (16, 16)
palimage = Image.new('P', arbitrary_size)
palimage.putpalette(palettedata)

# Display the resulting palette as one RGB triple per row.
np.reshape(palimage.getpalette(), (-1, 3))


array([[250,  50,  83],
       [255,  96,  55],
       [ 36, 179,  83],
       [255, 204,  51],
       [184,  61, 245],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0

In [2]:
# Break every "R,G,B" label colour into its three channel strings
split_rgb = [color.split(',') for color in labels["color_rgb"]]
# Convert the channels to integers and lay them out as one flat 1-D array
rgb_array = np.array(split_rgb, dtype=int).flatten()
rgb_array


array([250,  50,  83, 255,  96,  55,  36, 179,  83, 255, 204,  51, 184,
        61, 245,   0,   0,   0])

In [3]:
# Defining classes to be compatible with previous work using the SAHI and Ultralytics YOLOv8 libraries.
class Detection:
    """A single detected object in a frame.

    Bundles the object's mask, bounding box, category and track id behind
    small wrapper objects (``mask``, ``bbox``, ``category``) so the result can
    be consumed through ``get_shifted_mask()`` / ``get_shifted_box()`` and
    ``category.id`` / ``category.name``.
    """

    class Category:
        """Label of a detection: an ``id`` and a ``name``."""

        def __init__(self, id, name):
            self.id = id
            self.name = name

    class Box:
        """Wrapper around a bounding box value."""

        def __init__(self, box):
            self.box = box

        def get_shifted_box(self):
            """Return the stored box unchanged (no shift is applied)."""
            return self.box

    class Mask:
        """Wrapper around a mask value."""

        def __init__(self, mask):
            self.mask = mask

        def get_shifted_mask(self):
            """Return the stored mask unchanged (no shift is applied)."""
            return self.mask

    def __init__(self, mask, bbox, category_id, category_name, track_id):
        """Wrap the raw values of one detection.

        Args:
            mask: Mask of the object; stored in a ``Detection.Mask``.
            bbox: Bounding box of the object; stored in a ``Detection.Box``.
            category_id: Identifier of the object's category.
            category_name: Name of the object's category.
            track_id: Identifier of the object, stored as given.
        """
        self.mask = Detection.Mask(mask=mask)
        self.bbox = Detection.Box(box=bbox)
        self.category = Detection.Category(id=category_id, name=category_name)
        self.track_id = track_id

In [4]:
from swift_activity_callback import SwiftActivityCallback
from swift_closeness_callback import SwiftClosenessCallback
import sys
import matplotlib.pyplot as plt

def process_video(subdir, files):
    """Run the activity and closeness callbacks over the frames of one video.

    For every frame name in ``files`` the frame image is read from ``subdir``
    and the object mask and class mask with the same stem (``<stem>.png``) are
    read from ``object_masks_folder`` / ``class_masks_folder`` under the
    video's folder name. One ``Detection`` is built per object id found in the
    object mask, labelled with the most frequent class colour under that
    object's pixels. Objects whose label is row 5 of ``labels`` are skipped.
    The frame (with the object contours drawn on it) and its detections are
    then passed to both callbacks.

    Relies on the notebook globals ``labels``, ``palimage``,
    ``object_masks_folder`` and ``class_masks_folder``.

    Args:
        subdir: Folder containing the frame images of one video. Its base name
            selects the mask sub-folders and the callbacks' output folder.
        files: Frame file names inside ``subdir``; processed in the order given.
            Note (to confirm): the callbacks appear to be frame-order
            dependent, so callers should pass the names in frame order.
    """
    # Row index of ``labels`` that is never reported as a detection.
    # NOTE(review): presumably the background row (0,0,0) -- confirm against labelmap.txt.
    IGNORED_LABEL_INDEX = 5

    video_name = os.path.basename(subdir)
    output_root = Path("~/swifts_project/UnlabeledVideosSegmentation/MaskProcessingOutput").expanduser() / video_name

    activity_callback = SwiftActivityCallback(trigger_name="activity_callback",
                                              trigger_length=30*4,
                                              frame_tolerance=20,
                                              trigger_bound=0.3,
                                              output_path=output_root / "ActivityCallbackOutput",
                                              snapshot_path=output_root / "ActivityCallbackOutput" / "Snapshots",
                                              classification_model="./HuggingFaceVideoClassification/model")
    closeness_callback = SwiftClosenessCallback(trigger_name="closeness_callback",
                                                trigger_length=30*2,
                                                frame_tolerance=30,
                                                trigger_bound=1.1,
                                                output_path=output_root / "ClosenessCallbackOutput",
                                                snapshot_path=output_root / "ClosenessCallbackOutput" / "Snapshots",
                                                classification_model="./HuggingFaceVideoClassification/model")

    # Loop-invariant: one (R, G, B) integer row per label, in ``labels`` row order.
    label_colors = np.array([color.split(',') for color in labels["color_rgb"]], dtype=int)

    # Global NumPy print setting (print arrays in full); idempotent, so set once.
    np.set_printoptions(threshold=sys.maxsize)

    for file in files:
        frame_id = Path(file).stem
        mask_rel_path = Path(video_name) / (frame_id + '.png')

        with Image.open(os.path.join(subdir, file)) as img:
            img_np = np.array(img.convert("RGB"), dtype=np.uint8)

        with Image.open(Path(object_masks_folder).expanduser() / mask_rel_path) as obj_mask, \
                Image.open(Path(class_masks_folder).expanduser() / mask_rel_path) as cls_mask:
            print(f"Reading {object_masks_folder / mask_rel_path}")
            print(f"Reading {class_masks_folder / mask_rel_path}")
            # Object mask: palette indices act as object ids.
            obj_ids = np.array(obj_mask.quantize(dither=Image.NONE))
            # Class mask: snap every pixel to the label palette without dithering.
            cls_quantized = cls_mask.convert("RGB").quantize(palette=palimage, dither=Image.NONE)

        cls_palette = np.reshape(cls_quantized.getpalette(rawmode="RGB"), (-1, 3))
        cls_ids = np.array(cls_quantized)

        detections = []
        for obj_id in np.unique(obj_ids):
            mask = np.where(obj_ids == obj_id, 255, 0).astype(np.uint8)

            # Majority vote of the class palette indices under this object's pixels.
            cls_count = np.bincount(cls_ids[np.nonzero(mask)])
            cls_color = cls_palette[np.argmax(cls_count), :]

            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

            # Bounding box (x1, y1, x2, y2) of the largest external contour.
            largest_contour = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest_contour)
            bbox = (x, y, x + w, y + h)

            # Draws on the frame that is later handed to the callbacks.
            cv2.drawContours(img_np, contours, -1, tuple(map(int, cls_color)), 3)

            # Several palette entries can share a colour (the padding is black),
            # so resolve the colour back to the first label row that has it.
            label_mask = np.all(label_colors == cls_color, axis=1)
            first_matching_row_index = min(labels.loc[label_mask, :].index)
            label = labels.loc[first_matching_row_index, :]
            if label.name != IGNORED_LABEL_INDEX:
                detections.append(Detection(mask, bbox, label.name, label["# label"], obj_id))

        activity_callback.callback(frame_id, img_np, detections)
        closeness_callback.callback(frame_id, img_np, detections)

In [5]:
#process_video(Path(video_folder) / "05110124/", os.listdir(Path(video_folder) / "05110124/"))

In [6]:
#process_video(Path(video_folder) / "ACICALADO/", os.listdir(Path(video_folder) / "ACICALADO/"))

In [None]:
# Process every folder below video_folder that actually contains frames.
for subdir, dirs, files in os.walk(os.path.expanduser(video_folder), followlinks=True):
    # os.walk yields names in arbitrary order; sorting in place makes the
    # traversal order of the video folders deterministic.
    dirs.sort()
    # Folders without files (e.g. the root that only holds the per-video
    # folders) would still build both callbacks for nothing, so skip them.
    if not files:
        continue
    # NOTE(review): the callbacks take frame-count windows, so frames are passed
    # in name order -- assumes frame names sort chronologically; confirm.
    process_video(subdir, sorted(files))

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0


Reading ~/swifts_project/UnlabeledVideosSegmentation/output/SwiftObjects/20230606_145859_tp00001/00000000.png
Reading ~/swifts_project/UnlabeledVideosSegmentation/output/SwiftClasses/20230606_145859_tp00001/00000000.png
Calculating distance between object 0, of type: Nest, 2, and object 1, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 2, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 3, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 4, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 5, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 6, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 7, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 8, of type: Nest, 2
Calculating distance between object 0, of type: Nest, 2, and object 9, of ty

In [None]:
#process_video(Path(video_folder) / "COPULA/", os.listdir(Path(video_folder) / "COPULA/"))

In [None]:
#process_video(Path(video_folder) / "05110122/", os.listdir(Path(video_folder) / "05110122/"))

In [None]:
# Round-trip a one-row image holding black plus four label colours through an
# adaptive 256-colour palette and back to RGB, then list the colours that remain.
sample_pixels = np.array([[[0,0,0],[250,50,83],[255,96,55],[36,179,83],[255,204,51]]]).astype(np.uint8)
paletted = Image.fromarray(sample_pixels).convert("P", palette=Image.ADAPTIVE, colors=256)
im = paletted.convert('RGB')
remaining_colors = np.unique(np.array(im.getdata()), axis=0)
print(remaining_colors)

In [None]:
#im=Image.open('/home/shared/Swifts.Research.Project/XMem/SwiftsObjects/Annotations/05110122/job_1322754_annotations_2024_10_19_19_50_42_segmentation mask 1.1/SegmentationClass/00000000.png')
#print(np.unique(np.array(im.getdata()), axis=0))
#im=im.convert("P", palette=Image.ADAPTIVE, colors=256)
#print(np.unique(np.array(im.getdata()), axis=0))
#im=im.convert('RGB')
#print(np.unique(np.array(im.getdata()), axis=0))

In [None]:
# import torch
# torch.cuda.empty_cache()  # Releases unused memory
# torch.cuda.ipc_collect()  # Cleans up memory fragmentation
# import os

# os.system("nvidia-smi -q -d MEMORY")  # Check memory usage
# os.system("nvidia-smi --gpu-reset")   # Reset GPU (admin required)

In [None]:
from transformers import AutoModel, AutoModelForVideoClassification, AutoProcessor

# Load the model together with its classification head. Plain AutoModel loads
# only the backbone, so the pushed checkpoint would lack the classifier
# weights and they would be randomly re-initialised when the model is loaded
# for video classification.
model = AutoModelForVideoClassification.from_pretrained("../../HuggingFaceClassifModel")

# The preprocessor must be pushed as well: the video-classification pipeline
# needs the preprocessor_config.json that this upload provides.
preprocessor = AutoProcessor.from_pretrained("../../HuggingFaceClassifModel")

# Push to the Hugging Face Hub.
# Requires prior authentication (e.g. `huggingface-cli login`); without it the
# Hub answers "401 Unauthorized". Never hard-code the token in the notebook.
model.push_to_hub("swift-events-classification-model")
preprocessor.push_to_hub("swift-events-classification-model")


2025-06-01 12:05:10.049523: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-01 12:05:10.234061: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748768710.325127  566593 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748768710.360593  566593 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-01 12:05:10.552333: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-683c17c9-131f87072e3a34b24fad61ff;4bc63716-6327-42a1-ac61-11b31eb29897)

Invalid username or password.

In [1]:
# Import the factory under an alias so that storing the classifier does not
# overwrite the factory function itself.
from transformers import pipeline as hf_pipeline

# Video classifier loaded from the Hugging Face Hub.
video_classifier = hf_pipeline("video-classification", model="jorgegalinmor/swift-events-classification-model")
# Backward-compatible alias: the classifier used to be stored under `pipeline`.
pipeline = video_classifier

2025-06-05 01:51:57.228893: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-05 01:51:57.239382: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749077517.252112 2121641 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749077517.255840 2121641 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-05 01:51:57.270240: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/345M [00:00<?, ?B/s]

Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at jorgegalinmor/swift-events-classification-model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


OSError: jorgegalinmor/swift-events-classification-model does not appear to have a file named preprocessor_config.json. Checkout 'https://huggingface.co/jorgegalinmor/swift-events-classification-model/tree/main' for available files.