## Mounting Google Drive

In [1]:
# Mount Google Drive at /content/drive; every path used in this notebook
# lives under /content/drive/MyDrive/hvac-test/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Installing dependencies

In [2]:
!pip install roboflow -q
#!pip install gdown -q
!pip install ultralytics -q
#!mkdir /content/drive/MyDrive/hvac-test/

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/88.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.7/88.7 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m77.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m94.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m64.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Download the shared Google Drive folder (referenced by its folder ID) into
# the project directory on Drive; -q suppresses gdown's progress output.
!gdown --folder 1dURx9E7JfXHB7t-ztyH3mCag2FF01PNc -O /content/drive/MyDrive/hvac-test/ -q

In [None]:
!unzip /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images.zip -d /content/drive/MyDrive/hvac-test/hvacs-dataset/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/IMG_8141_HEIC.rf.29dcebd44e157550ff93e8b8a11dc895.jpg  
  inflating: /content/drive/MyDrive/hvac-test/hvacs-dataset/__MACOSX/flat-hvac-images/._IMG_8141_HEIC.rf.29dcebd44e157550ff93e8b8a11dc895.jpg  
  inflating: /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/a462e19d-4834-46d5-a5ac-89a6f1661e93_Android_1756329152938_jpg.rf.3344df81e75eb28efe26f1fa0507c722.jpg  
  inflating: /content/drive/MyDrive/hvac-test/hvacs-dataset/__MACOSX/flat-hvac-images/._a462e19d-4834-46d5-a5ac-89a6f1661e93_Android_1756329152938_jpg.rf.3344df81e75eb28efe26f1fa0507c722.jpg  
  inflating: /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/7684ad6f-86b9-4ea8-bf31-cfceab4868b3_filenameA289BDFC-6FD1-4DD8-987A-2B0E7FEAB896_jpg.rf.dc629871e4bc066c6fcb2149309ea26b.jpg  
  inflating: /content/drive/MyDrive/hvac-test/hvacs-dataset/__MACOSX/flat-hvac

In [None]:
# Count the .jpg files in the extracted image folder. The trailing note
# presumably records the count before (8992) and after (5053) de-duplication.
!ls /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/*.jpg | wc -l  # 8992-> 5053

5053


## EDA

The image dataset is completely unprocessed: the files do not have appropriate names, and a concerning number of images are duplicated. This section therefore deletes the duplicates and renames every file. In manual experiments, MD5 hashing could not remove all duplicates, so we use perceptual hashing instead.

In [None]:
import os
from collections import defaultdict
from tqdm import tqdm
import cv2

def remove_duplicates_and_rename(image_folder):
    """Delete perceptually duplicated JPEGs in a folder and renumber the rest.

    Every ``.jpg`` file (case-insensitive) directly inside ``image_folder`` is
    reduced to a 64-bit average hash: the image is resized to 8x8, converted
    to grayscale, and each pixel is encoded as ``1`` if it is brighter than
    the mean, ``0`` otherwise. Files sharing a hash are treated as duplicates
    and only the first one (in sorted filename order) is kept. The surviving
    files are then renamed ``1.jpg``, ``2.jpg``, ... following the sorted
    order of their original names.

    WARNING: this function is destructive. Duplicates are deleted from disk,
    and so are files that ``cv2.imread`` cannot decode.

    Args:
        image_folder: Path to the folder containing the images.

    Returns:
        Sorted list of the file names that were deleted (duplicates and
        unreadable files).
    """
    image_folder = os.path.abspath(image_folder)
    # Sorted so that the file kept for each hash does not depend on the
    # arbitrary order returned by os.listdir().
    image_files = sorted(f for f in os.listdir(image_folder) if f.lower().endswith('.jpg'))

    kept_by_hash = {}
    removed = []

    for filename in tqdm(image_files, desc="Calculating perceptual hashes"):
        img = cv2.imread(os.path.join(image_folder, filename))
        if img is None:
            # cv2.imread returns None when it cannot decode the file. Such
            # files are dropped from the dataset, as before.
            removed.append(filename)
            continue

        small_img = cv2.resize(img, (8, 8))
        gray = cv2.cvtColor(small_img, cv2.COLOR_BGR2GRAY)
        avg = gray.mean()
        hash_str = ''.join('1' if pixel > avg else '0' for pixel in gray.flatten())

        if hash_str in kept_by_hash:
            removed.append(filename)
        else:
            kept_by_hash[hash_str] = filename

    for filename in removed:
        os.remove(os.path.join(image_folder, filename))

    remaining_files = sorted(kept_by_hash.values())

    # Two-phase rename. Renaming straight to "<n>.jpg" would overwrite a kept
    # file that already carries that name (for example when the function is
    # re-run on an already numbered folder: "10.jpg" -> "2.jpg" destroys the
    # existing "2.jpg"). Moving everything to temporary names first guarantees
    # that every final name is free when it is assigned.
    staged = []
    for index, old_name in enumerate(remaining_files, start=1):
        new_name = f"{index}.jpg"
        if old_name == new_name:
            continue
        temp_name = f".renaming-{index}.tmp"
        os.rename(os.path.join(image_folder, old_name),
                  os.path.join(image_folder, temp_name))
        staged.append((temp_name, new_name))

    for temp_name, new_name in staged:
        os.rename(os.path.join(image_folder, temp_name),
                  os.path.join(image_folder, new_name))

    # `removed` is already sorted because it was filled while iterating the
    # sorted listing.
    return removed


# Destructive step: remove_duplicates_and_rename deletes and renames files in
# place inside this Drive folder. The recorded run took about 13 minutes for
# 8992 images.
path = "/content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/"
duplicates = remove_duplicates_and_rename(path)

Calculating perceptual hashes: 100%|██████████| 8992/8992 [13:13<00:00, 11.34it/s]


## Uploading data to Roboflow

In [None]:
import roboflow
import cv2

import os
from getpass import getpass

# `API_KEY = API_KEY` raises NameError on a fresh kernel. Read the key from the
# environment and fall back to an interactive prompt, so the secret never has
# to be written into the notebook.
API_KEY = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

# Folder holding the de-duplicated, renumbered images.
path = "/content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/"

rf = roboflow.Roboflow(api_key=API_KEY)

# Upload the image folder to the "hvac-test" project of the "notfound"
# workspace, using 5 parallel workers and up to 3 retries.
workspace = rf.workspace("notfound")
workspace.upload_dataset(
    path,
    "hvac-test",
    num_workers=5,
    project_license="MIT",
    project_type="object-detection",
    batch_name="initial upload",
    num_retries=3,
    is_prediction=False
)

loading Roboflow workspace...
loading Roboflow project...
['/data.yaml', '/data_1.yaml']


100%|██████████| 5053/5053 [00:00<00:00, 983426.20it/s]

Created project notfound/hvac-test-km6yv





[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/55.jpg (8vmGhfAeWvoCf8o2hVTU) [3.1s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/50.jpg (6yDxqxvrs1YUJqIFMSPA) [108.4s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/56.jpg (liGxBOWylXDRtXHMKKBq) [2.7s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/57.jpg (6f1YDjGCFAe9dKUBlbkH) [1.9s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/59.jpg (rkRHhwk2nGZHJlIsBcNH) [2.7s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/60.jpg (DNSvR3KzLpClb9CxfKDd) [2.0s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/58.jpg (WpbZrwBf9xNf0nkPL9gd) [3.4s]
[UPLOADED] /content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/61.jpg (5C0hvHUi6DLKntwzbXC1) [2.5s]
[UPLOADED] /content/drive/MyDrive/hva

## Training

In [9]:
HOME = "/content/drive/MyDrive/hvac-test/"
%cd {HOME}

from roboflow import Roboflow

path = "/content/drive/MyDrive/hvac-test/hvacs-dataset/flat-hvac-images/"

rf = Roboflow(api_key="hAmY3zNHCanvartx0nyI")
project = rf.workspace("notfound").project("hvac-test-km6yv")
version = project.version(2)
dataset = version.download("yolov11")

/content/drive/MyDrive/hvac-test
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in hvac-test-2 to yolov11:: 100%|██████████| 91071/91071 [00:08<00:00, 11269.38it/s]





Extracting Dataset Version Zip to hvac-test-2 in yolov11:: 100%|██████████| 1712/1712 [00:14<00:00, 121.38it/s]


In [7]:
HOME = "/content/drive/MyDrive/hvac-test/"
%cd {HOME}

# Train a detector starting from yolo11n.pt on the downloaded dataset
# (150 epochs, batch size 32, 640 px images, plots enabled).
# NOTE(review): the run presumably lands under HOME/runs/detect/<name>/ because
# of the %cd above; the recorded log shows name=train6 for this run - confirm
# that later cells load weights from the intended run folder.
!yolo task=detect mode=train model=yolo11n.pt data="/content/drive/MyDrive/hvac-test/hvac-test-2/data.yaml" epochs=150 batch=32 imgsz=640 plots=True

/content/drive/MyDrive/hvac-test
Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/hvac-test/hvac-test-2/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=150, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train6, nbs=64, nms=False, opset=None, optimize=False, optimizer=

## Testing in videos
The inference results for each video are stored in a new folder.

In [None]:
!cd /content/drive/MyDrive/hvac-test/hvacs-dataset/Videos/
!mkdir /content/drive/MyDrive/hvac-test/hvacs-dataset/Inferences/

import cv2
from ultralytics import YOLO
import os

def process_videos_with_yolo(model_path, input_videos_folder, output_folder, conf_threshold=0.5):
    """Run a YOLO model on every .mp4 in a folder and save annotated copies.

    Each video is read frame by frame, passed through the model, and the
    frame returned by ``results[0].plot()`` is written to
    ``<output_folder>/processed_<original name>`` using the ``mp4v`` codec at
    the source resolution and frame rate.

    Args:
        model_path: Path to the weights file loaded with ``YOLO``.
        input_videos_folder: Folder scanned (non-recursively) for ``.mp4``
            files; the extension check is case-insensitive.
        output_folder: Destination folder; created if it does not exist.
        conf_threshold: Confidence threshold passed to the model as ``conf``.

    Returns:
        None. Videos that cannot be opened, or whose output file cannot be
        created, are reported with a printed message and skipped.
    """
    model = YOLO(model_path)

    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order is stable between runs.
    video_files = sorted(
        f for f in os.listdir(input_videos_folder)
        if os.path.splitext(f)[1].lower() == '.mp4'
    )

    for video_file in video_files:
        input_path = os.path.join(input_videos_folder, video_file)
        output_path = os.path.join(output_folder, f"processed_{video_file}")

        cap = cv2.VideoCapture(input_path)
        out = None
        try:
            if not cap.isOpened():
                print(f"Skipping {input_path}: could not open video")
                continue

            # Keep the frame rate as a float: truncating e.g. 29.97 to 29
            # makes the annotated video drift from the source duration.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps or fps != fps:
                # Missing or NaN frame rate in the container metadata; a
                # writer opened with fps=0 produces an unusable file.
                fps = 30.0
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                print(f"Skipping {input_path}: could not create {output_path}")
                continue

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = model(frame, conf=conf_threshold, verbose=False)
                out.write(results[0].plot())
        finally:
            # Always release the handles, even if inference raises, so the
            # output file is finalized and the capture is not leaked.
            cap.release()
            if out is not None:
                out.release()


import glob
import os

# Ultralytics auto-increments the run folder (train, train2, ...). The training
# log recorded in this notebook shows name=train6, so a hardcoded ".../train/"
# path can silently load weights from an older run. Use the most recently
# written best.pt, and fall back to the original path if none is found.
default_model_path = "/content/drive/MyDrive/hvac-test/runs/detect/train/weights/best.pt"
candidate_weights = glob.glob("/content/drive/MyDrive/hvac-test/runs/detect/train*/weights/best.pt")
model_path = max(candidate_weights, key=os.path.getmtime, default=default_model_path)
print(f"Using weights: {model_path}")

input_folder = "/content/drive/MyDrive/hvac-test/hvacs-dataset/Videos/"
output_folder = "/content/drive/MyDrive/hvac-test/hvacs-dataset/Inferences/"

process_videos_with_yolo(
    model_path = model_path,
    input_videos_folder = input_folder,
    output_folder = output_folder,
    conf_threshold=0.5
)


mkdir: cannot create directory ‘/content/drive/MyDrive/hvac-test/hvacs-dataset/Inferences/’: File exists
