In [1]:
from ultralytics import YOLO, SAM
import os
import cv2

In [2]:
import torch

def get_bounding_box(mask):
    # Convert tensor to numpy array if it's a PyTorch tensor
    if isinstance(mask, torch.Tensor):
        mask = mask.cpu().numpy()
   
    _ , height, width = mask.shape  
    min_x, min_y = width, height
    max_x, max_y = 0, 0
    
    for y in range(height):
        for x in range(width):
            if mask[0, y, x] == True:  
                min_x = min(min_x, x)
                min_y = min(min_y, y)
                max_x = max(max_x, x)
                max_y = max(max_y, y)
    
    # If no True pixels found, return None
    if min_x == width or min_y == height or max_x == 0 or max_y == 0:
        print("No 'True' pixels found in the mask.")
        return None
     
    width_bb = max_x - min_x + 1
    height_bb = max_y - min_y + 1
    
    return min_x, min_y, width_bb, height_bb

In [3]:
def crop_image(image_path, min_x, min_y, max_x, max_y):
    image = cv2.imread(image_path)
    cropped_image = image[min_y:max_y, min_x:max_x]
    return cropped_image

def extract_frames(video_path, output_folder, focus_point, auto_crop=False):
    vidcap = cv2.VideoCapture(video_path)
    success, image = vidcap.read()
    count = 0
    prefix = video_path.split(".")[0]
    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    num_digits = len(str(total_frames))
    if auto_crop:
        sam = SAM('sam_l.pt')
        cropped_output_folder = f"{output_folder}_cropped"
        while success:
            filename = f"{prefix}_{count:0{num_digits}}.jpg"
            cv2.imwrite(output_folder + "/" + filename, image)  # save frame as JPEG file
            result = sam(output_folder + "/" + filename, points=focus_point)
            bbox = get_bounding_box(result[0].masks.data)
            min_x, min_y, width, height = bbox
            cropped_image = crop_image(output_folder + "/" + filename,min_x, min_y, min_x+width, min_y+height)
            cv2.imwrite(f"{cropped_output_folder}/{filename}", cropped_image)
            success, image = vidcap.read()
            print('Saved frame %s' % filename)
            count += 1
    else: 
         while success:
            filename = f"{prefix}_{count:0{num_digits}}"
            cv2.imwrite(output_folder + "/" + filename, image)  # save frame as JPEG file
            success, image = vidcap.read()
            print('Saved frame %s' % filename)
            count += 1


In [4]:
def crop_images(image_folder, output_folder, focus_point):
    sam = SAM('sam_l.pt')
    image_files = os.listdir(image_folder)
    image_paths = [os.path.join(image_folder, file) for file in image_files]
    for path in image_paths:
        
        filename = path.split("\\")[-1]
        filename = filename.split(".")[0]
        
        result = sam(path, points=focus_point)
        bbox = get_bounding_box(result[0].masks.data)
        min_x, min_y, width, height = bbox
        cropped_image = crop_image(path,min_x, min_y, min_x+width, min_y+height)
        print(f"{output_folder}/{filename}")
        cv2.imwrite(f"{output_folder}/{filename}.jpg", cropped_image)



In [5]:

def process_video(video_path, save_dir_images, project, sub_project, focus_point, auto_crop=False):
    extract_frames(video_path=video_path, output_folder=save_dir_images,focus_point=focus_point, auto_crop=auto_crop)
    yolo =  YOLO("../runs/detect/train31/weights/best.pt")
    
    if auto_crop:
        image_files = os.listdir(f"{save_dir_images}_cropped")
        image_paths = [os.path.join(f"{save_dir_images}_cropped", file) for file in image_files]
    else:
        image_file = os.listdir(save_dir_images)
        image_paths = [os.path.join(save_dir_images, file) for file in image_files]

    print(image_paths)
    results = yolo(image_paths, stream=True, save_txt=True, project=project, name=sub_project)
    for result in results:
        boxes = result.boxes  # Boxes object for bbox outputs

In [10]:
PATH_YOLO = "best.pt"
def process_folder(data_folder, output_folder, project, sub_project,focus_point, auto_crop=False):
    yolo =  YOLO(PATH_YOLO)
    if auto_crop:
        crop_images(data_folder, output_folder=f"{output_folder}_cropped", focus_point=focus_point)
    if auto_crop:
        image_files = os.listdir(f"{output_folder}_cropped")
        image_paths = [os.path.join(f"{output_folder}_cropped", file) for file in image_files]
    else:
        image_files = os.listdir(data_folder)
        image_paths = [os.path.join(data_folder, file) for file in image_files]

    
    results = yolo(image_paths, stream=True, save_txt=True, project=project, name=sub_project)
    for result in results:
        boxes = result.boxes  # Boxes object for bbox outputs

In [11]:
PROJECT = "AutoAnnotation"
SUB_PROJECT = "auto_labels"
VIDEO_PATH = "testy.mp4"
test_vid = "TEST.mp4"
OUTPUT_FOLDER = "images_test"
FOCUS_POINT = [1100,1400]

process_folder(data_folder="test_images",
                output_folder=OUTPUT_FOLDER, 
                project=PROJECT, 
                sub_project=SUB_PROJECT,
                focus_point=FOCUS_POINT,
                auto_crop=True)


image 1/1 c:\Users\idi40\RDD\RDD\test_images\Norway_000501.jpg: 1024x1024 1020.4ms
Speed: 75.0ms preprocess, 1020.4ms inference, 13.0ms postprocess per image at shape (1, 3, 1024, 1024)
images_test_cropped/Norway_000501

image 1/1 c:\Users\idi40\RDD\RDD\test_images\Norway_000502.jpg: 1024x1024 281.0ms
Speed: 4.0ms preprocess, 281.0ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1024)
images_test_cropped/Norway_000502

image 1/1 c:\Users\idi40\RDD\RDD\test_images\Norway_000503.jpg: 1024x1024 334.0ms
Speed: 4.0ms preprocess, 334.0ms inference, 0.0ms postprocess per image at shape (1, 3, 1024, 1024)
images_test_cropped/Norway_000503

image 1/1 c:\Users\idi40\RDD\RDD\test_images\Norway_000504.jpg: 1024x1024 323.0ms
Speed: 4.0ms preprocess, 323.0ms inference, 1.0ms postprocess per image at shape (1, 3, 1024, 1024)
images_test_cropped/Norway_000504

image 1/1 c:\Users\idi40\RDD\RDD\test_images\Norway_000505.jpg: 1024x1024 330.0ms
Speed: 4.0ms preprocess, 330.0ms inference, 1

In [8]:
process_video(video_path=test_vid, 
              save_dir_images=OUTPUT_FOLDER,
              project=PROJECT, 
              sub_project=SUB_PROJECT,
              focus_point=FOCUS_POINT, 
              auto_crop=True)

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/sam_l.pt to 'sam_l.pt'...


100%|██████████| 1.16G/1.16G [00:28<00:00, 43.4MB/s]





FileNotFoundError: images_test/TEST_00.jpg does not exist