# This notebook requires annotated images

First, download the Label Studio repository.  
Then follow its tutorial to annotate your images before running the cells below.

# Data pre-processing: sub-image algorithm and train-test-val split

## CUT-IMG algorithm

In [3]:
from datetime import datetime
import cv2
import os

def calculate_splits(W, H, subim_size, tol_width, tol_height, tol_xy):
    """Choose how many square sub-images to cut along each axis of a W x H image.

    The search starts from the smallest grid that covers the whole image
    (ceil(W / subim_size) columns and ceil(H / subim_size) rows) and removes
    columns or rows until the overlap between neighbouring sub-images stays
    within the given tolerances. Once a column/row has been removed, the
    remaining sub-images may no longer cover the full image.

    Args:
        W: image width in pixels (strictly positive).
        H: image height in pixels (strictly positive).
        subim_size: side length of the square sub-images in pixels (strictly positive).
        tol_width: maximum area shared by two horizontal neighbours, as a
            fraction of the sub-image area (non-negative).
        tol_height: same as ``tol_width`` for vertical neighbours.
        tol_xy: maximum value of ``overlap_width * overlap_height``, as a
            fraction of the sub-image area (non-negative).

    Returns:
        Tuple ``(N, M, overlap_width, overlap_height)``: number of sub-images
        along the width and the height, and the overlap in pixels between
        neighbours along each axis (0 when there is no overlap).

    Raises:
        ValueError: if a dimension or the sub-image size is not strictly
            positive, or if a tolerance is negative (or NaN).
    """
    if subim_size <= 0:
        raise ValueError("The sub-image size needs to be strictly positive.")
    if W <= 0 or H <= 0:
        raise ValueError("The passed image width and height need to be strictly positive.")
    # Written as "not (x >= 0)" so that NaN tolerances are rejected as well.
    if not (tol_width >= 0 and tol_height >= 0 and tol_xy >= 0):
        raise ValueError("The overlap tolerances need to be non-negative.")

    N = (W + subim_size - 1) // subim_size  # ceil(W / subim_size)
    M = (H + subim_size - 1) // subim_size  # ceil(H / subim_size)
    subim_area = subim_size ** 2

    # The loop always terminates: with a single sub-image along an axis the
    # overlap on that axis is 0, which satisfies any non-negative tolerance,
    # so N and M never drop below 1.
    while True:
        if N == 1 or W >= N * subim_size:
            overlap_width = 0       # no overlap if only one image or if enough space for all subimages
        else:
            overlap_width = (N * subim_size - W) / (N - 1)
        if M == 1 or H >= M * subim_size:
            overlap_height = 0      # same along height axis
        else:
            overlap_height = (M * subim_size - H) / (M - 1)

        overlap_area_width = subim_size * overlap_width
        overlap_area_height = subim_size * overlap_height
        overlap_shared_area = overlap_height * overlap_width

        if overlap_area_width > tol_width * subim_area:
            N -= 1
        elif overlap_area_height > tol_height * subim_area:
            M -= 1
        elif overlap_shared_area > tol_xy * subim_area:
            # Both axes are individually fine but the shared corner is too big:
            # shrink the axis that is closest to its own tolerance.
            margin_height = tol_height * subim_area - overlap_area_height
            margin_width = tol_width * subim_area - overlap_area_width
            if margin_height < margin_width:
                M -= 1
            else:
                N -= 1
        else:
            # All thresholds are satisfied.
            print(f"Overlapping width area : {100*overlap_width/subim_size}%")
            print(f"Overlapping height area : {100*overlap_height/subim_size}%")
            print(f"Overlapping shared area : {100*overlap_shared_area/(subim_size**2)}%")
            break

    return N, M, overlap_width, overlap_height

def slice_image_and_boxes(image, bounding_boxes, W, H, subim_size, N, M, overlap_width, overlap_height, tol_bbox):
    """Cut ``image`` into an N x M grid of sub-images and remap the boxes onto each one.

    Args:
        image: array indexed as ``image[y, x]`` (rows first), e.g. as returned by ``cv2.imread``.
        bounding_boxes: iterable of ``[nw_x, nw_y, se_x, se_y]`` boxes in pixel
            coordinates of the full image.
        W: image width in pixels.
        H: image height in pixels.
        subim_size: nominal side length of a sub-image in pixels.
        N: number of sub-images along the width.
        M: number of sub-images along the height.
        overlap_width: overlap in pixels between horizontal neighbours.
        overlap_height: overlap in pixels between vertical neighbours.
        tol_bbox: minimum area (in pixels) a clipped box must have to be kept.

    Returns:
        Tuple ``(subimages, subimage_boxes)`` of equal length. ``subimage_boxes[k]``
        holds the boxes of ``subimages[k]`` as ``[x_center, y_center, width, height]``,
        normalised by the actual width/height of that sub-image. Sub-images
        without any remaining box are dropped.
    """
    subimages = []
    subimage_boxes = []

    step_x = subim_size - overlap_width
    step_y = subim_size - overlap_height

    for i in range(M):
        for j in range(N):
            # Sub-image boundaries, clipped to the image.
            start_x = int(j * step_x)
            start_y = int(i * step_y)
            end_x = min(start_x + subim_size, W)
            end_y = min(start_y + subim_size, H)

            # A sub-image is smaller than subim_size when the image itself is
            # smaller than subim_size; labels must be normalised by the size of
            # the sub-image that is actually saved.
            tile_w = end_x - start_x
            tile_h = end_y - start_y
            if tile_w <= 0 or tile_h <= 0:
                continue  # nothing to cut here

            tile_boxes = []
            for nw_x, nw_y, se_x, se_y in bounding_boxes:
                # Skip boxes that lie entirely outside the sub-image.
                if se_x < start_x or nw_x > end_x or se_y < start_y or nw_y > end_y:
                    continue

                # Clip to the sub-image and shift into its local coordinates.
                clipped_nw_x = max(nw_x, start_x) - start_x
                clipped_nw_y = max(nw_y, start_y) - start_y
                clipped_se_x = min(se_x, end_x) - start_x
                clipped_se_y = min(se_y, end_y) - start_y

                w = clipped_se_x - clipped_nw_x
                h = clipped_se_y - clipped_nw_y
                # Drop degenerate boxes (only touching the border) and boxes
                # whose clipped area is below tol_bbox pixels.
                if w <= 0 or h <= 0 or w * h < tol_bbox:
                    continue

                x = (clipped_nw_x + clipped_se_x) / 2
                y = (clipped_nw_y + clipped_se_y) / 2
                tile_boxes.append([x / tile_w, y / tile_h, w / tile_w, h / tile_h])

            # Keep only sub-images that contain at least one box.
            if tile_boxes:
                subimages.append(image[start_y:end_y, start_x:end_x])
                subimage_boxes.append(tile_boxes)

    return subimages, subimage_boxes

def load_bbox(file_path, H, W):
    """Read a YOLO-style label file and return the boxes as pixel corner coordinates.

    Every non-blank line is expected to hold five whitespace-separated values:
    a leading token (the class id in the label files written by ``main``),
    which is ignored, followed by ``x_center y_center width height`` expressed
    as fractions of the image size.

    Args:
        file_path: path of the label file.
        H: image height in pixels.
        W: image width in pixels.

    Returns:
        List of ``[nw_x, nw_y, se_x, se_y]`` integer boxes (top-left and
        bottom-right corners) in pixel coordinates.

    Raises:
        ValueError: if a non-blank line does not hold exactly four numeric
            values after the leading token.
    """
    bounding_boxes = []
    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines in the label file

            x, y, w, h = map(float, fields[1:])
            # Scale the normalised values to pixels.
            x = int(x * W)
            y = int(y * H)
            w = int(w * W)
            h = int(h * H)

            # top left corner
            nw_x = x - w // 2
            nw_y = y - h // 2
            # bottom right corner
            se_x = x + w // 2
            se_y = y + h // 2
            bounding_boxes.append([nw_x, nw_y, se_x, se_y])

    return bounding_boxes

def main(SAVE_PATH, image_path, boxes_path, subim_size, weather_type, tol_width, tol_height, tol_xy, tol_bbox):
    """Cut one annotated image into sub-images and save them with their labels.

    The image is split with ``calculate_splits`` / ``slice_image_and_boxes``.
    Each kept sub-image is written to ``<SAVE_PATH>/images`` and its boxes to a
    text file with the same stem in ``<SAVE_PATH>/labels`` (one
    ``0 x y w h`` line per box).

    Args:
        SAVE_PATH: output directory, with or without a trailing separator.
        image_path: path of the image to cut.
        boxes_path: path of the label file read by ``load_bbox``.
        subim_size: side length of the sub-images in pixels.
        weather_type: prefix used in the output file names.
        tol_width, tol_height, tol_xy: overlap tolerances passed to ``calculate_splits``.
        tol_bbox: minimum clipped-box area passed to ``slice_image_and_boxes``.

    Raises:
        FileNotFoundError: if the image cannot be read.
        OSError: if a sub-image cannot be written.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    H, W = image.shape[:2]
    bounding_boxes = load_bbox(boxes_path, H, W)

    N, M, overlap_width, overlap_height = calculate_splits(W, H, subim_size, tol_width, tol_height, tol_xy)
    print(f"Splits: {N} along width, {M} along height")
    print(f"Overlap: {overlap_width} pixels along width, {overlap_height} pixels along height")

    subimages, subimage_boxes = slice_image_and_boxes(image, bounding_boxes, W, H, subim_size, N, M, overlap_width, overlap_height, tol_bbox)

    # os.path.join keeps the output inside SAVE_PATH whether or not the caller
    # passed a trailing slash.
    images_dir = os.path.join(SAVE_PATH, "images")
    labels_dir = os.path.join(SAVE_PATH, "labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # Evaluate the date once so an image and its label file always share a stem.
    date_tag = datetime.now().strftime('%Y-%m-%d')

    for idx, (subimage, boxes) in enumerate(zip(subimages, subimage_boxes)):
        stem = f"{weather_type}_{date_tag}_{str(idx).zfill(2)}"

        subimage_path = os.path.join(images_dir, stem + ".jpg")
        if not cv2.imwrite(subimage_path, subimage):
            raise OSError(f"Could not write sub-image: {subimage_path}")

        with open(os.path.join(labels_dir, stem + ".txt"), 'w') as f:
            for box in boxes:
                f.write(f"0 {box[0]} {box[1]} {box[2]} {box[3]}\n")


### Usage example: suppose you are working with images of different resolutions.  
### You want to cut them into sub-images, split these into train/test/val sets, and export the annotations in COCO format.

In [None]:
PATH = "" # FINAL_ANNOTATED_FOLDER_FROM_ROBOFLOW
# Keep the trailing slash: the sub-images and labels must land in
# output_folder/images and output_folder/labels, which is the layout the
# following cells read from "output_folder/".
SAVE_PATH = "output_folder/"

# Sort the listing so every image gets the same id on every run
# (os.listdir returns entries in arbitrary order).
for image_idx, filename in enumerate(sorted(os.listdir(PATH+"images"))):
    if filename.endswith(".jpg"):
        img_path = PATH+"images/"+filename
        # Swap only the extension; str.replace would also rewrite ".jpg" inside the name.
        labels_path = PATH+"labels/"+os.path.splitext(filename)[0]+".txt"
        main(SAVE_PATH, img_path, labels_path, subim_size=640, weather_type="AS"+str(image_idx).zfill(3), tol_width=0.3, tol_height=0.3, tol_xy=0.2, tol_bbox=30)

## Train-test-val split

In [None]:
from img_bbox import train_test_val_folders

# Split the dataset stored under "output_folder/" into train, val and test subsets.
# The COCO-export cell further down reads output_folder/{train,val,test}/images
# and .../labels, so those are the sub-folders this call is expected to produce.
# NOTE(review): train_test_val_folders lives in img_bbox (not shown here); confirm
# there how files are moved/copied and how the remaining fraction is assigned.
TRAIN_SPLIT = 0.7
VAL_SPLIT = 0.15
train_test_val_folders("output_folder/", TRAIN_SPLIT, VAL_SPLIT) # TEST_SPLIT will be 0.15

# COCO format (still with AS example)

In [None]:
from yolo_to_coco import all_yolo_to_coco

PATH="output_folder/"

# Convert each split in turn (train, then val, then test): the images and
# labels of a split are passed to all_yolo_to_coco together with the path of
# the annotations.json to produce inside that split's folder.
for split_name in ("train", "val", "test"):
    images_dir = f"{PATH}{split_name}/images"
    labels_dir = f"{PATH}{split_name}/labels"
    output_file = f"{PATH}{split_name}/annotations.json"
    all_yolo_to_coco(images_dir, labels_dir, output_file)