In [16]:
pip install labelme

python(87790) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Collecting labelme
  Downloading labelme-5.8.1-py3-none-any.whl.metadata (1.3 kB)
Collecting imgviz (from labelme)
  Downloading imgviz-1.7.6-py3-none-any.whl.metadata (6.2 kB)
Collecting loguru (from labelme)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting natsort>=7.1.0 (from labelme)
  Using cached natsort-8.4.0-py3-none-any.whl.metadata (21 kB)
Collecting osam>=0.2.3 (from labelme)
  Downloading osam-0.2.3-py3-none-any.whl.metadata (5.3 kB)
Collecting pyqt5>=5.14.0 (from labelme)
  Downloading PyQt5-5.15.11-cp38-abi3-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting scikit-image (from labelme)
  Downloading scikit_image-0.25.2-cp310-cp310-macosx_12_0_arm64.whl.metadata (14 kB)
Collecting onnxruntime!=1.16.0,!=1.20.0,!=1.20.1,>=1.14.1 (from osam>=0.2.3->labelme)
  Downloading onnxruntime-1.21.0-cp310-cp310-macosx_13_0_universal2.whl.metadata (4.5 kB)
Collecting PyQt5-sip<13,>=12.15 (from pyqt5>=5.14.0->labelme)
  Downloading PyQt5_sip-12.17.0-cp310-cp310-mac

# 1. spills are annotated with polygon in labelme

# 2. polygon annotations are saved in json format. 

# 3. From json annotation binary mask is created using below code 

In [None]:
import os
import json
import numpy as np
from PIL import Image
from labelme import utils

# Paths
json_dir = "image_seg/label_json"
output_img_dir = "image_seg/images"
output_mask_dir = "image_seg/masks"

# Create output directories
os.makedirs(output_img_dir, exist_ok=True)
os.makedirs(output_mask_dir, exist_ok=True)

# Loop through all JSON files
for filename in os.listdir(json_dir):
    if filename.endswith(".json"):
        json_path = os.path.join(json_dir, filename)

        with open(json_path, "r") as f:
            data = json.load(f)

        # Decode image
        image = utils.img_b64_to_arr(data["imageData"])
        
        # Create label mask
        label_name_to_value = {"_background_": 0}
        for shape in data["shapes"]:
            label_name = shape["label"]
            if label_name not in label_name_to_value:
                label_name_to_value[label_name] = len(label_name_to_value)

        lbl, _ = utils.shapes_to_label(image.shape, data["shapes"], label_name_to_value)

        # Save original image
        img_name = filename.replace(".json", ".png")
        Image.fromarray(image).save(os.path.join(output_img_dir, img_name))

        # Save corresponding mask
        Image.fromarray(lbl.astype(np.uint8)).save(os.path.join(output_mask_dir, img_name))

print("✅ Conversion complete. Check 'images' and 'masks' folders.")


✅ Conversion complete. Check 'images' and 'masks' folders.


In [None]:
import os

images_folder = 'image_seg/images'
masks_folder = 'image_seg/masks'

# Get the list of images and masks
images = os.listdir(images_folder)
masks = os.listdir(masks_folder)

# Check if each image has a corresponding mask
for image in images:
    image_name, _ = os.path.splitext(image)
    mask_name = image_name + '.png'  # Masks have the same name as images
    
    if mask_name not in masks:
        print(f"❌ Warning: No mask found for {image}")


# 4. U-Net segementation model training

In [None]:
import os
import numpy as np
import cv2
from tensorflow.keras.utils import Sequence
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


IMAGE_DIR = "image_seg/images"
MASK_DIR = "image_seg/masks"
IMG_SIZE = (256, 256)  # Resize everything to this
BATCH_SIZE = 8
EPOCHS = 25

# DATA GENERATOR 
class SpillDataset(Sequence):
    def __init__(self, image_filenames, image_dir, mask_dir, batch_size, img_size):
        self.image_filenames = image_filenames
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        return int(np.ceil(len(self.image_filenames) / self.batch_size))

    def __getitem__(self, idx):
        batch_files = self.image_filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_imgs = []
        batch_masks = []

        for fname in batch_files:
            img = cv2.imread(os.path.join(self.image_dir, fname))
            img = cv2.resize(img, self.img_size)
            img = img / 255.0

            mask = cv2.imread(os.path.join(self.mask_dir, fname), cv2.IMREAD_GRAYSCALE)
            mask = cv2.resize(mask, self.img_size)
            mask = (mask > 0).astype(np.float32)  # Binary mask

            batch_imgs.append(img)
            batch_masks.append(np.expand_dims(mask, axis=-1))

        return np.array(batch_imgs), np.array(batch_masks)

#  U-NET ARCHITECTURE 
def build_unet(input_size=(256, 256, 3)):
    inputs = layers.Input(input_size)

    def conv_block(x, filters):
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        return x

    def encoder_block(x, filters):
        f = conv_block(x, filters)
        p = layers.MaxPooling2D((2, 2))(f)
        return f, p

    def decoder_block(x, skip, filters):
        x = layers.Conv2DTranspose(filters, 2, strides=2, padding="same")(x)
        x = layers.concatenate([x, skip])
        x = conv_block(x, filters)
        return x

    f1, p1 = encoder_block(inputs, 64)
    f2, p2 = encoder_block(p1, 128)
    f3, p3 = encoder_block(p2, 256)
    f4, p4 = encoder_block(p3, 512)

    bottleneck = conv_block(p4, 1024)

    d1 = decoder_block(bottleneck, f4, 512)
    d2 = decoder_block(d1, f3, 256)
    d3 = decoder_block(d2, f2, 128)
    d4 = decoder_block(d3, f1, 64)

    outputs = layers.Conv2D(1, (1, 1), activation="sigmoid")(d4)

    model = models.Model(inputs, outputs)
    return model

# --- SPLIT DATA ---
all_files = os.listdir(IMAGE_DIR)
train_files, val_files = train_test_split(all_files, test_size=0.2, random_state=42)

train_gen = SpillDataset(train_files, IMAGE_DIR, MASK_DIR, BATCH_SIZE, IMG_SIZE)
val_gen = SpillDataset(val_files, IMAGE_DIR, MASK_DIR, BATCH_SIZE, IMG_SIZE)

# --- COMPILE AND TRAIN ---
model = build_unet(input_size=(256, 256, 3))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

history = model.fit(train_gen,
                    validation_data=val_gen,
                    epochs=EPOCHS)


model.save("spill_unet_model.h5")


plt.plot(history.history["accuracy"], label="train_acc")
plt.plot(history.history["val_accuracy"], label="val_acc")
plt.legend()
plt.title("Training Accuracy")
plt.show()


Epoch 1/25
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 12s/step - accuracy: 0.8919 - loss: 0.5569 - val_accuracy: 0.9157 - val_loss: 0.4019
Epoch 2/25
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 14s/step - accuracy: 0.8835 - loss: 0.4333 - val_accuracy: 0.9157 - val_loss: 0.3264
Epoch 3/25
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 16s/step - accuracy: 0.9007 - loss: 0.3601 - val_accuracy: 0.9157 - val_loss: 0.3154
Epoch 4/25
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 14s/step - accuracy: 0.8996 - loss: 0.3478 - val_accuracy: 0.9157 - val_loss: 0.3034
Epoch 5/25
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 15s/step - accuracy: 0.8820 - loss: 0.3539 - val_accuracy: 0.9157 - val_loss: 0.2818
Epoch 6/25
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 14s/step - accuracy: 0.8805 - loss: 0.3441 - val_accuracy: 0.9157 - val_loss: 0.2925
Epoch 7/25
[1m8/8[0m [32m━━━━━━━━━━━━



<Figure size 640x480 with 1 Axes>

# 5. Now inferencing the result with first yolo model.
# Then cropping the bounding box of detected spill given as input to the U-net model. 
# U-net model will segemnt the spill region and calculate the center point of the segmentated region to give accurate location of the spill.

In [26]:
import tensorflow as tf


In [None]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
from tensorflow.keras.models import load_model
import csv


yolo_model_path = 'yolov8m_finetune_wt/yolov8m.pt'
unet_model_path = 'spill_unet_model.h5'
input_image_dir = 'images_/val'  
output_image_dir = 'final_test_output_images/'  
csv_file_path = 'test_output_coordinates.csv'  


os.makedirs(output_image_dir, exist_ok=True)

# LOAD MODELS 
yolo_model = YOLO(yolo_model_path)
unet_model = load_model(unet_model_path)


def get_center_of_mask(mask):
    coords = cv2.findNonZero((mask > 0.5).astype(np.uint8))
    if coords is not None:
        x, y, w, h = cv2.boundingRect(coords)
        cx = x + w // 2
        cy = y + h // 2
        return (cx, cy)
    return None

#CSV SETUP
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['image_name', 'spill_location_x', 'spill_location_y'])  # Column headers

#PROCESS ALL IMAGES IN TESTING DIRECTORY 
for image_name in os.listdir(input_image_dir):
    # Check if the file is an image (can be modified to suit file types)
    if image_name.endswith(('.png', '.jpg', '.jpeg')):
        image_path = os.path.join(input_image_dir, image_name)
        
        # LOAD IMAGE 
        image = cv2.imread(image_path)
        original = image.copy()
        h_orig, w_orig = image.shape[:2]

        #  YOLO INFERENCE 
        results = yolo_model(image_path, conf=0.2)[0]

        # APPLY NON-MAXIMUM SUPPRESSION (NMS)
        # Filter out detections based on confidence threshold
        results = results.cpu()  # Move to CPU for further processing
        boxes = results.boxes.xyxy  # Bounding box coordinates (x1, y1, x2, y2)
        scores = results.boxes.conf  # Confidence scores
        labels = results.boxes.cls  # Class labels

        # NMS operation
        nms_indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores.tolist(), score_threshold=0.2, nms_threshold=0.4)

        # If no boxes are selected, continue with next image
        if len(nms_indices) == 0:
            print(f"⚠️ No valid boxes after NMS in {image_name}.")
            continue

        #  PROCESS ALL DETECTED BOXES AFTER NMS
        for i in nms_indices.flatten():
            box = boxes[i]
            x1, y1, x2, y2 = map(int, box)
            cropped = image[y1:y2, x1:x2]

            #  U-NET PREPROCESS 
            cropped_resized = cv2.resize(cropped, (256, 256))
            input_tensor = cropped_resized / 255.0  # Normalize
            input_tensor = np.expand_dims(input_tensor, axis=0)

            #  PREDICT SEGMENTATION 
            pred_mask = unet_model.predict(input_tensor)[0, :, :, 0]

            #  RESIZE MASK BACK TO CROP SIZE
            pred_mask_resized = cv2.resize(pred_mask, (x2 - x1, y2 - y1))

            #  CREATE BINARY MASK USING LOWER THRESHOLD 
            binary_mask = (pred_mask_resized > 0.1).astype(np.uint8)  # Lowered threshold

            #  FIND CENTER OF MASK 
            center = get_center_of_mask(binary_mask)
            if center:
                cx, cy = center
                cx += x1
                cy += y1

                #  DRAW YOLO BOUNDING BOX AND SPILL LOCATION 
                cv2.rectangle(original, (x1, y1), (x2, y2), (0, 255, 0), 4)  # Bounding box with thickness of 4
                cv2.circle(original, (cx, cy), 10, (0, 0, 255), -1)  # Larger red circle for center
                cv2.putText(original, f"spill_location: ({cx}, {cy})", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)  # Larger text size and thickness

                #  SAVE SPILL LOCATION COORDINATES TO CSV 
                with open(csv_file_path, mode='a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow([image_name, cx, cy])

        #  SAVE FINAL IMAGE 
        output_image_path = os.path.join(output_image_dir, f"output_{image_name}")
        cv2.imwrite(output_image_path, original)  

        print(f"✅ Processed and saved {image_name}.")

print(f"✅ Final images saved in {output_image_dir}")
print(f"✅ Spill location coordinates saved in {csv_file_path}")





image 1/1 /Users/cilvosimon/Desktop/data_files/images_/val/14_aug.jpg: 640x640 1 spill, 298.6ms
Speed: 4.3ms preprocess, 298.6ms inference, 11.4ms postprocess per image at shape (1, 3, 640, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 539ms/step
✅ Processed and saved 14_aug.jpg.

image 1/1 /Users/cilvosimon/Desktop/data_files/images_/val/50_aug.jpg: 640x640 1 spill, 341.7ms
Speed: 0.9ms preprocess, 341.7ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step
✅ Processed and saved 50_aug.jpg.

image 1/1 /Users/cilvosimon/Desktop/data_files/images_/val/40_aug.jpg: 640x640 1 spill, 265.4ms
Speed: 0.9ms preprocess, 265.4ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 640)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step
✅ Processed and saved 40_aug.jpg.

image 1/1 /Users/cilvosimon/Desktop/data_files/images_/val/17.jpg: 640x480 1 spill, 261.4ms
