# **FLOATING OBJECT DETECTION**

**About the dataset**


1. Dataset size?
2. Size of images?
3. How many categories?
4. Exist annotation file with no data
5. Six categories: human, wind/sup-board, boat, bouy, sailboat, kayak


**[Download dataset](https://www.kaggle.com/datasets/jangsienicajzkowy/afo-aerial-dataset-of-floating-objects/data)**

In [27]:
import shutil
from pathlib import Path

**Data path**

In [28]:
# Image path of PART 1,2,3
img_path_1 = 'dataset/PART_1/PART_1/images/'
img_path_2 = 'dataset/PART_2/PART_2/images/'
img_path_3 = 'dataset/PART_3/PART_3/images/'

# Categories path
# Categories: human, wind/sup-board, boat, bouy, sailboat, kayak
categories_path = 'dataset/PART_1/PART_1/6categories/'

**Split Data into Train, Test & Validation**

In [29]:
# Split into three parts: the training (67,4% of objects), the test (19,12% of objects),
# and the validation set (13,48% of objects). In order to prevent overfitting of the model to the given data,
# the test set contains selected frames from nine videos that were not used in either the training or validation sets.

# Split image to : dataset/working/images
# Split annotation to: dataset/working/labels

def split_data(file_list, img_path, ann_path, mode):
    #Check if we have our mode folders
    images_working_folder = Path( 'dataset/working/images/'+  mode)
    if not images_working_folder.exists():
        print(f"Path {images_working_folder} does not exit")
        os.makedirs(images_working_folder)

    labels_working_folder = Path('dataset/working/labels/' + mode)
    if not labels_working_folder.exists():
        print(f"Path {labels_working_folder} does not exit")
        os.makedirs(labels_working_folder)

    #Creates the name of our label file from the img name and creates our source file
    for file in file_list:
        name = file.replace('.jpg', '')
        img_src_file = str(img_path) + '/' + name + '.jpg'
        annot_src_file = str(ann_path) + '/' + name + '.txt'
        
        if Path(img_src_file).exists() and Path(annot_src_file).exists():
            #move image
            IMG_DIR = 'dataset/working/images/' + mode
            img_dest_file = str(IMG_DIR) + '/' + name + '.jpg'
            if os.path.isfile(img_src_file) and not Path(img_dest_file).exists():
                shutil.move(img_src_file, img_dest_file)
    
            # Copy annotations
            ANNOT_DIR = 'dataset/working/labels/' + mode
            annot_dest_file = str(ANNOT_DIR) + '/' + name + '.txt'
            if os.path.isfile(annot_src_file) and not Path(annot_dest_file).exists():
                shutil.move(annot_src_file, annot_dest_file)

In [30]:
#Get our images list
train_imgs = 'dataset/PART_1/PART_1/train.txt'
test_imgs = 'dataset/PART_1/PART_1/test.txt'
val_imgs = 'dataset/PART_1/PART_1/validation.txt'
with open(train_imgs, 'r') as f:
    train_img_list = [line.strip() for line in f.readlines()]

with open(test_imgs, 'r') as f:
    test_img_list = [line.strip() for line in f.readlines()]

with open(val_imgs, 'r') as f:
    val_img_list = [line.strip() for line in f.readlines()]

print(train_img_list[0], test_img_list[0], val_img_list[0])

a_102.jpg k2_38.jpg a_101.jpg


In [31]:
# Root path
root_img_path = Path('dataset/images/')
root_ann_path = Path('dataset/annotations/')

#Split Data
split_data(train_img_list, root_img_path, root_ann_path, 'train')
split_data(test_img_list, root_img_path, root_ann_path, 'test')
split_data(val_img_list, root_img_path, root_ann_path, 'val')

In [32]:
import glob
import os
working_image_path = 'dataset/working/images/'
working_labels_path = 'dataset/working/labels/'

# Images
img_test_path = glob.glob(os.path.join(working_image_path + '/test/' , "*.jpg"))
print(f'img_test_path: {len(img_test_path)}')

img_train_path = glob.glob(os.path.join(working_image_path + '/train/' , "*.jpg"))
print(f'img_train_path: {len(img_train_path)}')

img_val_path = glob.glob(os.path.join(working_image_path + '/val/' , "*.jpg"))
print(f'img_val_path: {len(img_val_path)}')

# Labels
label_test_path = glob.glob(os.path.join(working_labels_path + '/test/' , "*.txt"))
print(f'label_test_path: {len(label_test_path)}')

label_train_path = glob.glob(os.path.join(working_labels_path + '/train/' , "*.txt"))
print(f'label_train_path: {len(label_train_path)}')

label_val_path = glob.glob(os.path.join(working_image_path + '/val/' , "*.txt"))
print(f'label_val_path: {len(label_val_path)}')

img_test_path: 514
img_train_path: 2787
img_val_path: 339
label_test_path: 514
label_train_path: 2787
label_val_path: 0


### **Train model**

In [33]:
from skimage.util import img_as_float
from skimage.segmentation import felzenszwalb, slic
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
import datetime
from skimage.morphology import closing, square

class CustomObjectDetector:
    def __init__(self, n_components=50, n_clusters=20, sliding_window_sizes=[(100, 100), (150, 150), (200, 200)], 
                 window_step=50, confidence_threshold=0.8):
        """
        Initialize the custom object detector.
        
        Args:
            n_components: Number of PCA components
            n_clusters: Number of clusters for feature detection
            sliding_window_sizes: List of window sizes for sliding window detection
            window_step: Step size for sliding window
            confidence_threshold: Threshold for detection confidence
        """
        self.n_components = n_components
        self.n_clusters = n_clusters
        self.sliding_window_sizes = sliding_window_sizes
        self.window_step = window_step
        self.confidence_threshold = confidence_threshold
        
        # Initialize the models
        self.pca = PCA(n_components=n_components)
        self.kmeans = KMeans(n_clusters=n_clusters)
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        
        # Class names for visualization
        self.class_names = ['human', 'wind/sup-board', 'boat', 'buoy', 'sailboat', 'kayak']
        
        # Colors for visualization (one for each class)
        self.colors = [
            (0, 255, 0),    # Green for human
            (255, 0, 0),    # Blue for wind/sup-board
            (0, 0, 255),    # Red for boat
            (255, 255, 0),  # Cyan for buoy
            (255, 0, 255),  # Magenta for sailboat
            (0, 255, 255)   # Yellow for kayak
        ]
    
    def train(self, image_folder, annotation_folder):
        """
        Train the object detector.
        
        Args:
            image_folder: Path to the image folder
            annotation_folder: Path to the annotation folder
        """
        print("Loading training data...")
        X_train, y_train, _, _ = self.load_dataset(image_folder, annotation_folder, 'train', debug_visualization=False)
        
        if len(X_train) == 0:
            raise ValueError("No training data found. Check your paths and data format.")

        print(f"Loaded {len(X_train)} training samples")

        # Apply PCA for dimensionality reduction
        print("Applying PCA...")
        X_train_pca = self.pca.fit_transform(X_train)

        # Train k-means for feature clustering
        print("Training K-means clustering...")
        self.kmeans.fit(X_train_pca)

        # Add cluster information to features
        cluster_features = self.kmeans.transform(X_train_pca)
        X_train_with_clusters = np.hstack([X_train_pca, cluster_features])

        # Train the classifier
        print("Training the classifier...")
        self.classifier.fit(X_train_with_clusters, y_train)

        # Evaluate on validation set if available
        try:
            print("Evaluating on test data...")
            # X_val, y_val, _, _ = self.load_dataset(image_folder, annotation_folder, 'val')
            X_test, y_test, _, _ = self.load_dataset(image_folder, annotation_folder, 'test')
            if len(X_test) > 0:
                X_test_pca = self.pca.transform(X_test)
                cluster_test = self.kmeans.transform(X_test_pca)
                X_test_with_clusters = np.hstack([X_test_pca, cluster_test])
                y_pred = self.classifier.predict(X_test_with_clusters)
                print("Test accuracy:", accuracy_score(y_test, y_pred))
                print("\nClassification Report:")
                print(classification_report(y_test, y_pred, target_names=self.class_names))
        except Exception as e:
            print(f"Could not evaluate on validation data: {e}")
    
    
    def train_with_background(self, image_folder, annotation_folder):
        """
        Train classifier with an explicit background class to improve discrimination.
        This is an advanced enhancement that requires creating negative samples.
        
        Args:
            image_folder: Path to the image folder
            annotation_folder: Path to the annotation folder
        """
        print("Loading training data...")
        X_train, y_train, _, _ = self.load_dataset(image_folder, annotation_folder, 'train')
        
        # Get hard negative examples (background images that might confuse the classifier)
        print("Mining hard negative examples...")
        X_hard_neg, y_hard_neg = self.mine_hard_negatives(image_folder, annotation_folder)
        
        # Combine regular training data with hard negatives
        X_combined = np.vstack([X_train, X_hard_neg]) if len(X_hard_neg) > 0 else X_train
        y_combined = np.concatenate([y_train, y_hard_neg]) if len(y_hard_neg) > 0 else y_train
        
        print(f"Training with {len(X_train)} regular examples and {len(X_hard_neg) if len(X_hard_neg) > 0 else 0} hard negative examples")
        
        # Apply PCA for dimensionality reduction
        print("Applying PCA...")
        X_train_pca = self.pca.fit_transform(X_combined)
        
        # Train k-means for feature clustering
        print("Training K-means clustering...")
        self.kmeans.fit(X_train_pca)
        
        # Add cluster information to features
        cluster_features = self.kmeans.transform(X_train_pca)
        X_train_with_clusters = np.hstack([X_train_pca, cluster_features])
        
        # Train the classifier
        print("Training the classifier with background class...")
        self.classifier.fit(X_train_with_clusters, y_combined)
        
        
    
    def load_dataset(self, image_folder, annotation_folder, split_type='train', debug_visualization=False):
        """
        Load the dataset.
        
        Args:
            image_folder: Path to the image folder
            annotation_folder: Path to the annotation folder
            split_type: 'train', 'test', or 'val'
            
        Returns:
            X: Features
            y: Labels
            bbox_data: Bounding box information
            image_paths: Paths to the images
        """
        print("Loading dataset...")
        X = []
        y = []
        bbox_data = []
        image_paths = []
        
        img_dir = os.path.join(image_folder, split_type)
        ann_dir = os.path.join(annotation_folder, split_type)
        
        image_files = [f for f in os.listdir(img_dir) if f.endswith('.jpg')]
        
        for img_file in tqdm(image_files):
            # Get the corresponding annotation file
            ann_file = os.path.splitext(img_file)[0] + '.txt'
            ann_path = os.path.join(ann_dir, ann_file)
            
            # Skip if annotation file doesn't exist
            if not os.path.exists(ann_path):
                continue
            
            # Load image
            img_path = os.path.join(img_dir, img_file)
            image = cv2.imread(img_path)
            
            if image is None:
                print(f"Warning: Could not read image {img_path}")
                continue
            
            image_height, image_width = image.shape[:2]
            image_paths.append(img_path)
            # print(f'Image path: {img_path}')
            
            # Read annotations
            with open(ann_path, 'r') as f:
                lines = f.readlines()
            
            # Process each object in the image
            for line in lines:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue
                    
                class_id = int(parts[0])
                # YOLO format: center_x, center_y, width, height (normalized)
                x_center = float(parts[1]) * image_width
                y_center = float(parts[2]) * image_height
                width = float(parts[3]) * image_width
                height = float(parts[4]) * image_height

                # Convert to top-left, bottom-right coordinates
                x1 = max(0, int(x_center - width / 2))
                y1 = max(0, int(y_center - height / 2))
                x2 = min(image_width, int(x_center + width / 2))
                y2 = min(image_height, int(y_center + height / 2))
                # x1 = float(image_width) * (2.0 * float(parts[1]) - float(parts[3])) / 2.0
                # y1 = float(image_height) * (2.0 * float(parts[2]) - float(parts[4])) / 2.0
                # x2 = float(image_width) * (2.0 * float(parts[1]) + float(parts[3])) / 2.0
                # y2 = float(image_height) * (2.0 * float(parts[2]) + float(parts[4])) / 2.0

                # Extract the object region (ROI)
                object_img = image[int(y1):int(y2), int(x1):int(x2)]
                
                # Extract features
                features = self.extract_features(object_img)
                X.append(features)
                y.append(class_id)
                bbox_data.append((img_path, class_id, int(x1), int(y1), int(x2), int(y2)))
                
                # Extract the object region with error handling
                try:
                    # Visual debugging if requested
                    if debug_visualization:
                        plt.figure(figsize=(10, 5))
                        plt.subplot(1, 2, 1)
                        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                        plt.plot([x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1], 'r-')
                        plt.title(f"Image with box: {self.class_names[class_id]}")
                        
                        plt.subplot(1, 2, 2)
                        plt.imshow(cv2.cvtColor(object_img, cv2.COLOR_BGR2RGB))
                        plt.title(f"Extracted region: {self.class_names[class_id]}")
                        
                        plt.tight_layout()
                        plt.show()
                except Exception as e:
                    print(f"Error processing region in {img_path}: {e}")
                    continue
                
                
        return np.array(X), np.array(y), bbox_data, image_paths
    
    def extract_features(self, image):
        """
        Extract features from an image.
        
        Args:
            image: Input image
            
        Returns:
            features: Extracted features
        """
        # Convert to grayscale
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
            
        # Resize to a standard size
        resized = cv2.resize(gray, (100, 100))
        
        # Apply some basic features (without using pre-built model)
        # 1. Histogram of oriented gradients (simplified)
        gx = cv2.Sobel(resized, cv2.CV_32F, 1, 0)
        gy = cv2.Sobel(resized, cv2.CV_32F, 0, 1)
        mag, ang = cv2.cartToPolar(gx, gy)
        
        # 2. Intensity histogram
        hist = cv2.calcHist([resized], [0], None, [32], [0, 256])
        
        # 3. Local binary patterns (simplified)
        lbp = np.zeros_like(resized)
        for i in range(1, resized.shape[0] - 1):
            for j in range(1, resized.shape[1] - 1):
                center = resized[i, j]
                code = 0
                code |= (resized[i-1, j-1] >= center) << 0
                code |= (resized[i-1, j] >= center) << 1
                code |= (resized[i-1, j+1] >= center) << 2
                code |= (resized[i, j+1] >= center) << 3
                code |= (resized[i+1, j+1] >= center) << 4
                code |= (resized[i+1, j] >= center) << 5
                code |= (resized[i+1, j-1] >= center) << 6
                code |= (resized[i, j-1] >= center) << 7
                lbp[i, j] = code
        
        lbp_hist = cv2.calcHist([lbp.astype(np.uint8)], [0], None, [32], [0, 256])
        
        # Concatenate all features
        features = np.concatenate([
            mag.flatten(),
            ang.flatten(),
            hist.flatten(),
            lbp_hist.flatten()
        ])
        
        return features
    
    
    def save_model(self, model_path):
        """
        Save the trained model.
        
        Args:
            model_path: Path to save the model
        """
        model_data = {
            'pca': self.pca,
            'kmeans': self.kmeans,
            'classifier': self.classifier,
            'n_components': self.n_components,
            'n_clusters': self.n_clusters,
            'class_names': self.class_names,
            'colors': self.colors,
            'confidence_threshold': self.confidence_threshold
        }
        
        with open(model_path, 'wb') as f:
            pickle.dump(model_data, f)
        
        print(f"Model saved to {model_path}")
            
    def detect_objects(self, image, roi_method='felzenszwalb', expand_factor=0.1):
        """
        Detect objects in an image using region-based approach.
        
        Args:
            image: Input image
            roi_method: Method for finding regions of interest
            expand_factor: Factor to expand regions by
            
        Returns:
            detections: List of (class_id, confidence, x1, y1, x2, y2)
        """
        # Find regions of interest
        print(f"Finding regions of interest using {roi_method}...")
        regions = self.find_regions_of_interest(image, method=roi_method)
        print(f"Found {len(regions)} potential regions")
        
        detections = []
        image_height, image_width = image.shape[:2]
        
        # Process each region
        for i, region in enumerate(regions):
            x1, y1, x2, y2 = region
            
            # Expand region slightly
            width = x2 - x1
            height = y2 - y1
            
            # Add padding
            x1_expanded = max(0, int(x1 - width * expand_factor))
            y1_expanded = max(0, int(y1 - height * expand_factor))
            x2_expanded = min(image_width, int(x2 + width * expand_factor))
            y2_expanded = min(image_height, int(y2 + height * expand_factor))
            
            # Extract region
            try:
                region_img = image[y1_expanded:y2_expanded, x1_expanded:x2_expanded]
                
                # Skip if region is too small
                if region_img.shape[0] < 10 or region_img.shape[1] < 10:
                    continue
                
                # Extract features
                features = self.extract_features(region_img)
                features = features.reshape(1, -1)
                
                # Apply PCA
                features_pca = self.pca.transform(features)
                
                # Get cluster features
                cluster_features = self.kmeans.transform(features_pca)
                features_with_clusters = np.hstack([features_pca, cluster_features])
                
                # Predict class and confidence
                class_id = self.classifier.predict(features_with_clusters)[0]
                confidence = np.max(self.classifier.predict_proba(features_with_clusters)[0])
                
                if confidence > self.confidence_threshold:
                    detections.append((class_id, confidence, x1_expanded, y1_expanded, x2_expanded, y2_expanded))
            except Exception as e:
                print(f"Error processing region {i}: {e}")
                continue
        
        # Apply non-maximum suppression
        return self.non_max_suppression(detections)
    
    def non_max_suppression(self, boxes, overlap_thresh=0.5):
        """
        Apply non-maximum suppression to remove overlapping detections.
        
        Args:
            boxes: List of (class_id, confidence, x1, y1, x2, y2)
            overlap_thresh: Threshold for overlap
            
        Returns:
            result: List of non-overlapping boxes
        """
        # If no boxes, return empty list
        if len(boxes) == 0:
            return []
        
        # Convert to numpy array
        boxes = np.array(boxes)
        
        # Initialize the list of picked indexes
        pick = []
        
        # Extract coordinates
        class_ids = boxes[:, 0]
        confidences = boxes[:, 1]
        x1 = boxes[:, 2]
        y1 = boxes[:, 3]
        x2 = boxes[:, 4]
        y2 = boxes[:, 5]
        
        # Compute area of the boxes
        area = (x2 - x1 + 1) * (y2 - y1 + 1)
        
        # Sort by confidence (highest first)
        idxs = np.argsort(confidences)[::-1]
        
        # Keep looping while some indexes still remain in the indexes list
        while len(idxs) > 0:
            # Grab the last index and add the index value to the list of picked indexes
            last = len(idxs) - 1
            i = idxs[0]
            pick.append(i)
            
            # Find the largest (x, y) coordinates for the start of the bounding box
            # and the smallest (x, y) coordinates for the end of the bounding box
            xx1 = np.maximum(x1[i], x1[idxs[1:]])
            yy1 = np.maximum(y1[i], y1[idxs[1:]])
            xx2 = np.minimum(x2[i], x2[idxs[1:]])
            yy2 = np.minimum(y2[i], y2[idxs[1:]])
            
            # Compute the width and height of the bounding box
            w = np.maximum(0, xx2 - xx1 + 1)
            h = np.maximum(0, yy2 - yy1 + 1)
            
            # Compute the ratio of overlap
            overlap = (w * h) / area[idxs[1:]]
            
            # Delete all indexes from the index list that have overlap greater than the threshold
            idxs = np.delete(idxs, np.concatenate(([0], np.where(overlap > overlap_thresh)[0] + 1)))
        
        # Return only the bounding boxes that were picked
        result = [boxes[i] for i in pick]
        return result
    
    def visualize_detection(self, image, detections):
        """
        Visualize detection results.
        
        Args:
            image: Input image
            detections: List of (class_id, confidence, x1, y1, x2, y2)
            
        Returns:
            result_image: Image with detections
        """
        result_image = image.copy()
        
        # Sort detections by confidence (highest first)
        sorted_detections = sorted(detections, key=lambda x: x[1], reverse=True)
        
        for detection in sorted_detections:
            class_id, confidence, x1, y1, x2, y2 = detection
            class_id = int(class_id)
            
            # Draw bounding box
            color = self.colors[class_id]
            cv2.rectangle(result_image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            
            # Create label with class name and confidence
            label = f"{self.class_names[class_id]}: {confidence:.2f}"
            
            # Get text size for better label placement
            (label_width, label_height), baseline = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2
            )
            
            # Draw label background
            cv2.rectangle(
                result_image, 
                (int(x1), int(y1) - label_height - 10), 
                (int(x1) + label_width, int(y1)), 
                color, 
                -1  # Filled rectangle
            )
            
            # Draw label text in white
            cv2.putText(
                result_image, 
                label, 
                (int(x1), int(y1) - 7),
                cv2.FONT_HERSHEY_SIMPLEX, 
                0.5, 
                (255, 255, 255), 
                2
            )
        
        # Add metadata
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        info_text = f"Detections: {len(detections)} | Time: {timestamp} | Threshold: {self.confidence_threshold}"
        cv2.putText(
            result_image,
            info_text,
            (10, result_image.shape[0] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 0),
            2
        )
        
        return result_image
    
    
    def visualize_regions(self, image, regions):
        """
        Visualize regions of interest.
        
        Args:
            image: Input image
            regions: List of (x1, y1, x2, y2)
            
        Returns:
            result_image: Image with regions
        """
        result_image = image.copy()
        
        for i, region in enumerate(regions):
            x1, y1, x2, y2 = region
            
            # Draw bounding box in yellow
            cv2.rectangle(result_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 255), 1)
            
            # Draw region number
            cv2.putText(
                result_image, 
                f"{i}", 
                (int(x1), int(y1) - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 
                0.5, 
                (0, 255, 255), 
                1
            )
        
        # Add info
        cv2.putText(
            result_image,
            f"Regions: {len(regions)}",
            (10, result_image.shape[0] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 0),
            2
        )
        
        return result_image
    
    
    def process_image(self, image_path, output_dir="output", visualize_roi=True):
        """
        Process an image and save detection results.
        
        Args:
            image_path: Path to the input image
            output_dir: Directory to save results
            visualize_roi: Whether to visualize regions of interest
        """
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)
        
        # Get base filename without extension
        base_name = os.path.splitext(os.path.basename(image_path))[0]
        
        # Load image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Could not read image at {image_path}")
            return
        
        # Find regions of interest
        regions = self.find_regions_of_interest(image)
        
        # Visualize regions if requested
        if visualize_roi:
            roi_image = self.visualize_regions(image, regions)
            roi_output_path = os.path.join(output_dir, f"{base_name}_regions.jpg")
            cv2.imwrite(roi_output_path, roi_image)
            print(f"Regions of interest saved to {roi_output_path}")
        
        # Detect objects
        detections = self.detect_objects(image)
        print(f"Found {len(detections)} objects")
        
        # Visualize detections
        result_image = self.visualize_detection(image, detections)
        detection_output_path = os.path.join(output_dir, f"{base_name}_detection.jpg")
        cv2.imwrite(detection_output_path, result_image)
        print(f"Detection result saved to {detection_output_path}")
        
    
    def find_regions_of_interest(self, image, method='felzenszwalb', min_size=100):
        """
        Find regions of interest in an image using segmentation.
        
        Args:
            image: Input image
            method: Segmentation method ('felzenszwalb', 'slic', or 'contour')
            min_size: Minimum size of regions to consider
            
        Returns:
            regions: List of (x1, y1, x2, y2) for potential object regions
        """
        # Convert to float and handle grayscale if needed
        img_float = img_as_float(image)
        
        if method == 'felzenszwalb':
            # Felzenszwalb segmentation
            segments = felzenszwalb(img_float, scale=100, sigma=0.5, min_size=min_size)
        elif method == 'slic':
            # SLIC superpixel segmentation
            segments = slic(img_float, n_segments=100, compactness=10, sigma=1, start_label=1)
        elif method == 'contour':
            # Edge-based contour detection
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)
            
            # Clean up edges with morphological operations
            closed = closing(edges, square(3))
            
            # Find contours
            contours, _ = cv2.findContours(closed.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
            # Create regions from contours
            regions = []
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                if w * h >= min_size:
                    regions.append((x, y, x + w, y + h))
            return regions
        else:
            raise ValueError(f"Unknown segmentation method: {method}")
        
        # Extract regions from segments
        regions = []
        for segment_id in range(np.max(segments) + 1):
            # Create a mask for this segment
            mask = (segments == segment_id).astype(np.uint8)
            
            # Find contours in the mask
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
            # If no contours, skip
            if not contours:
                continue
                
            # Get bounding box
            x, y, w, h = cv2.boundingRect(contours[0])
            
            # Filter by size
            if w * h >= min_size:
                regions.append((x, y, x + w, y + h))
        
        return regions
    

In [34]:
def load_model(model_path):
        """
        Load a trained model.
        
        Args:
            model_path: Path to the model
            
        Returns:
            detector: Loaded detector
        """
        with open(model_path, 'rb') as f:
            model_data = pickle.load(f)
        
        detector = CustomObjectDetector(
            n_components=model_data['n_components'],
            n_clusters=model_data['n_clusters'],
            confidence_threshold=model_data['confidence_threshold']
        )
        
        detector.pca = model_data['pca']
        detector.kmeans = model_data['kmeans']
        detector.classifier = model_data['classifier']
        detector.class_names = model_data['class_names']
        detector.colors = model_data['colors']
        
        print(f"Model loaded from {model_path}")
        return detector

In [35]:
# Define paths
image_folder = 'dataset/working/images'  # Base folder containing train, test, val
annotation_folder = 'dataset/working/labels'  # Base folder containing train, test, val
model_path = 'custom_detector1.pkl'

# Create and train the detector
detector = CustomObjectDetector()
detector.train(image_folder, annotation_folder)

# Option 2: Advanced training with background class (uncomment to use)
# detector.train_with_background(image_folder, annotation_folder)

# Save the model
detector.save_model(model_path)

Loading training data...
Loading dataset...


100%|██████████| 2787/2787 [42:23<00:00,  1.10it/s]    


Loaded 26960 training samples
Applying PCA...
Training K-means clustering...
Training the classifier...
Evaluating on test data...
Loading dataset...


100%|██████████| 514/514 [07:43<00:00,  1.11it/s]


Test accuracy: 0.8850634899856002

Classification Report:
                precision    recall  f1-score   support

         human       0.93      0.97      0.95      6511
wind/sup-board       0.52      0.43      0.47       634
          boat       0.02      0.01      0.01       143
          buoy       1.00      0.15      0.26        59
      sailboat       0.00      0.00      0.00        28
         kayak       0.54      0.55      0.54       264

      accuracy                           0.89      7639
     macro avg       0.50      0.35      0.37      7639
  weighted avg       0.87      0.89      0.87      7639

Model saved to custom_detector1.pkl


In [36]:
loaded_detector = load_model('custom_detector1.pkl')
image_path = "dataset/working/images/val/a_101.jpg"  # Update with correct image path
loaded_detector.process_image(image_path)

Model loaded from custom_detector1.pkl
Regions of interest saved to output/a_101_regions.jpg
Finding regions of interest using felzenszwalb...
Found 6340 potential regions
Found 566 objects
Detection result saved to output/a_101_detection.jpg
