# **FLOATING OBJECT DETECTION**

**About the dataset**


1. Dataset size?
2. Size of images?
3. How many categories?
4. Exist annotation file with no data
5. Six categories: human, wind/sup-board, boat, bouy, sailboat, kayak


**[Download dataset](https://www.kaggle.com/datasets/jangsienicajzkowy/afo-aerial-dataset-of-floating-objects/data)**

In [74]:
import shutil
from pathlib import Path

**Data path**

In [75]:
# Image path of PART 1,2,3
img_path_1 = 'dataset/PART_1/PART_1/images/'
img_path_2 = 'dataset/PART_2/PART_2/images/'
img_path_3 = 'dataset/PART_3/PART_3/images/'

# Categories path
# Categories: human, wind/sup-board, boat, bouy, sailboat, kayak
categories_path = 'dataset/PART_1/PART_1/6categories/'

**Split Data into Train, Test & Validation**

In [76]:
# Split into three parts: the training (67,4% of objects), the test (19,12% of objects),
# and the validation set (13,48% of objects). In order to prevent overfitting of the model to the given data,
# the test set contains selected frames from nine videos that were not used in either the training or validation sets.

# Split image to : dataset/working/images
# Split annotation to: dataset/working/labels

def split_data(file_list, img_path, ann_path, mode):
    #Check if we have our mode folders
    images_working_folder = Path( 'dataset/working/images/'+  mode)
    if not images_working_folder.exists():
        print(f"Path {images_working_folder} does not exit")
        os.makedirs(images_working_folder)

    labels_working_folder = Path('dataset/working/labels/' + mode)
    if not labels_working_folder.exists():
        print(f"Path {labels_working_folder} does not exit")
        os.makedirs(labels_working_folder)

    #Creates the name of our label file from the img name and creates our source file
    for file in file_list:
        name = file.replace('.jpg', '')
        img_src_file = str(img_path) + '/' + name + '.jpg'
        annot_src_file = str(ann_path) + '/' + name + '.txt'
        
        if Path(img_src_file).exists() and Path(annot_src_file).exists():
            #move image
            IMG_DIR = 'dataset/working/images/' + mode
            img_dest_file = str(IMG_DIR) + '/' + name + '.jpg'
            if os.path.isfile(img_src_file) and not Path(img_dest_file).exists():
                shutil.move(img_src_file, img_dest_file)
    
            # Copy annotations
            ANNOT_DIR = 'dataset/working/labels/' + mode
            annot_dest_file = str(ANNOT_DIR) + '/' + name + '.txt'
            if os.path.isfile(annot_src_file) and not Path(annot_dest_file).exists():
                shutil.move(annot_src_file, annot_dest_file)

In [77]:
#Get our images list
train_imgs = 'dataset/PART_1/PART_1/train.txt'
test_imgs = 'dataset/PART_1/PART_1/test.txt'
val_imgs = 'dataset/PART_1/PART_1/validation.txt'
with open(train_imgs, 'r') as f:
    train_img_list = [line.strip() for line in f.readlines()]

with open(test_imgs, 'r') as f:
    test_img_list = [line.strip() for line in f.readlines()]

with open(val_imgs, 'r') as f:
    val_img_list = [line.strip() for line in f.readlines()]

print(train_img_list[0], test_img_list[0], val_img_list[0])

a_102.jpg k2_38.jpg a_101.jpg


In [78]:
# Root path
root_img_path = Path('dataset/images/')
root_ann_path = Path('dataset/annotations/')

#Split Data
split_data(train_img_list, root_img_path, root_ann_path, 'train')
split_data(test_img_list, root_img_path, root_ann_path, 'test')
split_data(val_img_list, root_img_path, root_ann_path, 'val')

In [79]:
import glob
import os
working_image_path = 'dataset/working/images/'
working_labels_path = 'dataset/working/labels/'

# Images
img_test_path = glob.glob(os.path.join(working_image_path + '/test/' , "*.jpg"))
print(f'img_test_path: {len(img_test_path)}')

img_train_path = glob.glob(os.path.join(working_image_path + '/train/' , "*.jpg"))
print(f'img_train_path: {len(img_train_path)}')

img_val_path = glob.glob(os.path.join(working_image_path + '/val/' , "*.jpg"))
print(f'img_val_path: {len(img_val_path)}')

# Labels
label_test_path = glob.glob(os.path.join(working_labels_path + '/test/' , "*.txt"))
print(f'label_test_path: {len(label_test_path)}')

label_train_path = glob.glob(os.path.join(working_labels_path + '/train/' , "*.txt"))
print(f'label_train_path: {len(label_train_path)}')

label_val_path = glob.glob(os.path.join(working_image_path + '/val/' , "*.txt"))
print(f'label_val_path: {len(label_val_path)}')

img_test_path: 514
img_train_path: 2787
img_val_path: 339
label_test_path: 514
label_train_path: 2787
label_val_path: 0


### **Train model**

In [80]:
import os
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
import datetime

class CustomObjectDetector:
    def __init__(self, n_components=50, n_clusters=20, confidence_threshold=0.8, debug_mode=False):
    
        self.n_components = n_components
        self.n_clusters = n_clusters
        self.confidence_threshold = confidence_threshold
        self.debug_mode = debug_mode
        
        # Initialize the models
        self.pca = PCA(n_components=n_components)
        self.kmeans = KMeans(n_clusters=n_clusters)
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        
        # Class names for visualization
        self.class_names = ['human', 'wind/sup-board', 'boat', 'buoy', 'sailboat', 'kayak']
        
        # Colors for visualization (one for each class)
        self.colors = [
            (0, 255, 0),    # Green for human
            (255, 0, 0),    # Blue for wind/sup-board
            (0, 0, 255),    # Red for boat
            (255, 255, 0),  # Cyan for buoy
            (255, 0, 255),  # Magenta for sailboat
            (0, 255, 255)   # Yellow for kayak
        ]
    
    def train(self, image_folder, annotation_folder):
        print("Loading training data...")
        X_train, y_train, _, _ = self.load_dataset(image_folder, annotation_folder, 'train', debug_visualization=False)
        
        if len(X_train) == 0:
            raise ValueError("No training data found. Check your paths and data format.")

        print(f"Loaded {len(X_train)} training samples")

        # Apply PCA for dimensionality reduction
        print("Applying PCA...")
        X_train_pca = self.pca.fit_transform(X_train)

        # Train k-means for feature clustering
        print("Training K-means clustering...")
        self.kmeans.fit(X_train_pca)

        # Add cluster information to features
        cluster_features = self.kmeans.transform(X_train_pca)
        X_train_with_clusters = np.hstack([X_train_pca, cluster_features])

        # Train the classifier
        print("Training the classifier...")
        self.classifier.fit(X_train_with_clusters, y_train)

        # Evaluate on validation set if available
        try:
            print("Evaluating on test data...")
            # X_val, y_val, _, _ = self.load_dataset(image_folder, annotation_folder, 'val')
            X_test, y_test, _, _ = self.load_dataset(image_folder, annotation_folder, 'test')
            if len(X_test) > 0:
                X_test_pca = self.pca.transform(X_test)
                cluster_test = self.kmeans.transform(X_test_pca)
                X_test_with_clusters = np.hstack([X_test_pca, cluster_test])
                y_pred = self.classifier.predict(X_test_with_clusters)
                print("Test accuracy:", accuracy_score(y_test, y_pred))
                print("\nClassification Report:")
                print(classification_report(y_test, y_pred, target_names=self.class_names))
        except Exception as e:
            print(f"Could not evaluate on validation data: {e}")
        
    
    def load_dataset(self, image_folder, annotation_folder, split_type='train', debug_visualization=False):
        print("Loading dataset...")
        X = []
        y = []
        bbox_data = []
        image_paths = []
        
        img_dir = os.path.join(image_folder, split_type)
        ann_dir = os.path.join(annotation_folder, split_type)
        
        image_files = [f for f in os.listdir(img_dir) if f.endswith('.jpg')]
        
        for img_file in tqdm(image_files):
            # Get the corresponding annotation file
            ann_file = os.path.splitext(img_file)[0] + '.txt'
            ann_path = os.path.join(ann_dir, ann_file)
            
            # Skip if annotation file doesn't exist
            if not os.path.exists(ann_path):
                continue
            
            # Load image
            img_path = os.path.join(img_dir, img_file)
            image = cv2.imread(img_path)
            
            if image is None:
                print(f"Warning: Could not read image {img_path}")
                continue
            
            image_height, image_width = image.shape[:2]
            image_paths.append(img_path)
            # print(f'Image path: {img_path}')
            
            # Read annotations
            with open(ann_path, 'r') as f:
                lines = f.readlines()
            
            # Process each object in the image
            for line in lines:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue
                    
                class_id = int(parts[0])
                # YOLO format: center_x, center_y, width, height (normalized)
                x_center = float(parts[1]) * image_width
                y_center = float(parts[2]) * image_height
                width = float(parts[3]) * image_width
                height = float(parts[4]) * image_height

                # Convert to top-left, bottom-right coordinates
                x1 = max(0, int(x_center - width / 2))
                y1 = max(0, int(y_center - height / 2))
                x2 = min(image_width, int(x_center + width / 2))
                y2 = min(image_height, int(y_center + height / 2))

                # Extract the object region (ROI)
                object_img = image[int(y1):int(y2), int(x1):int(x2)]
                
                # Extract features
                features = self.extract_features(object_img)
                X.append(features)
                y.append(class_id)
                bbox_data.append((img_path, class_id, int(x1), int(y1), int(x2), int(y2)))
                
                # Extract the object region with error handling
                try:
                    # Visual debugging if requested
                    if debug_visualization:
                        plt.figure(figsize=(10, 5))
                        plt.subplot(1, 2, 1)
                        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                        plt.plot([x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1], 'r-')
                        plt.title(f"Image with box: {self.class_names[class_id]}")
                        
                        plt.subplot(1, 2, 2)
                        plt.imshow(cv2.cvtColor(object_img, cv2.COLOR_BGR2RGB))
                        plt.title(f"Extracted region: {self.class_names[class_id]}")
                        
                        plt.tight_layout()
                        plt.show()
                except Exception as e:
                    print(f"Error processing region in {img_path}: {e}")
                    continue
                
                
        return np.array(X), np.array(y), bbox_data, image_paths
    
    def extract_features(self, image):
        # Convert to grayscale
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
            
        # Resize to a standard size
        resized = cv2.resize(gray, (100, 100))
        
        # Apply some basic features (without using pre-built model)
        # 1. Histogram of oriented gradients (simplified)
        gx = cv2.Sobel(resized, cv2.CV_32F, 1, 0)
        gy = cv2.Sobel(resized, cv2.CV_32F, 0, 1)
        mag, ang = cv2.cartToPolar(gx, gy)
        
        # 2. Intensity histogram
        hist = cv2.calcHist([resized], [0], None, [32], [0, 256])
        
        # 3. Local binary patterns (simplified)
        lbp = np.zeros_like(resized)
        for i in range(1, resized.shape[0] - 1):
            for j in range(1, resized.shape[1] - 1):
                center = resized[i, j]
                code = 0
                code |= (resized[i-1, j-1] >= center) << 0
                code |= (resized[i-1, j] >= center) << 1
                code |= (resized[i-1, j+1] >= center) << 2
                code |= (resized[i, j+1] >= center) << 3
                code |= (resized[i+1, j+1] >= center) << 4
                code |= (resized[i+1, j] >= center) << 5
                code |= (resized[i+1, j-1] >= center) << 6
                code |= (resized[i, j-1] >= center) << 7
                lbp[i, j] = code
        
        lbp_hist = cv2.calcHist([lbp.astype(np.uint8)], [0], None, [32], [0, 256])
        
        # Concatenate all features
        features = np.concatenate([
            mag.flatten(),
            ang.flatten(),
            hist.flatten(),
            lbp_hist.flatten()
        ])
        
        return features
    
    def save_model(self, model_path):
        model_data = {
            'pca': self.pca,
            'kmeans': self.kmeans,
            'classifier': self.classifier,
            'n_components': self.n_components,
            'n_clusters': self.n_clusters,
            'class_names': self.class_names,
            'colors': self.colors,
            'confidence_threshold': self.confidence_threshold
        }
        
        with open(model_path, 'wb') as f:
            pickle.dump(model_data, f)
        
        print(f"Model saved to {model_path}")
        
    def find_regions_of_interest(self, image):
        
         # Convert to HSV for color-based detection
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        
        # Step 1: Create color masks
        # White/light colored
        lower_white = np.array([0, 0, 180])
        upper_white = np.array([180, 30, 255])
        white_mask = cv2.inRange(hsv, lower_white, upper_white)
        
        lower_blue = np.array([90, 50, 50])
        upper_blue = np.array([130, 255, 255])
        blue_mask = cv2.inRange(hsv, lower_blue, upper_blue)
        
        # Combined mask
        boat_mask = cv2.bitwise_or(white_mask, blue_mask)
        
        # Clean up mask
        kernel = np.ones((5, 5), np.uint8)
        boat_mask = cv2.morphologyEx(boat_mask, cv2.MORPH_OPEN, kernel, iterations=1)
        boat_mask = cv2.morphologyEx(boat_mask, cv2.MORPH_CLOSE, kernel, iterations=2)
        
        # Step 2: Find contours in the mask
        contours, _ = cv2.findContours(boat_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        
        # Step 3: Filter contours based on size, shape, and area
        detected_objects = []
        
        for contour in contours:
            # Get bounding rectangle
            x, y, w, h = cv2.boundingRect(contour)
            
            area = cv2.contourArea(contour)
                
            # Check if shape is appropriate for a boat
            if area > 200:
                # Add padding
                padding = 10
                x = max(0, x - padding)
                y = max(0, y - padding)
                w = min(image.shape[1] - x, w + 2*padding)
                h = min(image.shape[0] - y, h + 2*padding)
                
                confidence = min(area / 1000, 0.99)
                detected_objects.append((x, y, w, h, confidence))
                
        
        # Step 4: If insufficient detections from color, try structural analysis
        if len(detected_objects) < 1:
            # Convert to grayscale
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            
            # Enhance contrast
            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(gray)
            
            # Use bilateral filter to preserve edges but smooth areas
            filtered = cv2.bilateralFilter(enhanced, 9, 75, 75)
            
            # Use Canny edge detector to find edges
            edges = cv2.Canny(filtered, 30, 150)
            
            # Dilate to connect edges
            dilated = cv2.dilate(edges, kernel, iterations=1)
            
            # Find contours in the edges
            contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
            # Filter contours
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour) 
                aspect_ratio = float(w) / h
                area = cv2.contourArea(contour)
                
                # Boats typically have an aspect ratio between 1:1 and 3:1
                if 1.0 <= aspect_ratio <= 3.0 and area > 300:
                    # Add padding
                    padding = 10
                    x = max(0, x - padding)
                    y = max(0, y - padding)
                    w = min(image.shape[1] - x, w + 2*padding)
                    h = min(image.shape[0] - y, h + 2*padding)
                    
                    confidence = min(area / 2000, 0.7)  # Lower confidence for this method
                    detected_objects.append((x, y, w, h, confidence))
        
        # Debug visualization if needed
        # if self.debug_mode:
        #     debug_img = image.copy()
        #     for x, y, w, h, conf in detected_objects:
        #         cv2.rectangle(debug_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
        #         cv2.putText(debug_img, f"Object {conf:.2f}", (x, y-5), 
        #                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
        #     
        #     plt.figure(figsize=(10, 8))
        #     plt.subplot(121)
        #     plt.imshow(boat_mask, cmap='gray')
        #     plt.title("Object Detection Mask")
        #     plt.subplot(122)
        #     plt.imshow(debug_img)
        #     plt.title("Object Detections")
        #     plt.tight_layout()
        #     plt.show()
        
        return detected_objects
            
    def detect_objects(self, image_rgb, image):
        # Find regions of interest
        regions = self.find_regions_of_interest(image_rgb)
        detections = []
        
        # Process each region
        for i, region in enumerate(regions):
            x, y, w, h, conf = region
            
            # Extract region
            try:
                region_img = image[y:y+h, x:x+w]
                # Skip if region is too small
                if region_img.shape[0] < 10 or region_img.shape[1] < 10:
                    continue
                
                # Extract features
                features = self.extract_features(region_img)
                features = features.reshape(1, -1)
                
                # Apply PCA
                features_pca = self.pca.transform(features)
                
                # Get cluster features
                cluster_features = self.kmeans.transform(features_pca)
                features_with_clusters = np.hstack([features_pca, cluster_features])
                
                # Predict class and confidence
                class_id = self.classifier.predict(features_with_clusters)[0]
                confidence = np.max(self.classifier.predict_proba(features_with_clusters)[0])
                
                if confidence > self.confidence_threshold:
                    detections.append((class_id, confidence, x, y, w, h))
            except Exception as e:
                print(f"Error processing region {i}: {e}")
                continue
        
        # Step 5: Perform non-maximum suppression
        object_detections = self._non_max_suppression(detections)
        # Filter by confidence threshold
        object_detections = [obj for obj in object_detections if obj[2] >= self.confidence_threshold]
        
        return object_detections
    
    
    def process_image(self, image_path, output_dir="output"):
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)
        
        # Get base filename without extension
        base_name = os.path.splitext(os.path.basename(image_path))[0]
        
        # Load image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Could not read image at {image_path}")
            return
        
        # Convert to RGB
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Find regions of interest
        # object_detection = self.find_regions_of_interest(image_rgb)
        
        # Detect objects
        detections = self.detect_objects(image_rgb, image)
        print(f"Found {len(detections)} objects")
        
        # Visualize detections
        result_image = self.visualize_detection(image, detections)
        detection_output_path = os.path.join(output_dir, f"{base_name}_detection.jpg")
        cv2.imwrite(detection_output_path, result_image)
        print(f"Detection result saved to {detection_output_path}")
        
        # Draw detections on the visualization image
        # self._draw_detections(visualization, detections)
        # 
        # # Save or display results
        # if output_dir:
        #     cv2.imwrite(output_dir, cv2.cvtColor(visualization, cv2.COLOR_RGB2BGR))
        #     print(f"Results saved to {output_dir}")
        #     
        # elif self.debug_mode:
        #     plt.figure(figsize=(15, 10))
        #     plt.imshow(visualization)
        #     plt.title("Object Detections")
        #     plt.axis('off')
        #     plt.tight_layout()
        #     plt.show()
        
        return detections
    
    def _calculate_iou(self, box1, box2):
        """
        Calculate Intersection over Union for two boxes.
        
        Args:
            box1: Tuple of (x1, y1, x2, y2)
            box2: Tuple of (x1, y1, x2, y2)
            
        Returns:
            float: IoU value
        """
        # Calculate intersection area
        x_left = max(box1[0], box2[0])
        y_top = max(box1[1], box2[1])
        x_right = min(box1[2], box2[2])
        y_bottom = min(box1[3], box2[3])
        
        if x_right < x_left or y_bottom < y_top:
            return 0.0
        
        intersection_area = (x_right - x_left) * (y_bottom - y_top)
        
        # Calculate area of both boxes
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        
        # Calculate IoU
        iou = intersection_area / float(box1_area + box2_area - intersection_area)
        return iou
    
    def _non_max_suppression(self, boxes, overlap_threshold=0.3):
        if not boxes:
            return []
        
        # Convert (x, y, w, h) to (x1, y1, x2, y2) format
        boxes_xyxy = [(class_id, conf ,x, y, x+w, y+h) for class_id, conf, x, y, w, h in boxes]
        
        # Sort by confidence (highest first)
        boxes_xyxy.sort(key=lambda x: x[1], reverse=True)
        
        keep = []
        
        while boxes_xyxy:
            # Take the box with highest confidence
            current = boxes_xyxy.pop(0)
            keep.append(current)
            
            # Check remaining boxes
            i = 0
            while i < len(boxes_xyxy):
                # Calculate IoU between current box and this box
                iou = self._calculate_iou(current[2:6], boxes_xyxy[i][2:6])

                
                if iou > overlap_threshold:
                    # Remove box
                    boxes_xyxy.pop(i)
                else:
                    i += 1
        
        # Convert back to (x, y, w, h, confidence) format
        result = [(class_id, conf, x1, y1, x2-x1, y2-y1) for class_id, conf, x1, y1, x2, y2, in keep]
        return result
    
    def visualize_detection(self, image, detections):
        result_image = image.copy()
        
        # Sort detections by confidence (highest first)
        sorted_detections = sorted(detections, key=lambda x: x[1], reverse=True)
        
        for detection in sorted_detections:
            class_id, confidence, x, y, w, h = detection
            x1, y1, x2, y2 = int(x), int(y), int(x + w), int(y + h)
            class_id = int(class_id)
            
            # Draw bounding box
            color = self.colors[class_id]
            cv2.rectangle(result_image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            
            # Create label with class name and confidence
            label = f"{self.class_names[class_id]}: {confidence:.2f}"
            
            # Get text size for better label placement
            (label_width, label_height), baseline = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2
            )
            
            # Draw label background
            cv2.rectangle(
                result_image, 
                (int(x1), int(y1) - label_height - 10), 
                (int(x1) + label_width, int(y1)), 
                color, 
                -1  # Filled rectangle
            )
            
            # Draw label text in white
            cv2.putText(
                result_image, 
                label, 
                (int(x1), int(y1) - 7),
                cv2.FONT_HERSHEY_SIMPLEX, 
                0.5, 
                (255, 255, 255), 
                2
            )
        
        # Add metadata
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        info_text = f"Detections: {len(detections)} | Time: {timestamp} | Threshold: {self.confidence_threshold}"
        cv2.putText(
            result_image,
            info_text,
            (10, result_image.shape[0] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 0),
            2
        )
        
        return result_image
    

In [81]:
def load_model(model_path):
        """
        Load a trained model.
        
        Args:
            model_path: Path to the model
            
        Returns:
            detector: Loaded detector
        """
        with open(model_path, 'rb') as f:
            model_data = pickle.load(f)
        
        detector = CustomObjectDetector(
            n_components=model_data['n_components'],
            n_clusters=model_data['n_clusters'],
            confidence_threshold=model_data['confidence_threshold']
        )
        
        detector.pca = model_data['pca']
        detector.kmeans = model_data['kmeans']
        detector.classifier = model_data['classifier']
        detector.class_names = model_data['class_names']
        detector.colors = model_data['colors']
        
        print(f"Model loaded from {model_path}")
        return detector

In [82]:
# Define paths
image_folder = 'dataset/working/images'  # Base folder containing train, test, val
annotation_folder = 'dataset/working/labels'  # Base folder containing train, test, val
model_path = 'custom_detector1.pkl'

# Create and train the detector
detector = CustomObjectDetector()
detector.train(image_folder, annotation_folder)

# Option 2: Advanced training with background class (uncomment to use)
# detector.train_with_background(image_folder, annotation_folder)

# Save the model
detector.save_model(model_path)

Loading training data...
Loading dataset...


100%|██████████| 2787/2787 [25:40<00:00,  1.81it/s] 


Loaded 26960 training samples
Applying PCA...
Training K-means clustering...
Training the classifier...
Evaluating on test data...
Loading dataset...


100%|██████████| 514/514 [07:28<00:00,  1.15it/s]


Test accuracy: 0.8858489331064275

Classification Report:
                precision    recall  f1-score   support

         human       0.94      0.97      0.95      6511
wind/sup-board       0.53      0.43      0.48       634
          boat       0.02      0.01      0.01       143
          buoy       0.90      0.15      0.26        59
      sailboat       0.00      0.00      0.00        28
         kayak       0.49      0.53      0.51       264

      accuracy                           0.89      7639
     macro avg       0.48      0.35      0.37      7639
  weighted avg       0.87      0.89      0.87      7639

Model saved to custom_detector1.pkl


In [83]:
loaded_detector = load_model('custom_detector1.pkl')
image_path = "dataset/working/images/val/a_1013.jpg"
loaded_detector.process_image(image_path)

Model loaded from custom_detector1.pkl
Found 36 objects
Detection result saved to output/a_1013_detection.jpg


[(np.int64(0), np.float64(1.0), 562, 1564, 39, 44),
 (np.int64(0), np.float64(1.0), 2870, 1452, 50, 52),
 (np.int64(0), np.float64(1.0), 820, 1324, 46, 56),
 (np.int64(0), np.float64(1.0), 732, 1312, 70, 51),
 (np.int64(0), np.float64(1.0), 747, 1277, 60, 43),
 (np.int64(0), np.float64(1.0), 1215, 477, 59, 39),
 (np.int64(0), np.float64(1.0), 2743, 409, 50, 39),
 (np.int64(0), np.float64(1.0), 2129, 374, 37, 67),
 (np.int64(0), np.float64(0.99), 2189, 1534, 56, 47),
 (np.int64(0), np.float64(0.99), 646, 1321, 55, 55),
 (np.int64(0), np.float64(0.99), 2759, 826, 72, 77),
 (np.int64(0), np.float64(0.99), 1630, 548, 45, 36),
 (np.int64(0), np.float64(0.99), 2830, 370, 54, 59),
 (np.int64(0), np.float64(0.98), 1058, 1225, 55, 58),
 (np.int64(1), np.float64(0.98), 1014, 698, 267, 182),
 (np.int64(0), np.float64(0.98), 2750, 431, 58, 46),
 (np.int64(0), np.float64(0.97), 726, 1242, 48, 38),
 (np.int64(0), np.float64(0.97), 1536, 225, 58, 49),
 (np.int64(0), np.float64(0.96), 205, 1778, 33, 4