In [None]:
pip install opencv-python numpy scikit-learn joblib scipy

In [None]:
import glob
import os
import time

import cv2
import numpy as np
import scipy.ndimage as ndimage
from joblib import Parallel, delayed
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Dataset locations. Each path must end with "/" because the glob patterns
# further down are built by plain string concatenation (DIR + "*.jpg").
TRAIN_IMG_DIR = "./ADEChallengeData2016/images/training/"
TRAIN_ANN_DIR = "./ADEChallengeData2016/annotations/training/"
VAL_IMG_DIR   = "./ADEChallengeData2016/images/validation/"
VAL_ANN_DIR   = "./ADEChallengeData2016/annotations/validation/"

# Annotation pixel value that is treated as "floor" when labelling regions.
FLOOR_ID = 4

# Longest image side (in pixels) allowed after downscaling; passed as max_dim.
DOWNSCALE_MAX = 300
# Number of KMeans clusters (candidate regions) requested per image.
CLUSTERS_PER_IMAGE = 200

# How many files (taken from the start of the sorted listing) are used for
# training and for validation.
NUM_TRAIN_IMAGES = 50
NUM_VAL_IMAGES = 20

# Hyper-parameters forwarded to sklearn.svm.SVC.
SVM_KERNEL = "linear"
SVM_C = 1.0
# Forwarded to joblib.Parallel(n_jobs=...); -1 asks joblib to use all CPUs.
N_JOBS = -1

def downscale_keep_aspect(img, max_dim):
    """Shrink `img` so its longer side is at most `max_dim`, keeping aspect ratio.

    Images that already fit are returned untouched (the same object, no copy).

    Args:
        img: Image array whose first two dimensions are (height, width).
        max_dim: Maximum allowed length, in pixels, of the longer side.

    Returns:
        Tuple ``(image, scale)``: the possibly resized image and the scale
        factor that was applied (1.0 when no resizing took place).
    """
    h, w = img.shape[:2]
    longest = max(h, w)
    if longest <= max_dim:
        return img, 1.0

    scale = max_dim / longest
    # int() truncates, so the short side of a very elongated image could
    # become 0, which cv2.resize rejects. Keep at least one pixel per side.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    # INTER_AREA is the interpolation the original code chose for shrinking.
    img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
    return img, scale

def process_single_image(img_path, ann_path, n_clusters, max_dim):
    """Cluster one image into regions and build per-region features and labels.

    The image is downscaled, its pixels are clustered with KMeans on
    (three colour channels, normalised x, normalised y), and every non-empty
    cluster becomes one region.

    Args:
        img_path: Path of the image file.
        ann_path: Path of the annotation mask; it is read as a single-channel
            image.
        n_clusters: Number of KMeans clusters to request.
        max_dim: Longest allowed image side after downscaling.

    Returns:
        ``None`` when either file cannot be read or any step raises (the
        error is printed). Otherwise a tuple
        ``(features, region_labels, maps)``:

        * ``features``: one row per region, 7 columns - the mean of each of
          the three colour channels (in the order ``cv2.imread`` stores them,
          i.e. B, G, R), the sum of the three per-channel standard
          deviations, mean normalised x, mean normalised y, and the region's
          share of the image area.
        * ``region_labels``: int32 array, 1 where at least half of the
          region's pixels equal ``FLOOR_ID`` in the annotation, else 0.
        * ``maps``: ``(downscaled image, resized annotation, H x W grid of
          cluster ids)``.
    """
    try:
        img = cv2.imread(img_path)
        ann = cv2.imread(ann_path, 0)
        if img is None or ann is None:
            return None

        img_ds, _ = downscale_keep_aspect(img, max_dim)
        H, W = img_ds.shape[:2]
        # Nearest-neighbour keeps the mask values as valid class ids
        # (no blending between neighbouring classes).
        ann_ds = cv2.resize(ann, (W, H), interpolation=cv2.INTER_NEAREST)

        # One row per pixel. cv2.imread yields channels in B, G, R order, so
        # column 0 is blue; the columns are kept in that order throughout.
        pixels = img_ds.reshape(-1, 3).astype(np.float32)
        xs, ys = np.meshgrid(np.arange(W), np.arange(H))
        xs_norm = xs.reshape(-1) / W
        ys_norm = ys.reshape(-1) / H

        # NOTE(review): colour values stay in 0..255 while x/y lie in [0, 1),
        # so colour dominates the KMeans distance - confirm this weighting is
        # intended.
        cluster_input = np.hstack(
            [pixels, xs_norm.reshape(-1, 1), ys_norm.reshape(-1, 1)]
        )

        km = KMeans(n_clusters=n_clusters, random_state=42, n_init=4)
        labels_flat = km.fit_predict(cluster_input)
        labels_grid = labels_flat.reshape(H, W)

        # Only describe clusters that actually received pixels.
        present_clusters = np.unique(labels_flat)

        chan_means = [
            ndimage.mean(pixels[:, c], labels=labels_flat, index=present_clusters)
            for c in range(3)
        ]
        chan_stds = [
            ndimage.standard_deviation(
                pixels[:, c], labels=labels_flat, index=present_clusters
            )
            for c in range(3)
        ]
        std_sum = chan_stds[0] + chan_stds[1] + chan_stds[2]

        mean_x = ndimage.mean(xs_norm, labels=labels_flat, index=present_clusters)
        mean_y = ndimage.mean(ys_norm, labels=labels_flat, index=present_clusters)

        # bincount counts integer labels exactly; no float bin edges involved.
        counts = np.bincount(labels_flat, minlength=n_clusters)
        area_frac = counts[present_clusters] / float(H * W)

        # Fraction of each region's pixels annotated as floor; majority vote
        # turns it into the region's binary label.
        floor_mask_flat = (ann_ds.reshape(-1) == FLOOR_ID).astype(np.float32)
        floor_fractions = ndimage.mean(
            floor_mask_flat, labels=labels_flat, index=present_clusters
        )
        region_labels = (floor_fractions >= 0.5).astype(np.int32)

        features = np.column_stack(
            [chan_means[0], chan_means[1], chan_means[2],
             std_sum, mean_x, mean_y, area_frac]
        )

        return features, region_labels, (img_ds, ann_ds, labels_grid)

    except Exception as e:
        # Best effort: one bad image must not abort the whole batch, but say
        # which image failed so the problem can be tracked down.
        print(f"Error processing {img_path}: {e}")
        return None

def build_dataset_parallel(image_paths, ann_paths, n_clusters, max_dim):
    """Run process_single_image over all (image, annotation) pairs in parallel.

    Pairs are formed positionally with zip(); images whose processing
    returned None are dropped.

    Returns:
        Tuple ``(X, y, maps)``: features of all regions stacked row-wise, the
        matching region labels concatenated, and a list with one map tuple
        per successfully processed image. When nothing could be processed
        the result is two empty arrays and an empty list.
    """
    print(f"Processing {len(image_paths)} images with {N_JOBS} jobs...")

    worker = delayed(process_single_image)
    tasks = (
        worker(img_path, ann_path, n_clusters, max_dim)
        for img_path, ann_path in zip(image_paths, ann_paths)
    )
    outcomes = Parallel(n_jobs=N_JOBS)(tasks)

    feature_blocks, label_blocks, maps = [], [], []
    for outcome in outcomes:
        if outcome is None:
            continue
        feature_blocks.append(outcome[0])
        label_blocks.append(outcome[1])
        maps.append(outcome[2])

    if not maps:
        return np.array([]), np.array([]), []

    return np.vstack(feature_blocks), np.concatenate(label_blocks), maps


# Train/evaluate driver: build region datasets, fit the SVM, report metrics.

def _paired_paths(img_dir, ann_dir, limit):
    """Return up to `limit` image paths and their annotation paths, matched by name.

    Pairing by file stem (instead of by position in two independently sorted
    listings) keeps every image with its own mask even when a file is missing
    from one of the two directories.
    """
    def stem(path):
        return os.path.splitext(os.path.basename(path))[0]

    ann_by_stem = {stem(p): p for p in glob.glob(ann_dir + "*.png")}
    pairs = [
        (p, ann_by_stem[stem(p)])
        for p in sorted(glob.glob(img_dir + "*.jpg"))
        if stem(p) in ann_by_stem
    ][:limit]
    return [img for img, _ in pairs], [ann for _, ann in pairs]


train_images, train_annots = _paired_paths(TRAIN_IMG_DIR, TRAIN_ANN_DIR, NUM_TRAIN_IMAGES)

print("Building region dataset for training...")
t0 = time.time()
X_train, y_train, _ = build_dataset_parallel(train_images, train_annots, CLUSTERS_PER_IMAGE, DOWNSCALE_MAX)
build_train_time = time.time() - t0
print(f"Built train regions: {X_train.shape[0]} regions from {len(train_images)} images (time {build_train_time:.1f}s)")

# Fail early with a clear message instead of an opaque scikit-learn error.
if X_train.size == 0:
    raise RuntimeError(
        f"No training regions were built; check {TRAIN_IMG_DIR} and {TRAIN_ANN_DIR}"
    )
if np.unique(y_train).size < 2:
    raise RuntimeError(
        "Training regions contain a single class; the SVM needs both floor "
        "and non-floor examples"
    )

print("Training SVM...")
# The features mix 0..255 colour statistics with 0..1 positions and area
# fractions. SVMs are not scale invariant, so the features are standardised
# first. Floor regions are a small minority, so classes are re-weighted;
# without this an earlier run never predicted floor (precision = recall = 0).
svm = make_pipeline(
    StandardScaler(),
    SVC(kernel=SVM_KERNEL, C=SVM_C, class_weight="balanced"),
)
t0 = time.time()
svm.fit(X_train, y_train)
train_time = time.time() - t0
print(f"Trained SVM in {train_time:.2f}s")


val_images, val_annots = _paired_paths(VAL_IMG_DIR, VAL_ANN_DIR, NUM_VAL_IMAGES)

print("Building region dataset for validation...")
t0 = time.time()
X_val, y_val, val_region_maps = build_dataset_parallel(val_images, val_annots, CLUSTERS_PER_IMAGE, DOWNSCALE_MAX)
build_val_time = time.time() - t0

if X_val.size == 0:
    raise RuntimeError(
        f"No validation regions were built; check {VAL_IMG_DIR} and {VAL_ANN_DIR}"
    )

t0 = time.time()
y_pred = svm.predict(X_val)
inference_time = time.time() - t0

# Metrics are computed per region (one sample per cluster), not per pixel.
accuracy  = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred, zero_division=0)
recall    = recall_score(y_val, y_pred, zero_division=0)

print("\n=== Optimized Method 3: Region-based (KMeans -> SVM) ===")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"Region build (train) time: {build_train_time:.2f}s")
print(f"SVM training time:         {train_time:.2f}s")
print(f"Region build (val) time:   {build_val_time:.2f}s")
print(f"Inference time:            {inference_time:.2f}s")


if val_region_maps:
    # Kept at module level so later notebook cells can use the first
    # validation sample for plotting.
    img_ds, ann_ds, labels_grid_val = val_region_maps[0]
    H, W = img_ds.shape[:2]

    print("Visualization skipped in batch mode for simplicity.")


Building region dataset for training...
Processing 50 images with -1 jobs...
Built train regions: 10000 regions from 50 images (time 61.3s)
Training SVM...
Trained SVM in 127.60s
Building region dataset for validation...
Processing 20 images with -1 jobs...

=== Optimized Method 3: Region-based (KMeans -> SVM) ===
Accuracy : 0.9490
Precision: 0.0000
Recall   : 0.0000
Region build (train) time: 61.27s
SVM training time:         127.60s
Region build (val) time:   25.52s
Inference time:            0.05s
Visualization skipped in batch mode for simplicity.
