# Segmentation

Name: Syed Zain Raza

## Loading Images

### Building-Only Images: `entry-P10`

In [2]:
import glob
from typing import List

In [3]:
BASE_DATA_PATH = "./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008"
subset_names1 = ["entry-P10"]
file_ext_pattern = "*.jpg"

In [4]:
def aggregate_image_paths(
    base_data_path: str,
    image_subset_paths: List[str],
    file_ext_pattern: str,
) -> List[str]:
    """
    Look up all the images we care about using a file paths pattern. 
    Returns the file paths in a 1D Python list.
    """
    all_img_paths = list()

    for subset in image_subset_paths:
        pattern_for_subset_images = "/".join(
            [base_data_path, subset, "images", file_ext_pattern]
        )
        all_img_paths.extend(glob.glob(pattern_for_subset_images))

    return all_img_paths

In [7]:
ten_building_only_img_paths = aggregate_image_paths(BASE_DATA_PATH, subset_names1, file_ext_pattern)

In [8]:
ten_building_only_img_paths

['./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0006.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0007.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0005.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0004.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0000.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0001.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0003.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0002.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0009.jpg',
 './SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10/images/0008.jpg']

In [9]:
from util import ops

In [16]:
building_only_img_arrays = [
    ops.load_image(
        img_path,
        return_grayscale=False,
        return_array=True,  # dictates that we want to have a NumPy array
        verbosity=False,
    )
    for img_path in ten_building_only_img_paths
]
# for convenience
building_only_img_map = dict(zip(ten_building_only_img_paths, building_only_img_arrays))

### Building and Tractor Images

- let `castle-P30` = "training" images for the framework
- let `castle-P19` = "hold-out" images in the framework

In [13]:
train_img_paths = aggregate_image_paths(
    BASE_DATA_PATH, ["castle-P30"], file_ext_pattern
)

test_img_paths = aggregate_image_paths(
    BASE_DATA_PATH, ["castle-P19"], file_ext_pattern
)

In [17]:
train_img_arrays = [
    ops.load_image(
        img_path,
        return_grayscale=False,
        return_array=True,  # dictates that we want to have a NumPy array
        verbosity=False,
    )
    for img_path in train_img_paths
]
# for convenience
train_img_map = dict(zip(train_img_paths, train_img_arrays))

In [18]:
test_img_arrays = [
    ops.load_image(
        img_path,
        return_grayscale=False,
        return_array=True,  # dictates that we want to have a NumPy array
        verbosity=False,
    )
    for img_path in test_img_paths
]
# for convenience
test_img_map = dict(zip(test_img_paths, test_img_arrays))

## Implementation

### Imports

In [12]:
import cv2
import numpy as np
from util.clustering import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Training

Goal is for us to have a trained estimator that, when given a set of pixel-wise features of an image, can produce a binary mask we can use for segmenting out our tractor.

#### Step 1: Feature Engineering

In [24]:
# Step 1: get features. For now, stick to simple RGB space
def _reshape_into_3d(img: np.ndarray) -> np.ndarray:
        
        pixels = np.zeros((img.shape[1] * img.shape[0], 3))
        pixel_index = 0
        for x in np.arange(img.shape[1]):
            for y in np.arange(img.shape[0]):
                pixels[pixel_index] = img[y, x, :]
                pixel_index += 1
        
        return pixels

In [25]:
len(train_img_arrays)

30

In [26]:
train_img_building_and_tractor_features = list(map(_reshape_into_3d, train_img_arrays))

In [27]:
train_img_building_only_features = list(map(_reshape_into_3d, building_only_img_arrays))

#### Step 2: Clustering the Building Only Images

We use mean shift clustering, as we want an unbiased picture on how closely clustered the features of the building tend to be.

In [35]:
from typing import Dict, Tuple

import numpy as np


def mean_shift(
    data: np.ndarray,
    window_size: int = 1.0,
    max_iter: int = 100,
    convergence_threshold: float = 1e-4,
) -> Dict[int, Tuple[Tuple[float], List[np.ndarray]]]:
    """
    Unsupervised clustering of n-dimensional feature vectors using Mean-Shift.
    Returns a dictionary in the form: {cluster_label -> (cluster_centroid_coords, list_of_member_img_indices)}.
    """
    # 1: find the "hill" each point should climb
    n_points = data.shape[0]

    # Initialize random means for each data point
    hills = list()

    for i in range(n_points):
        current_point = data[i, :]
        means = [current_point]

        for _ in range(max_iter):
            # Find points within the bandwidth distance from the current mean
            last_mean = means[-1]
            within_window = np.linalg.norm(data - last_mean, axis=1) < window_size

            # Update mean using the points within the bandwidth
            new_mean = np.mean(data[within_window], axis=0)
            means.append(new_mean)

            # Check for convergence
            if np.linalg.norm(last_mean - new_mean) < convergence_threshold:
                break

        hills.append(means)

    # 2: Assign cluster labels based on the final means
    unique_means = list(set([tuple(hill[-1]) for hill in hills]))
    cluster_labels_to_points = dict()
    cluster_coords_to_labels = dict()
    for cluster_label in range(len(unique_means)):
        cluster_labels_to_points[cluster_label] = list()
        cluster_coords_to_labels[unique_means[cluster_label]] = cluster_label

    for i, hill in enumerate(hills):
        # map this point to the specific cluster
        mean = hill[-1]
        original_point = hill[0]
        label = cluster_coords_to_labels[tuple(mean)]
        cluster_labels_to_points[label].append(i)

    # bring it all together: label -> centroid, list of original pts
    all_cluster_data = dict()
    for centroid_coords, label in cluster_coords_to_labels.items():
        original_pts = cluster_labels_to_points[label]
        all_cluster_data[label] = (centroid_coords, original_pts)

    return all_cluster_data

In [36]:
def compute_and_report_clusters(
    data: np.ndarray,
    window_size: int = 1.0,
    max_iter: int = 100,
    convergence_threshold: float = 1e-4,
) -> Dict[int, Tuple[Tuple[float], List[np.ndarray]]]:
    """Convenience wrapper around mean shift function."""
    clusters_of_descriptors = mean_shift(
        data=data,
        window_size=500,
        max_iter=1_000_000_0,
        convergence_threshold=1.5,
    )

    for label, pair in clusters_of_descriptors.items():
        print("==========================")
        print(f"Cluster #{label + 1} Report:")
        print(f"Centroid Coordinates: {pair[0]}")
        print(f"Cluster Members (by Image Index): {pair[1]}")

    return clusters_of_descriptors

In [37]:
np.array(train_img_building_only_features).shape

(10, 6291456, 3)

In [39]:
clusters_of_building_only_rgb_features = compute_and_report_clusters(
    data=np.array(train_img_building_only_features)[0, :, :],
    window_size=500,
    max_iter=1_000_000_0,
    convergence_threshold=1.5,
)