## Functionality
Uses K-means clustering to group pixels into 5-6 color groups (change the `n_clusters` argument)
Saves each color to a separate layer

## pre-reqs:
1. Historical maps (e.g., https://collections.leventhalmap.org/search/commonwealth:q524n4440) have been saved to a local `images/` directory

Inspired by Daniel Steiner: https://www.youtube.com/watch?v=UA63zaIXCZw


In [None]:
import glob

%matplotlib
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from PIL import Image
import numpy as np
import os
import cv2

In [None]:
def preprocess(image, n_clusters=6):
    """Quantize an image to a small color palette using K-means clustering.

    Parameters
    ----------
    image : numpy.ndarray
        Pixel array; it is flattened to rows of 3 values (one row per
        pixel) before clustering, so its size must be a multiple of 3.
    n_clusters : int, optional
        Number of color groups to find. Defaults to 6.

    Returns
    -------
    new_image : numpy.ndarray
        Array with the same shape as ``image`` in which every pixel has been
        replaced by the (integer) center color of its cluster.
    colors : numpy.ndarray
        Integer array holding one 3-value center color per cluster.
    """
    # Reshape the image to be a list of pixels
    pixels = image.reshape(-1, 3)

    # Fixed random_state keeps the clustering reproducible between runs
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(pixels)

    # Integer centers allow exact equality tests against the quantized image
    colors = kmeans.cluster_centers_.astype(int)

    # Replace each pixel with the color of its cluster
    new_image = colors[kmeans.labels_].reshape(image.shape)

    return new_image, colors

In [None]:
# Define a function to save each color as a separate image
def save_color_layer(image_array, color, filename, color_name,
                     output_dir="color_layers"):
    """Write the pixels of a single color to their own PNG layer.

    Pixels of ``image_array`` that match ``color`` on every channel keep
    that color; all other pixels are set to zero.

    Parameters
    ----------
    image_array : numpy.ndarray
        Image whose last axis holds the color channels.
    color : array-like
        Color to extract; compared for exact equality with each pixel.
    filename : str
        Base name of the source image, used as the output file prefix.
    color_name : str
        Label of the layer, used as the output file suffix.
    output_dir : str, optional
        Directory that receives the PNG. Created if it does not exist.
        Defaults to ``"color_layers"``.
    """
    # True where every channel of the pixel equals the requested color
    mask = np.all(image_array == color, axis=-1)

    layer = np.zeros_like(image_array)
    layer[mask] = color

    # Make the function usable on its own, without a prior makedirs call
    os.makedirs(output_dir, exist_ok=True)

    # Image.fromarray is given 8-bit data, hence the cast
    layer_image = Image.fromarray(layer.astype(np.uint8))
    layer_image.save(os.path.join(output_dir, f"{filename}_{color_name}.png"))

In [None]:
# Collect the input files; sorted so the processing order is reproducible
images = sorted(glob.glob('images/*'))
# Create output directory if not exists
output_dir = "color_layers"
os.makedirs(output_dir, exist_ok=True)

# Layer names, paired below with the cluster colors ordered by ascending
# red-channel value
names = [
    'Annotations',
    'Historical Shoreline',
    'Water_1995',
    'Land Filled',
    'Unchanged Shoreline',
]

for image_file in images:
    # read
    image_raw = cv2.imread(image_file)
    if image_raw is None:
        # cv2.imread returns None for anything it cannot decode
        # (sub-directories, non-image files, corrupt images)
        print(f"Skipping {image_file}: not a readable image.")
        continue
    # OpenCV loads BGR; convert so the saved layers are in RGB order
    image_raw = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)

    # pre-process the images pixels
    image, colors = preprocess(image_raw)
    sorted_colors = colors[colors[:, 0].argsort()]

    # Base name without directory or extension, independent of the path
    # separator and safe for file names that contain extra dots
    filename = os.path.splitext(os.path.basename(image_file))[0]

    # Process and save each color layer as a separate image.
    # zip stops after the last name, so clusters beyond len(names) are
    # not saved.
    for color, name in zip(sorted_colors, names):
        save_color_layer(image, color=color, filename=filename, color_name=name)

    print("Color layers have been saved as individual images.")
