# Polynomial regression approach

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from kneed import KneeLocator
import rasterio
from rasterio import mask

In [2]:
# Step 1: Load raster data and preprocess
# Location of the raster file that is opened with rasterio in the next cell.
# NOTE(review): this is an absolute, machine-specific path; the file name
# suggests a 30-component PCA output of a hyperspectral scene -- confirm and
# adjust before running on another machine.
pca_hs_image_path = '/Users/patrickangst/Documents/GitHub/UWW200_Master_Thesis_public/SpectralPatang/test_data_elbow/ang20180729t212542rfl/result/ang20180729t212542_rfl_v2r2_img_rectified/SPCA/PCA/OutputPCA_30_PCs'


In [3]:
# Open the raster, keep its profile (dataset metadata) and load bands 1-4,
# i.e. the first 4 PCA bands. The dataset is closed when the block exits.
with rasterio.open(pca_hs_image_path) as src:
    profile = src.profile
    pca_hs_image = src.read(indexes=[1, 2, 3, 4])

In [4]:
# Step 2: Downsample the raster (reduce spatial resolution)
# Average pooling over non-overlapping factor x factor windows
# (comparable to 'terra::aggregate' with the mean as aggregation function).
def downsample_raster(raster_data, factor=5):
    """Reduce the spatial resolution of a band-first raster by block averaging.

    Each output pixel is the mean of one non-overlapping ``factor`` x
    ``factor`` window of the input, computed independently for every band.

    Args:
        raster_data: Array of shape (bands, rows, cols).
        factor: Integer edge length of the averaging window; must be >= 1.

    Returns:
        Array of shape (bands, rows // factor, cols // factor).

    Raises:
        ValueError: If ``factor`` is smaller than 1.

    Notes:
        Trailing rows/columns that do not fill a complete window are dropped.
        A window containing NaN yields NaN, because a plain mean is used.
    """
    if factor < 1:
        raise ValueError("factor must be a positive integer")

    n_bands, n_rows, n_cols = raster_data.shape
    out_rows, out_cols = n_rows // factor, n_cols // factor

    # Crop to a multiple of `factor` so the array splits evenly into windows.
    cropped = raster_data[:, :out_rows * factor, :out_cols * factor]

    # Expose each window as its own pair of axes (2 and 4), then average them.
    windows = cropped.reshape(n_bands, out_rows, factor, out_cols, factor)
    return windows.mean(axis=(2, 4))

# Downsample the PCA data with the default factor of 5.
# NOTE(review): Step 3 below reshapes `pca_hs_image`, not
# `pca_data_downsampled`, so this result is not used by the clustering in the
# cells shown here -- confirm whether that is intended.
pca_data_downsampled = downsample_raster(pca_hs_image)

In [6]:
# Step 3: Reshape and preprocess the data
# Flatten the PCA data into a 2D array in which each pixel is a row and each
# band a column. The image is band-first (bands, rows, cols), so the two
# spatial axes have to be collapsed first and the result transposed.
# Reshaping directly to (-1, bands) would instead cut every band into runs of
# consecutive pixels and, after the transpose, produce (bands, n_pixels).
n_bands = pca_hs_image.shape[0]
pca_data_reshaped = pca_hs_image.reshape(n_bands, -1).T  # (n_samples, n_features)


In [5]:
# Drop every row that contains at least one NaN value.
rows_with_nan = np.isnan(pca_data_reshaped).any(axis=1)
pca_data_reshaped = pca_data_reshaped[~rows_with_nan]


In [None]:
# Step 4: Calculate WSS for different k values
# Candidate cluster counts: 2 through 15 (the upper bound of range is exclusive).
k_range = range(2, 16)
wss = []
for k in k_range:
    # fit() returns the fitted estimator, so construction and fitting can be chained.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42).fit(pca_data_reshaped)
    # inertia_ is the within-cluster sum of squares (WSS) of the fitted model.
    wss.append(kmeans.inertia_)

In [None]:
# Step 5: Locate the "elbow" of the WSS curve with the Kneedle algorithm
# The curve is declared as convex and decreasing for the knee search.
kneedle = KneeLocator(x=k_range, y=wss, curve="convex", direction="decreasing")
optimal_k = kneedle.elbow

In [None]:
# Step 6: Plot the WSS and highlight the optimal k
plt.figure(figsize=(8, 6))
plt.plot(k_range, wss, marker='o', label="WSS (within-cluster sum of squares)")
# KneeLocator reports None when it cannot detect an elbow; a vertical line
# cannot be drawn at x=None, so the marker is only added when an elbow exists.
if optimal_k is not None:
    plt.axvline(x=optimal_k, color='red', linestyle='--', label=f"Optimal k = {optimal_k}")
plt.title("Elbow Method for Optimal k")
plt.xlabel("Number of Clusters (k)")
plt.ylabel("Within-Cluster Sum of Squares (WSS)")
plt.legend()
plt.show()

In [None]:
# Step 7: Print the optimal number of clusters
# `optimal_k` is the value taken from `kneedle.elbow` in Step 5.
# NOTE(review): presumably this prints "None" when no elbow was detected -- confirm.
print(f"Optimal number of clusters (k) determined by Kneedle: {optimal_k}")