In [1]:
%pip install MiniSom


Collecting MiniSom
  Downloading minisom-2.3.5.tar.gz (12 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: MiniSom
  Building wheel for MiniSom (setup.py) ... [?25ldone
[?25h  Created wheel for MiniSom: filename=MiniSom-2.3.5-py3-none-any.whl size=12030 sha256=1226ab93a29bd1eda14bc176aef36d6c345c02e0808654cd586ad3479fc1c945
  Stored in directory: /home/jovyan/.cache/pip/wheels/19/db/95/5e53bc2b88a328217fdf9f2886cafbe86b0df274f4b601f572
Successfully built MiniSom
Installing collected packages: MiniSom
Successfully installed MiniSom-2.3.5
Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
from minisom import MiniSom # Import the library
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

# --- 1. Prepare Data ---
# MiniSom needs a NumPy array, so the scaled 'features' vector column of the
# 'route_ml_ready' DataFrame is collected to the driver and converted here.
# 'route_ml_ready' is expected to come from an earlier cell (not part of this
# cell); on a fresh kernel that cell must run first, so fail with an actionable
# message instead of a bare NameError.
try:
    route_features = route_ml_ready.select("features")
except NameError as exc:
    raise NameError(
        "name 'route_ml_ready' is not defined - run the cells that build the "
        "scaled route feature DataFrame before this SOM cell"
    ) from exc

# row[0] is the single selected 'features' column; .toArray() converts each
# vector to an array so the collected rows stack into one matrix.
data_for_som = np.array(route_features.rdd.map(lambda row: row[0].toArray()).collect())

# An empty DataFrame collapses to shape (0,), which would otherwise surface as
# an opaque IndexError on shape[1] below.
if data_for_som.ndim != 2 or data_for_som.shape[0] == 0:
    raise ValueError(
        f"Expected a non-empty 2-D feature matrix for the SOM, got shape {data_for_som.shape}"
    )
num_features = data_for_som.shape[1]

# --- 2. Initialize and Train the SOM ---
# The map is a square grid of 10 x 10 neurons.
map_size_x = map_size_y = 10

# Hyper-parameters forwarded to the MiniSom constructor:
#   sigma          - radius of the neighborhood function
#   learning_rate  - how much weights are adjusted during training
som_options = dict(
    sigma=1.5,
    learning_rate=0.5,
    neighborhood_function='gaussian',
    random_seed=42,
)
n_training_iterations = 1000  # number of iterations passed to train_random

som = MiniSom(map_size_x, map_size_y, num_features, **som_options)

# Random initial weights (the initializer is handed the training data)
som.random_weights_init(data_for_som)

# Fit the map, announcing start and end of training
print(f"--- Training SOM ({map_size_x}x{map_size_y} grid)... ---")
som.train_random(data_for_som, n_training_iterations)
print("--- SOM Training Complete ---")

# --- 3. Visualize the SOM Map (U-Matrix and Clusters) ---
# The U-Matrix visualizes distances between neighboring neurons
# (darker = larger distance/cluster boundary).
fig, ax = plt.subplots(figsize=(10, 10))
u_matrix = ax.pcolor(som.distance_map().T, cmap='bone_r')  # U-Matrix
fig.colorbar(u_matrix, ax=ax)

# Map each route to its winning neuron on the SOM. There is no known 'true'
# label here, so every route gets the same marker: the plot only shows where
# each route landed on the map.
# reshape keeps the (n_routes, 2) layout even when there are no rows.
winning_neurons = np.array([som.winner(x) for x in data_for_som]).reshape(-1, 2)

# pcolor draws neuron (i, j) as the unit square [i, i+1] x [j, j+1], so markers
# are shifted by 0.5 to sit in the middle of their cell instead of on its
# lower-left corner. A jitter of +/-0.4 keeps overlapping routes visible while
# staying inside the cell; the seeded generator makes the figure reproducible.
jitter_rng = np.random.default_rng(42)
jitter = jitter_rng.uniform(-0.4, 0.4, size=winning_neurons.shape)
marker_xy = winning_neurons + 0.5 + jitter

# One vectorized call instead of one plot call per route.
ax.plot(marker_xy[:, 0], marker_xy[:, 1], 'o', markerfacecolor='None',
        markeredgecolor='C1', markersize=12, markeredgewidth=2)

ax.set_title('Self-Organizing Map (SOM) of Flight Routes', fontsize=16)
# Integer ticks fall on the cell edges, so the grid outlines each neuron.
ax.set_xticks(np.arange(map_size_x))
ax.set_yticks(np.arange(map_size_y))
ax.grid(True)
plt.show()

# --- 4. (Optional) Get Cluster Assignments from SOM ---
# We can apply K-Means *on the SOM weights* to get cluster assignments if needed
# For now, the visual map is the primary output.


NameError: name 'route_ml_ready' is not defined