# Visualize VERGE Embeddings

## Processing Setup

In [1]:
# Google Colab setup: mount Google Drive and make the project folder stored
# there the working directory, so later relative paths resolve against it.
import os
from google.colab import drive

project_home = '/content/drive/MyDrive/Projects/verge'

drive.mount('/content/drive')
os.chdir(project_home)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Local processing setup
# project_home = '..'

## Notebook Setup

In [3]:
import numpy as np
from typing import List, Tuple, Optional
import pickle
import pandas as pd


## Parameters

In [4]:
# Region of interest (ROI) whose data this notebook works on.
roi_name = 'newengland'

# Unique identifier of the model to be used, and identifier of the splits file.
run_id = '201b'
splits_id = '201'

# Data directories under the project home:
#   data_home -- general-purpose data directory
#   roi_home  -- ROI-specific data directory (a subdirectory of data_home)
data_home = '%s/data' % project_home
roi_home = '%s/data/%s' % (project_home, roi_name)



## Load final embeddings

In [5]:
# Read the pickled final embeddings for this ROI into `finals`.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load files that were produced by a trusted source.
fname = '%s/batch/final_embeddings.pkl' % (roi_home)
with open(fname, mode='rb') as stream:
    finals = pickle.load(stream)

print('%d final embeddings' % len(finals))

1675 final embeddings


## Compute the most similar and most dissimilar tiles to a selected tile

In [15]:
# How many nearest / farthest tiles to keep for the map.
n_neighbors = 4

# Pick a random embedding.
n = len(finals)
ix = np.random.choice(n)
print(ix)

# Compute the Euclidean distance between the selected embedding and every
# embedding in `finals` (including itself, which yields a distance of 0).
key_final = finals[ix]
key_embedding = key_final['embedding']
distances = np.array([
    np.linalg.norm(key_embedding - z['embedding'])
    for z in finals
])

# Sort the distances in ascending order.
sorted_indices = np.argsort(distances)
print(sorted_indices[:10])
print(distances[sorted_indices[:10]])

# Remove the key tile by index rather than assuming it sorts to position 0:
# if another tile has an identical embedding (distance 0), argsort may place
# that tile first and the key tile would otherwise be listed as its own
# neighbour.
other_indices = sorted_indices[sorted_indices != ix]

# Nearest tiles, closest first.
sim_finals = [finals[i] for i in other_indices[:n_neighbors]]

# Farthest tiles, most distant first. The previous slice [-5:-1] skipped the
# single most distant tile; reversing and taking the head includes it.
alt_finals = [finals[i] for i in other_indices[::-1][:n_neighbors]]


1486
[1486 1268  891  710  206  225  935  941  737  936]
[0.         0.1960096  0.21700543 0.22761612 0.23175062 0.23274286
 0.23790628 0.24517575 0.24924517 0.2523062 ]


In [16]:
import folium


def _add_tile_box(fmap, tile, color, tooltip):
    """Add the bounding box of `tile` to `fmap` as a translucent rectangle.

    The box corners are read from the tile's 'lat0'/'lon0' and 'lat1'/'lon1'
    entries; `color` is used for both the outline and the fill.
    """
    corner_a = [tile['lat0'], tile['lon0']]
    corner_b = [tile['lat1'], tile['lon1']]
    folium.Rectangle(
        bounds=[corner_a, corner_b],
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.2,
        tooltip=tooltip
    ).add_to(fmap)


# Map centered on the key tile.
center_lat, center_lon = key_final['center_lat'], key_final['center_lon']
m = folium.Map(location=[center_lat, center_lon], zoom_start=10,
               width=800, height=500)

# Key tile in blue.
_add_tile_box(m, key_final, 'blue', 'Key Tile')

# Tiles from sim_finals in green, numbered from 1.
for i, tile in enumerate(sim_finals):
    _add_tile_box(m, tile, 'green', f'Similar Tile {i+1}')

# Tiles from alt_finals in red, numbered from 1.
for i, tile in enumerate(alt_finals):
    _add_tile_box(m, tile, 'red', f'Dissimilar Tile {i+1}')

# Display the map
display(m)

## Clustering

In [19]:
# Collect every tile's embedding into one array; squeeze removes any
# length-1 axes from the stacked result.
embeddings = np.squeeze(np.array([tile['embedding'] for tile in finals]))
embeddings.shape

(1675, 128)

In [23]:
from sklearn.cluster import KMeans

# Number of clusters to partition the embeddings into.
n_clusters = 10

# Fit k-means with a fixed random_state, then read the per-row cluster
# assignment from the fitted model.
kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init=10)
kmeans.fit(embeddings)
cluster_labels = kmeans.labels_

# Show the cluster label assigned to each embedding.
print(cluster_labels)

[6 6 3 ... 3 1 6]


In [24]:
import folium
import pandas as pd

# Per-tile geometry keyed by the tile's position in `finals`. That position
# is also the index used to look the tile up in `cluster_labels` below.
tile_info_lookup = {
    i: {key: tile[key]
        for key in ('center_lat', 'center_lon', 'lat0', 'lon0', 'lat1', 'lon1')}
    for i, tile in enumerate(finals)
}

# Get the identifiers for each tile
idents = list(tile_info_lookup.keys())

# Create a map centered on the first tile (a rough starting point for the
# data), or a default world view when there are no tiles.
if idents:
    first_tile_info = tile_info_lookup[idents[0]]
    m = folium.Map(location=[first_tile_info['center_lat'], first_tile_info['center_lon']], zoom_start=6, width=800, height=500)
else:
    m = folium.Map(location=[0, 0], zoom_start=2, width=800, height=500) # Default map if no idents

# Color per cluster index. Every entry must be a valid CSS color name because
# vector layers (CircleMarker, Rectangle) hand the value to the browser as-is.
# 'lightred' and 'darkpurple' are folium.Icon marker names, not CSS colors,
# so they are replaced here by 'lightcoral' and 'indigo'.
colors = [
    'red', 'blue', 'green', 'purple', 'orange', 'darkred',
    'lightcoral', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
    'indigo', 'white', 'pink', 'lightblue', 'lightgreen',
    'gray', 'black', 'lightgray', 'gold'
]

# Ensure enough colors for the number of clusters
if n_clusters > len(colors):
    print(f"Warning: Not enough colors defined for {n_clusters} clusters. Using repeating colors.")
    colors = (colors * ((n_clusters // len(colors)) + 1))[:n_clusters]

# Marker radius; adjust as needed.
radius = 10

# Add each tile to the map as a filled circle in its cluster's color.
for i, ident in enumerate(idents):
    tile_info = tile_info_lookup[ident]
    cluster = cluster_labels[i]
    color = colors[cluster]

    folium.CircleMarker(
        location=[tile_info['center_lat'], tile_info['center_lon']],
        radius=radius,
        color=color,
        stroke=False,
        fill=True,
        fill_color=color,
        fill_opacity=0.6,
        opacity=1,
        # Same hover text as the rectangle rendering of this map.
        tooltip=f"Tile: {ident}, Cluster: {cluster}",
    ).add_to(m)

# Display the map
display(m)

In [None]:
import folium
import pandas as pd

# This cell reuses `tile_info_lookup` and `idents` built in the previous
# cluster-map cell, plus `cluster_labels` / `n_clusters` from the KMeans cell.

# Create a map centered on the first tile (a rough starting point for the
# data), or a default world view when there are no tiles.
if idents:
    first_tile_info = tile_info_lookup[idents[0]]
    m = folium.Map(location=[first_tile_info['center_lat'], first_tile_info['center_lon']], zoom_start=6, width=800, height=500)
else:
    m = folium.Map(location=[0, 0], zoom_start=2, width=800, height=500) # Default map if no idents

# Color per cluster index. Every entry must be a valid CSS color name because
# vector layers (CircleMarker, Rectangle) hand the value to the browser as-is.
# 'lightred' and 'darkpurple' are folium.Icon marker names, not CSS colors,
# so they are replaced here by 'lightcoral' and 'indigo'.
colors = [
    'red', 'blue', 'green', 'purple', 'orange', 'darkred',
    'lightcoral', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
    'indigo', 'white', 'pink', 'lightblue', 'lightgreen',
    'gray', 'black', 'lightgray', 'gold'
]

# Ensure enough colors for the number of clusters
if n_clusters > len(colors):
    print(f"Warning: Not enough colors defined for {n_clusters} clusters. Using repeating colors.")
    colors = (colors * ((n_clusters // len(colors)) + 1))[:n_clusters]

# Add each tile to the map as its bounding box, filled in its cluster's color.
for i, ident in enumerate(idents):
    tile_info = tile_info_lookup[ident]
    cluster = cluster_labels[i]
    color = colors[cluster]

    folium.Rectangle(
        bounds=[[tile_info['lat0'], tile_info['lon0']], [tile_info['lat1'], tile_info['lon1']]],
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.4,
        tooltip=f"Tile: {ident}, Cluster: {cluster}"
    ).add_to(m)

# Display the map
display(m)