# Encode geospatial data for MGM / VERGE processing

Elsewhere we assembled geospatial data for a bunch of tiles.
Here we come up with encodings for each entity in those tiles.
The encodings will consist of a concatenation of a Multi-Point Proximity (MPP) encoding
and a one-hot vector indicating the entity type.

In [None]:
import pandas as pd
import numpy as np
import glob
import geopandas
import os
import pickle

from geo_encodings import MPPEncoder

## Setup

In [None]:
# This is the dimension of the (square) AOIs. Set this to match what was used
# when the tiles were created.
aoi_size = 500

# This is the resolution of the MPP encoding.
resolution = 50

## Data prep

In [None]:
# Load the label table and report how many labels it defines.
fname = 'labels.csv'
labels = pd.read_csv(fname)
label_count = len(labels)
print(label_count)

# Map each label string to its integer id (a later duplicate label wins).
# NOTE(review): the ids are used further down as indices into a vector of
# length label_count, so they are assumed to lie in 0..label_count-1.
label_id_lookup = dict(zip(labels['label'], labels['id']))

In [None]:
# Set up the MPP encoder for a region spanning [0, 0, aoi_size, aoi_size]
# at the configured resolution.
# NOTE(review): region is presumably [xmin, ymin, xmax, ymax] in tile-local
# coordinates, and the effect of center=False is defined by geo_encodings --
# confirm against that package.
encoder = MPPEncoder(
    region=[0, 0, aoi_size, aoi_size],
    resolution=resolution,
    center=False
)
# Display the encoder's length as the cell output (presumably the number of
# elements in each MPP encoding -- TODO confirm).
len(encoder)

## Processing

In [None]:
# Get a list of AOI names: one for every entry found under data/tiles.
globstring = 'data/tiles/*'
tile_dnames = glob.glob(globstring)

# Take the final path component with os.path.basename instead of splitting on
# a literal '/': glob joins directory and entry name with the platform's own
# separator, so a hard-coded '/' yields the wrong name on Windows.
aoi_names = [os.path.basename(dname) for dname in tile_dnames]
print('%d AOIs' % len(aoi_names))


In [None]:
# Loop over AOIs. Each AOI directory holds one parquet file per tile. Every
# tile is turned into a 2-D array with one row per entity, and all of the
# tile arrays for an AOI are pickled together as a list.

for aoi_name in aoi_names:
    print('\nhandling tile %s' % aoi_name)

    # This will hold one encoding array per tile of this AOI.
    tile_encodings_for_aoi = []

    # Get a list of all tile files for this AOI.
    globstring = 'data/tiles/%s/*.pq' % aoi_name
    fnames = glob.glob(globstring)
    print('%d tiles' % len(fnames))
    for fname in fnames:
        gdf = geopandas.read_parquet(fname)

        # np.vstack raises ValueError when given an empty list, so a tile
        # without any entities would abort the whole run. Skip it instead.
        if len(gdf) == 0:
            print('skipping empty tile %s' % fname)
            continue

        tile_vectors = []
        # sample(frac=1) visits the tile's entities in shuffled order.
        for _, rec in gdf.sample(frac=1).iterrows():
            # MPP encoding of the entity's geometry.
            encoding = encoder.encode(rec['geometry']).values()

            # One-hot vector marking the entity type. The lookup key is
            # '<category> : <label>'; an unknown key raises KeyError.
            etype = np.zeros(label_count)
            label_string = '%s : %s' % (rec['category'], rec['label'])
            label_id = label_id_lookup[label_string]
            etype[label_id] = 1.0

            # Concatenate the type and encoding vectors (type comes first).
            vector = np.hstack((etype, encoding))
            tile_vectors.append(vector)

        # Stack the per-entity vectors into one array for this tile.
        tile_encoding = np.vstack(tile_vectors)
        tile_encodings_for_aoi.append(tile_encoding)

    # Save all of the encodings for this AOI.
    fname = 'data/encodings/%s.pkl' % aoi_name
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    with open(fname, 'wb') as dest:
        pickle.dump(tile_encodings_for_aoi, dest)
    print('wrote %s' % (fname))
        

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Take the encoding matrix of the first tile of the most recently processed
# AOI: one row per entity, one column per encoding element.
data = tile_encodings_for_aoi[0]

# Render the matrix as a heatmap with a labelled colorbar.
fig, ax = plt.subplots()
image = ax.imshow(data, cmap='viridis', origin='upper')
fig.colorbar(image, ax=ax, label='Intensity')
ax.set_title('encodings')
ax.set_ylabel('entity number')
ax.set_xlabel('encoding element')
plt.show()


In [None]:
# Count the entities per (category, label) pair in the last tile that was
# read (gdf is left over from the processing loop), sorted by index.
gdf[['category', 'label']].value_counts().sort_index()