In [1]:
%load_ext autoreload
%autoreload 2

In [90]:
import datetime as dt
import glob

import numpy as np
import pandas as pd
from pyproj import CRS

from torchgeo.experimental.gpd_index import (
    build_fake_sparse_rasters,
    build_raster_index,
    jitter_geometries,
)

In [95]:
# generate lat and lons that we will use for origins of different tiles for
# various "collections" or "datasets". We will use these to generate fake data
# to simulate sparse, overlapping rasters. We keep them in UTM zone in order
# use a fake example with 32610 CRS along side 3857 and 4326
num_overlapping_sites = 10
rng = np.random.default_rng(42)
random_lat_lon_origins = [
    (
        rng.uniform(-123, -117),  # within UTM 32610 North Hemisphere zone
        rng.uniform(35, 55),  # on land
    )
    for _ in range(num_overlapping_sites)
]

targets = build_fake_sparse_rasters(
    output_dir='/tmp/example_rasters/targets',
    lat_lons=random_lat_lon_origins,
    crs=CRS.from_epsg(4326),
    num_dates=3,
    resolution_meters=30,
    start_date=dt.datetime(2023, 1, 1),
    end_date=dt.datetime(2023, 1, 31),
    nbands=1,
    rng=rng,
)

sentinel2 = build_fake_sparse_rasters(
    output_dir='/tmp/example_rasters/sentinel2',
    lat_lons=random_lat_lon_origins,
    crs=CRS.from_epsg(3857),  # likely actually UTM
    num_dates=10,
    resolution_meters=10,
    start_date=dt.datetime(2023, 1, 1),
    end_date=dt.datetime(2023, 1, 31),
    nbands=4,
    rng=rng,
)

landsat = build_fake_sparse_rasters(
    output_dir='/tmp/example_rasters/landsat',
    lat_lons=random_lat_lon_origins,
    crs=CRS.from_epsg(32610),
    num_dates=10,
    resolution_meters=30,
    start_date=dt.datetime(2023, 1, 1),
    end_date=dt.datetime(2023, 1, 31),
    nbands=4,
    rng=rng,
)

# sanity check
len(glob.glob('/tmp/example_rasters/**/*.tif'))

230

In [92]:
date_regex = r'_(\d{8})\.tif$'

target_tiff_index = build_raster_index(
    tifs=targets, date_regex=date_regex, collection='target'
)
sentinel2_tiff_index = build_raster_index(
    tifs=sentinel2, date_regex=date_regex, collection='sentinel2'
)
landsat_tiff_index = build_raster_index(
    tifs=landsat, date_regex=date_regex, collection='landsat'
)

tif_index = pd.concat([target_tiff_index, sentinel2_tiff_index, landsat_tiff_index])

In [93]:
tif_index.sample(frac=1.0).head()

Unnamed: 0,crs,geometry,location,datetime,collection
28,EPSG:4326,"POLYGON ((-118.00726 47.60634, -118.00726 47.6...",/tmp/example_rasters/targets/9_20230105.tif,2023-01-05,target
46,EPSG:32610,"POLYGON ((-122.1939 43.98045, -122.1939 44.007...",/tmp/example_rasters/landsat/4_20230108.tif,2023-01-08,landsat
60,EPSG:32610,"POLYGON ((-119.09373 51.42689, -119.09373 51.4...",/tmp/example_rasters/landsat/6_20230125.tif,2023-01-25,landsat
43,EPSG:3857,"POLYGON ((-122.22234 44.00126, -122.22234 44.0...",/tmp/example_rasters/sentinel2/4_20230115.tif,2023-01-15,sentinel2
53,EPSG:32610,"POLYGON ((-120.72998 53.5075, -120.72998 53.53...",/tmp/example_rasters/landsat/5_20230129.tif,2023-01-29,landsat


In [94]:
jitter_geometries(tif_index, max_offset=0.005).explore(color='collection', legend=True)

In [None]:
import geopandas as gpd
import networkx as nx


def cluster_geometries(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Possible helper to clump tiles across collections with interecting tiles."""
    spatial_index = gdf.sindex
    pairs = []
    for i, geom in enumerate(gdf.geometry):
        possible_matches = list(spatial_index.intersection(geom.bounds))
        for j in possible_matches:
            if i != j and geom.intersects(gdf.geometry.iloc[j]):
                pairs.append((i, j))
        graph = nx.Graph()
    graph.add_edges_from(pairs)
    clusters = {
        node: idx
        for idx, component in enumerate(nx.connected_components(graph))
        for node in component
    }
    gdf['cluster'] = gdf.index.map(clusters)

    return gdf

In [100]:
cluster_geometries(tif_index).groupby('cluster').nunique()

Unnamed: 0_level_0,crs,geometry,location,datetime,collection
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,3,5,29,20,3
1,3,5,29,21,3
2,3,5,29,18,3
3,3,7,29,19,3
4,3,5,29,17,3
5,3,5,29,21,3
6,3,7,29,19,3
7,3,3,9,9,3
8,3,3,9,6,3
9,3,3,9,8,3
