In [2]:
import geopandas as gpd
import pandas as pd
from shapely import wkt
import matplotlib.colors as colors
import tempfile
import numpy as np
import shutil

from huggingface_hub import hf_hub_download

### Preamble
We'll define the remote file locations and download the chip definition and split files, as in the previous notebook. Again, we will limit our visualisations to Massachusetts, and this time take only 5 tiles (from the training split).

In [3]:
# HuggingFace dataset repository that all files in this notebook are fetched from
HUGGINGFACE_REPO_ID = "M3LEO-miniset/conus"

# Remote file names (relative to the repository root) of the chip geometry
# definitions and of the split assignment table
chipdefs_remote = 'conus_partitions_aschips_293d95e3ee589_miniset.geojson'
splitdefs_remote = (
    'conus_partitions_aschips_293d95e3ee589'
    '_splits_60bands_angle09_60-20-20_miniset.csv'
)

# Dataset folders from which a .tif is downloaded for every sampled chip
datasets = [
    'esaworldcover-2020',
    's1grd-2020',
    's2rgbm-2020',
]

In [4]:
# Fetch the chip definitions first, then the split definitions, from the
# dataset repository; each call returns the local path of the downloaded file.
chipdefs_local, splitdefs_local = (
    hf_hub_download(repo_id=HUGGINGFACE_REPO_ID, filename=remote_name, repo_type='dataset')
    for remote_name in (chipdefs_remote, splitdefs_remote)
)

print(f"Chip definitions downloaded to {chipdefs_local}")
print(f"Split definitions downloaded to {splitdefs_local}")

Chip definitions downloaded to /home/matt/.cache/huggingface/hub/datasets--M3LEO-miniset--conus/snapshots/a5b2fc3f3e08e4e58f4038f3969b93beaab7d168/conus_partitions_aschips_293d95e3ee589_miniset.geojson
Split definitions downloaded to /home/matt/.cache/huggingface/hub/datasets--M3LEO-miniset--conus/snapshots/a5b2fc3f3e08e4e58f4038f3969b93beaab7d168/conus_partitions_aschips_293d95e3ee589_splits_60bands_angle09_60-20-20_miniset.csv


In [5]:
# Split assignments: a plain table read from the downloaded CSV
splits_df = pd.read_csv(splitdefs_local)

# Chip definitions: read from the downloaded GeoJSON so the geometries are
# available for the spatial filter applied further down
chips_df = gpd.read_file(chipdefs_local)

In [6]:
# Limit to Massachusetts: parse the boundary geometry from its WKT file
with open('../data/massachusetts.wkt') as wkt_file:
    bounds = wkt.loads(wkt_file.read())

# Keep only the chips whose geometry intersects the boundary ...
mass_chips_df = chips_df.loc[chips_df.intersects(bounds)]

# ... and attach the split information to each of them via the shared
# 'identifier' column
mass_splits_df = mass_chips_df.merge(splits_df, on='identifier')

In [7]:
# Sample 5 chips randomly from the training split.
# random_state is fixed so that the same chips are selected on every
# Restart & Run All; without it each run draws a different sample, so the
# map and the downloaded files change between runs.
# Change the random_state value to draw a different set of chips.
train_chips_df = mass_splits_df[mass_splits_df['split'] == 'train']
sampled_chips_df = train_chips_df.sample(n=5, random_state=42)

In [8]:
# Show the sampled chips on an interactive map; as the last expression in the
# cell, the returned object is rendered as the cell output.
sampled_chips_df.explore()

### Download chips

Now we can download the chips we want from HuggingFace. You can download the data for any area you're interested in by constructing a .wkt file and using it as in these notebooks. Make sure that the area you want is actually covered by M3LEO, and that you're using the right HuggingFace repository - there's a different one for each AOI (Europe, China etc.). 

We will download them to a folder ./notebooks/tmp/ for this tutorial.

If you get an HTTP 416 error, try restarting the notebook. If that does not help, you may need to clear the HuggingFace cache or delete the tmp folder under ./notebooks.

In [11]:
# Download the .tif of every sampled chip, for every dataset, into ./tmp/.
# Remote files are addressed as <partition folder>/<dataset>/<chip identifier>.tif.
for dataset in datasets:
    dataset_target = f'conus_partitions_aschips_293d95e3ee589/{dataset}'

    for chip in sampled_chips_df['identifier']:
        fname = f'{dataset_target}/{chip}.tif'
        # The returned local path is not needed here, so it is not kept.
        hf_hub_download(repo_id=HUGGINGFACE_REPO_ID, filename=fname, repo_type='dataset', local_dir='./tmp/')