# Polygon IDs for C2RCC outputs

The workflow used for C2RCC did not retain polygon IDs. These IDs are needed to subset other data and match it to the corresponding C2RCC output. This notebook restores the polygon IDs and saves the resulting lookup as a JSON file.

In [11]:
import os

In [1]:
import json

In [2]:
import numpy as np

In [3]:
import xarray as xr

In [5]:
# Directory-name template for one tile's results; filled in later via
# result_dir.format(tileId=...).
result_dir = 'result-{tileId}'

In [4]:
# Read the AOI definitions (JSON text stored in AOIs.txt) into memory.
with open('AOIs.txt', 'r') as aoi_file:
    poly_dict = json.loads(aoi_file.read())

Group the AOI polygons by tile ID.

In [20]:
# Build {tile ID: {polygon ID: 4-element coordinate array}} from the AOI records.
tile_polys = {}
for poly_id, record in poly_dict.items():
    tile_id = record['tileId']
    # Strip the first 10 and last 2 characters of the geoRegion string, drop
    # the commas and split what is left into whitespace-separated number tokens.
    # NOTE(review): presumably this removes a "POLYGON ((" / "))" wrapper -- confirm.
    tokens = record['geoRegion'][10:-2].replace(',', '').split()
    # Keep tokens 0, 1, 2 and 5 only.
    # NOTE(review): presumably lon_min, lat_min, lon_max, lat_max, which relies
    # on the vertex order used in AOIs.txt -- confirm.
    extent = np.array([float(tokens[i]) for i in (0, 1, 2, 5)])
    tile_polys.setdefault(tile_id, {})[poly_id] = extent

In [21]:
# Display the per-tile grouping so the parsed coordinates can be checked by eye.
tile_polys

{'30UUA': {'2': array([-5.22444257, 49.9901843 , -4.88225479, 50.29003327]),
  '3': array([-4.81917128, 50.19269224, -4.59558622, 50.40467681]),
  '20': array([-5.7897289 , 49.9734232 , -5.3357743 , 50.27582106])},
 '30UVA': {'4': array([-4.32949749, 50.26538565, -4.02885352, 50.5075424 ]),
  '6': array([-4.02836874, 50.14143345, -3.59050258, 50.34954708]),
  '9': array([-3.70217003, 50.32171356, -3.4070158 , 50.54736093])},
 '30UVB': {'11': array([-4.42178593, 50.96017888, -3.80799343, 51.4427289 ]),
  '12': array([-3.50331426, 50.55463065, -3.27987264, 50.71209921])},
 '30UWB': {'13': array([-2.09595764, 50.55849793, -1.71524861, 50.75094301]),
  '16': array([-2.60528065, 50.55348792, -2.12042715, 50.6553193 ])},
 '30UXB': {'14': array([-1.58556407, 50.54458557, -0.64846788, 50.95668855])},
 '30UWC': {'15': array([-2.99680783, 51.4583999 , -2.60709778, 51.69910317])},
 '29UPR': {'17': array([-6.48086707, 49.81301712, -6.21948222, 50.02839047])},
 '30UUC': {'18': array([-5.80439836, 5

In [10]:
def lse_match(coords, candidates):
    """Return the key of the candidate closest to ``coords`` in the least-squares sense.

    Parameters
    ----------
    coords : numpy.ndarray
        Reference values; must support element-wise subtraction with every
        candidate value.
    candidates : dict
        Mapping of key -> array to compare against ``coords``.

    Returns
    -------
    The key whose value has the smallest sum of squared differences from
    ``coords``. The first key wins on ties. ``None`` is returned when
    ``candidates`` is empty or no candidate yields an error strictly below
    infinity (e.g. every error is NaN).
    """
    best_key, best_error = None, float('inf')
    for key, target in candidates.items():
        error = np.square(coords - target).sum()
        # Strict "<" keeps the earliest candidate on ties and skips NaN errors.
        if not (error < best_error):
            continue
        best_key, best_error = key, error
    return best_key

In [26]:
# Lookup to be filled in: tile ID -> {output file name -> polygon ID}.
match_dict = dict()

In [28]:
# Match every output file in each tile's results directory to one of that
# tile's polygons by comparing coordinate extents.
for tileId in tile_polys:
    tile_dir = result_dir.format(tileId=tileId)
    if not os.path.isdir(tile_dir):
        # No results directory for this tile: it gets no entry in match_dict.
        continue
    match_dict[tileId] = {}
    for fn in os.listdir(tile_dir):
        nc_path = os.path.join(tile_dir, fn)
        # Open via a context manager so the file handle is released as soon as
        # the extent has been read; otherwise one handle leaks per output file.
        with xr.open_dataset(nc_path) as ds:
            lon_min, lon_max = ds['lon'].min().values, ds['lon'].max().values
            lat_min, lat_max = ds['lat'].min().values, ds['lat'].max().values
        # NOTE(review): presumably the same element order as the arrays stored
        # in tile_polys -- confirm.
        coords = np.array([lon_min, lat_min, lon_max, lat_max], dtype='float')
        # The polygon with the smallest squared coordinate error is taken as the match.
        match_dict[tileId][fn] = lse_match(coords, tile_polys[tileId])

In [29]:
# Display the file -> polygon ID matches per tile for a visual check.
match_dict

{'30UUA': {'e3fce0fc-ea1a-11ef-9325-6fcf289158b1.nc': '20',
  '6ac9d8d0-ea19-11ef-a9e1-7657fa5eb4fd.nc': '2',
  '34359ea2-ea1a-11ef-a9af-23bbe33726e1.nc': '3'},
 '30UVA': {'75c5bed4-ea1e-11ef-9cf8-4600521331d5.nc': '6',
  'de5e89a4-ea1d-11ef-b169-26abf31e115f.nc': '4',
  '57278f42-ea1f-11ef-93ee-f3b7cac66662.nc': '9'},
 '30UVB': {'17b7b57a-ea20-11ef-813e-0e5230ef6e9b.nc': '11'},
 '30UWB': {'4015b672-ea28-11ef-ac32-4f563a2ee88c.nc': '16',
  '7da824c6-ea27-11ef-b74e-0bd1cabdfe55.nc': '13'},
 '30UXB': {'2161d2e6-ea29-11ef-baf8-7ed9f5863bda.nc': '14'},
 '30UWC': {'899d8506-ea2b-11ef-8ca5-f393179a2529.nc': '15'},
 '29UPR': {'778f88c2-ea2c-11ef-9f9b-e70fbe10fa11.nc': '17'},
 '30UUC': {},
 '30UVC': {'b4d35078-ea32-11ef-b40a-3e3203733142.nc': '19'},
 '31UCS': {'870cdad0-ea35-11ef-9e41-53315f7bea3a.nc': '22',
  'e8d13a06-ea33-11ef-9eb5-e35bc30585dd.nc': '21'},
 '31UCT': {'447b05e2-ea36-11ef-8194-46dc650b0088.nc': '23',
  '3b6c2098-ea37-11ef-b85b-c78422e5fb4a.nc': '24'},
 '31UCU': {'4a81170e-eae

In [30]:
# Save the tile -> {file name -> polygon ID} lookup as JSON text.
with open('AOI_Lookup.txt', 'w') as lookup_file:
    json.dump(match_dict, fp=lookup_file)