In [None]:
# Install boto3, the AWS SDK this notebook imports to download data from S3.
!pip install boto3

In [None]:
# Mount Google Drive at /content/drive; the notebook reads its inputs from,
# and writes its outputs to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import json
import os
from collections import defaultdict

import boto3
import botocore.exceptions
import netCDF4
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# S3 bucket that hosts the land-cover archive.
BUCKET_NAME = 'drivendata-public-assets'

# Authentication credentials are read from the environment rather than typed
# into the notebook, so secrets never end up in a saved or shared copy.
# Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before running this cell.
# When both are unset, None is passed and boto3 falls back to its default
# credential lookup.
s3 = boto3.resource(
    's3',
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
)

In [None]:
# Object key of the land-cover archive inside BUCKET_NAME.
KEY = 'land_cover_map.tar.gz'

# Download the archive into the current working directory, saved under the
# same name as its S3 key.
try:
  s3.Bucket(BUCKET_NAME).download_file(KEY, KEY)
except botocore.exceptions.ClientError as e:
  # The exception class is resolved only when a download actually fails, so
  # `botocore.exceptions` must be imported for this handler to work at all.
  # A "404" error code means the key is missing from the bucket: report it
  # and carry on. Any other client error is re-raised unchanged.
  if e.response['Error']['Code'] == "404":
    print("The object does not exist.")
  else:
    raise

In [None]:
# Unpack the downloaded land-cover archive into the current working directory.
!tar -xf /content/land_cover_map.tar.gz

In [None]:
# Copy the extracted NetCDF land-cover file into the static data folder on Drive.
!cp /content/C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.nc /content/drive/MyDrive/snocast/eval/data/static/C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.nc

In [None]:
# Location on Drive of the land-cover NetCDF file.
fp = '/content/drive/MyDrive/snocast/eval/data/static/C3S-LC-L4-LCCS-Map-300m-P1Y-2020-v2.1.1.nc'
# Open the file read-only as a netCDF4 Dataset.
nc = netCDF4.Dataset(fp, mode='r')

In [None]:
# Class definitions for the land-cover product:
# http://maps.elie.ucl.ac.be/CCI/viewer/download/ESACCI-LC-QuickUserGuide-LC-Maps_v2-0-7.pdf
nc_vars = nc.variables
# The class variable is kept as a handle on the dataset and sliced later;
# only the two coordinate axes are copied into NumPy arrays here.
lccs_class = nc_vars['lccs_class']
lccs_lat, lccs_lon = (np.array(nc_vars[axis]) for axis in ('lat', 'lon'))

### Import Base Data Files

In [None]:
# Ground-measurement station metadata.
ground_measures_path = '/content/drive/MyDrive/snocast/eval/data/ground_measures_metadata.csv'
ground_measures_metadata = pd.read_csv(ground_measures_path)

# Submission template.
submission_format_path = '/content/drive/MyDrive/snocast/eval/data/submission_format.csv'
submission_format = pd.read_csv(submission_format_path)

In [None]:
# Load the grid-cell GeoJSON; each feature supplies a cell id and the polygon
# from which the cell's latitude/longitude are derived. The `with` block
# closes the file handle as soon as parsing is done.
with open('/content/drive/MyDrive/snocast/eval/data/grid_cells.geojson') as f:
    grid_cells = json.load(f)
print('length grid_cells features: ', len(grid_cells['features']))

In [None]:
# Per-cell id, centre point and bounding box, gathered into parallel lists
# and assembled into one DataFrame afterwards.
ids, lats, lons, bboxes = [], [], [], []

for feature in grid_cells['features']:
    # First ring of the cell polygon; each vertex is ordered [lon, lat].
    ring = np.asarray(feature['geometry']['coordinates'][0])

    # Cell centre = plain mean over the ring's vertices.
    # NOTE(review): if the ring repeats its first vertex at the end (a closed
    # ring), that vertex is counted twice in this mean - confirm this is the
    # intended centre.
    center_lon, center_lat = ring.mean(axis=0)

    # bbox = [min_lon, min_lat, max_lon, max_lat]
    cell_bbox = np.concatenate([ring.min(axis=0), ring.max(axis=0)])

    ids.append(feature['properties']['cell_id'])
    lats.append(center_lat)
    lons.append(center_lon)
    bboxes.append(cell_bbox)

grid_cells_pd = pd.DataFrame(
    {'location_id': ids, 'latitude': lats, 'longitude': lons, 'bbox': bboxes}
)

In [None]:
# Overall extent of the grid, taken from the cell bounding boxes rather than
# the cell centres. The raster is trimmed to this extent and then sampled per
# cell by bbox, so an extent built from centres would cut away the outer half
# of every cell on the boundary of the grid.
# Each bbox is [min_lon, min_lat, max_lon, max_lat].
all_bboxes = np.vstack(grid_cells_pd['bbox'].tolist())
all_min_lon, all_min_lat = all_bboxes[:, :2].min(axis=0)
all_max_lon, all_max_lat = all_bboxes[:, 2:].max(axis=0)
print(all_min_lon, all_min_lat, all_max_lon, all_max_lat)

In [None]:
# Trim the land-cover raster to the extent of the grid cells.
# Boolean selectors pick the raster rows/columns whose coordinate lies
# strictly inside the overall latitude/longitude range.
lccs_lat_values = (lccs_lat > all_min_lat) & (lccs_lat < all_max_lat)
lccs_lon_values = (lccs_lon > all_min_lon) & (lccs_lon < all_max_lon)

# Coordinate axes of the trimmed raster.
reduced_lat, reduced_lon = lccs_lat[lccs_lat_values], lccs_lon[lccs_lon_values]

# Read only the selected window from the file; np.squeeze then drops any
# length-1 axes (the leading axis is presumably a single time step - TODO confirm).
reduced_lccs = np.squeeze(lccs_class[:, lccs_lat_values, lccs_lon_values])

In [None]:
def _top_land_cover_classes(pixels, n_top=3):
  """Summarise the most frequent land-cover classes among `pixels`.

  Args:
    pixels: 1-D array of land-cover class codes.
    n_top: number of ranked classes to report.

  Returns:
    dict with keys `lccs_{i}` (class code) and `lccs_pct_{i}` (fraction of
    `pixels` holding that code) for i in 0..n_top-1, ordered from most to
    least frequent. Ranks beyond the number of distinct classes present
    (including the case of no pixels at all) get class 0 and NaN.
  """
  lccs_cat, lccs_count = np.unique(pixels, return_counts=True)
  # Indices of the distinct classes, most frequent first.
  lccs_order = np.flip(np.argsort(lccs_count))
  lccs_len = len(pixels)

  summary = {}
  for i in range(n_top):
    if i < len(lccs_order):
      summary[f'lccs_{i}'] = lccs_cat[lccs_order[i]]
      summary[f'lccs_pct_{i}'] = lccs_count[lccs_order[i]] / lccs_len
    else:
      summary[f'lccs_{i}'] = 0
      summary[f'lccs_pct_{i}'] = np.nan
  return summary


# One record per grid cell: its id plus its three dominant land-cover classes.
lccs_arr = []

for idx, row in grid_cells_pd.iterrows():
  # Progress indicator every 100 cells.
  if idx % 100 == 0:
    print(idx)
  min_lon, min_lat, max_lon, max_lat = row['bbox']

  # Raster rows/columns whose coordinate lies strictly inside the cell bbox.
  lat_values = (reduced_lat < max_lat) & (reduced_lat > min_lat)
  lon_values = (reduced_lon < max_lon) & (reduced_lon > min_lon)

  # np.ix_ selects the row/column cross product directly. This yields the
  # same pixels, in the same row-major order, as indexing with a full-raster
  # 2-D boolean mask, without allocating a raster-sized mask for every cell.
  arr = reduced_lccs[np.ix_(lat_values, lon_values)].ravel()

  land_cover = {'location_id': row['location_id']}
  land_cover.update(_top_land_cover_classes(arr))
  lccs_arr.append(land_cover)


In [None]:
# Sanity check: the last index visited by the loop and the number of records
# collected, printed one per line.
print(idx, len(lccs_arr), sep='\n')

In [None]:
# Assemble the per-cell records (one dict per grid cell) into a DataFrame.
lccs_df = pd.DataFrame(data=lccs_arr)

In [None]:
# Preview the first five rows of the land-cover table.
lccs_df.head(n=5)

In [None]:
# Persist the per-cell land-cover table alongside the other static inputs.
grid_lccs_path = '/content/drive/MyDrive/snocast/eval/data/static/grid_lccs.parquet'
lccs_df.to_parquet(grid_lccs_path)