In [None]:
!pip install boto3

In [None]:
# Mount Google Drive; later cells read and write files under /content/drive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import boto3
import json
import pandas as pd
import numpy as np
from collections import defaultdict
from matplotlib import pyplot as plt # import libraries
import pandas as pd # import libraries
import netCDF4 # import libraries

In [None]:
import os  # imported here so this cell is self-contained

BUCKET_NAME = 'drivendata-public-assets' # replace with your bucket name

# Authentication credentials are read from the environment instead of being
# typed into the notebook, so secrets never end up in the saved file.
# Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before running this cell.
# When a variable is unset, None is passed and boto3 falls back to its
# default credential lookup.
s3 = boto3.resource(
    's3',
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
)

In [None]:
# botocore is installed together with boto3; it must be imported explicitly so
# that the `except` clause below can resolve `botocore.exceptions.ClientError`
# (without it, any download failure surfaces as a NameError instead).
import botocore.exceptions

KEY = 'water_bodies_map.tar.gz' # replace with your object key

# Download the archive from the bucket into the current working directory.
try:
  s3.Bucket(BUCKET_NAME).download_file(KEY, 'water_bodies_map.tar.gz')

except botocore.exceptions.ClientError as e:
  # A 404 means the key is missing from the bucket: report it and carry on.
  # Any other client error is re-raised unchanged.
  if e.response['Error']['Code'] == "404":
    print("The object does not exist.")
  else:
    raise

In [None]:
!tar -xf /content/water_bodies_map.tar.gz

In [None]:
!cp /content/ESACCI-LC-L4-WB-Map-150m-P13Y-2000-v4.0.nc /content/drive/MyDrive/snocast/eval/data/static/ESACCI-LC-L4-WB-Map-150m-P13Y-2000-v4.0.nc

In [None]:
# Location on Drive of the water-bodies NetCDF file copied in the previous cell.
fp = '/content/drive/MyDrive/snocast/eval/data/static/ESACCI-LC-L4-WB-Map-150m-P13Y-2000-v4.0.nc'

# Open the file read-only. The dataset handle stays open because later cells
# read variables from it.
nc = netCDF4.Dataset(fp, mode='r')

In [None]:
# Keep `wb_class` as the dataset variable object (it is indexed in a later
# cell); copy the 'lat' and 'lon' coordinate variables into NumPy arrays.
nc_vars = nc.variables
wb_class = nc_vars['wb_class']
wb_lat, wb_lon = (np.array(nc_vars[axis_name]) for axis_name in ('lat', 'lon'))

### Import Base Data Files

In [None]:
# Load the two base CSV tables from Drive into DataFrames.
ground_measures_metadata, submission_format = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/snocast/eval/data/ground_measures_metadata.csv',
        '/content/drive/MyDrive/snocast/eval/data/submission_format.csv',
    )
)

In [None]:
# get latitude longitude for grids
# The GeoJSON is parsed inside a `with` block so the file handle is closed as
# soon as loading finishes instead of staying open for the whole session.
with open('/content/drive/MyDrive/snocast/eval/data/grid_cells.geojson') as f:
    grid_cells = json.load(f)
print('length grid_cells features: ', len(grid_cells['features']))

In [None]:
# Build one table row per GeoJSON feature: the cell id, the mean of its
# vertex coordinates, and its axis-aligned bounding box.
ids, lats, lons, bboxes = [], [], [], []

for feature in grid_cells['features']:
    ids.append(feature['properties']['cell_id'])

    # First coordinate list of the geometry as an array of [lon, lat] rows
    # (presumably the polygon's outer ring; verify against the GeoJSON).
    ring = np.asarray(feature['geometry']['coordinates'][0])

    # NOTE(review): if the ring repeats its first vertex as its last one, this
    # mean counts that corner twice. Confirm whether a true centroid is wanted.
    center_lon, center_lat = ring.mean(axis=0)
    lats.append(center_lat)
    lons.append(center_lon)

    # bbox = [min_lon, min_lat, max_lon, max_lat]
    bboxes.append(np.hstack((ring.min(axis=0), ring.max(axis=0))))

grid_columns = {
    'location_id': ids,
    'latitude': lats,
    'longitude': lons,
    'bbox': bboxes,
}
grid_cells_pd = pd.DataFrame(grid_columns)

In [None]:
# Overall lat/lon extent of the grid, later used to trim the water-body raster.
# The extent is taken from the bounding-box corners rather than the cell
# centres: each cell's bbox reaches beyond its centre, so centre-based limits
# would cut away part of the pixels belonging to the outermost cells.
# Each bbox is [min_lon, min_lat, max_lon, max_lat].
bbox_table = np.vstack(grid_cells_pd['bbox'].tolist())
all_min_lon, all_min_lat = bbox_table[:, :2].min(axis=0)
all_max_lon, all_max_lat = bbox_table[:, 2:].max(axis=0)
print(all_min_lon, all_min_lat, all_max_lon, all_max_lat)

In [None]:
# Restrict the water-body data to the lat/lon window spanned by the grid.
# Both comparisons are strict, so coordinates equal to a limit are dropped.
wb_lat_values = np.logical_and(wb_lat > all_min_lat, wb_lat < all_max_lat)
wb_lon_values = np.logical_and(wb_lon > all_min_lon, wb_lon < all_max_lon)

# Coordinate axes of the trimmed window.
reduced_lat = wb_lat[wb_lat_values]
reduced_lon = wb_lon[wb_lon_values]

# Class values for the window, indexed as (lat, lon).
reduced_wb = wb_class[wb_lat_values, wb_lon_values]

In [None]:
# For every grid cell, average the water-body class values of the pixels whose
# coordinates lie strictly inside the cell's bounding box.
mean_wb_arr = []

for idx, row in grid_cells_pd.iterrows():
  if idx % 500 == 0:
    print(idx)  # coarse progress indicator
  min_lon, min_lat, max_lon, max_lat = row['bbox']

  # 1-D boolean selectors along each axis of the trimmed raster.
  lat_values = (reduced_lat < max_lat) & (reduced_lat > min_lat)
  lon_values = (reduced_lon < max_lon) & (reduced_lon > min_lon)

  # Pull out just the (selected rows x selected columns) sub-grid. This picks
  # the same pixels as a full 2-D boolean mask would, without allocating a
  # mask the size of the whole raster on every iteration.
  cell_wb = reduced_wb[np.ix_(lat_values, lon_values)]

  # A cell that contains no pixel gets NaN explicitly rather than relying on
  # the result of averaging an empty selection.
  mean_wb = cell_wb.mean() if cell_wb.size else np.nan
  mean_wb_arr.append(mean_wb)

In [None]:
# Sanity check: last index visited by the loop, then the number of means collected.
print(idx, len(mean_wb_arr), sep='\n')

In [None]:
# Attach the per-cell mean of the water-body class values as a 'water' column.
# NOTE(review): the class coding is unconfirmed. Presumably 2 is water and 1 is
# land, so a larger mean would mean more water; verify against the metadata of
# the `wb_class` variable.
grid_cells_pd['water'] = mean_wb_arr 

In [None]:
# Keep only the cell identifier and the water feature for export.
grid_cells_pd = grid_cells_pd.loc[:, ['location_id', 'water']]

In [None]:
# Write the per-cell water feature table to Drive in Parquet format.
grid_water_path = '/content/drive/MyDrive/snocast/eval/data/static/grid_water.parquet'
grid_cells_pd.to_parquet(grid_water_path)