In [None]:
# Mount Google Drive at /content/drive; the data files read and written by the
# cells below all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import json
import pandas as pd
import numpy as np
import datetime
import ee

## Import Base Data Files

In [None]:
# Run configuration: the date being processed and the number of days used to
# size the date window that precedes it.
run_date = '2022-02-10'
lookback = 15

# Base tables read from the project's data folder on Google Drive.
submission_format = pd.read_csv('/content/drive/MyDrive/snocast/eval/data/submission_format.csv')
ground_measures_metadata = pd.read_csv('/content/drive/MyDrive/snocast/eval/data/ground_measures_metadata.csv')

In [None]:
# Get a latitude/longitude centre point and a bounding box for every grid cell.
# The file is opened in a context manager so the handle is closed as soon as the
# JSON has been parsed (the name `f` stays bound, but to a closed file).
with open('/content/drive/MyDrive/snocast/eval/data/grid_cells.geojson') as f:
    grid_cells = json.load(f)
print('length grid_cells features: ', len(grid_cells['features']))

ids = []
lats = []
lons = []
bboxes = []

for grid_cell in grid_cells['features']:
    cell_id = grid_cell['properties']['cell_id']
    # First ring of the geometry: a sequence of [lon, lat] vertices.
    coordinates = grid_cell['geometry']['coordinates'][0]
    # Centre point is the plain mean over every vertex in the ring.
    # NOTE(review): if the ring repeats its first vertex to close the polygon,
    # that vertex is weighted twice here — confirm this is acceptable downstream.
    lon, lat = np.mean(coordinates, axis=0)
    northeast_corner = np.max(coordinates, axis=0)
    southwest_corner = np.min(coordinates, axis=0)
    # bbox = [min_lon, min_lat, max_lon, max_lat]
    bbox = np.concatenate([southwest_corner, northeast_corner])
    ids.append(cell_id)
    lats.append(lat)
    lons.append(lon)
    bboxes.append(bbox)

# One row per grid cell; `bbox` holds the 4-element array built above.
grid_cells_pd = pd.DataFrame({'location_id': ids,
                              'latitude': lats,
                              'longitude': lons,
                              'bbox': bboxes})

In [None]:
# Trigger the authentication flow.
# NOTE(review): presumably interactive (asks the user to authorise access) —
# confirm before running this notebook unattended.
ee.Authenticate()

# Initialize the library. The cells below build ee.ImageCollection objects and
# call getInfo(), so this must have run first.
ee.Initialize()

In [None]:
# Import the MODIS Terra Snow Cover Daily Global 500m collection.
# NOTE(review): the 'MODIS/006' collection IDs appear to be superseded by
# 'MODIS/061' in the Earth Engine catalog — confirm the 006 collections still
# cover the configured run_date before relying on them.
terra = ee.ImageCollection('MODIS/006/MOD10A1')

# Import the MODIS Aqua Snow Cover Daily Global 500m collection.
aqua = ee.ImageCollection('MODIS/006/MYD10A1')

In [None]:
# Date window handed to filterDate. The upper bound needs to be one day ahead of
# run_date; the lower bound sits lookback + 1 days before run_date.
_run_day = datetime.datetime.strptime(run_date, '%Y-%m-%d')
_one_day = datetime.timedelta(days=1)

min_date = (_run_day - (lookback + 1) * _one_day).strftime('%Y-%m-%d')
max_date = (_run_day + _one_day).strftime('%Y-%m-%d')
print(min_date, max_date)

In [None]:
# Columns needed by the per-cell MODIS extraction: the cell id plus its centre point.
modis_cols = ['location_id', 'latitude', 'longitude']
unique_ids = grid_cells_pd.loc[:, modis_cols]
print(unique_ids.shape)

In [None]:
# Keep only the snow-cover band and the images inside the configured date window.
terra_snow_cover = terra.select('NDSI_Snow_Cover').filterDate(min_date, max_date)
aqua_snow_cover = aqua.select('NDSI_Snow_Cover').filterDate(min_date, max_date)

# Fetch the per-image metadata so the covered date range can be checked by eye.
terra_info = terra_snow_cover.getInfo()['features']
aqua_info = aqua_snow_cover.getInfo()['features']

for _sensor, _images in (('Terra', terra_info), ('Aqua', aqua_info)):
    # Fail with a clear message instead of an IndexError when the window
    # matched no images (e.g. run_date outside the collection's coverage).
    if not _images:
        raise ValueError(
            f'No {_sensor} images found between {min_date} and {max_date}; '
            'check run_date and lookback.'
        )
    print(f'{_sensor} min date: ', _images[0]['properties']['system:index'])
    print(f'{_sensor} max date: ', _images[-1]['properties']['system:index'])

In [None]:
# Column names applied when the collected rows are turned into DataFrames.
output_cols = ['date', 'longitude', 'latitude', 'time', 'NDSI_Snow_Cover']

# Accumulators filled by the extraction loop: the raw rows for each satellite
# and, aligned with them, the grid-cell id each row belongs to.
terra_list, terra_ids = [], []
aqua_list, aqua_ids = [], []

In [None]:
# Runs in 4 hours
PROGRESS_EVERY = 250      # print the row index every N grid cells
CHECKPOINT_EVERY = 4000   # write intermediate parquet files every N grid cells


def _save_modis_rows(rows, location_ids, satellite):
    """Write the rows collected so far for one satellite to its parquet file.

    Args:
        rows: list of row lists returned by getRegion, one entry per
            `output_cols` column.
        location_ids: list of grid-cell ids, aligned one-to-one with `rows`.
        satellite: 'terra' or 'aqua'; used in the output file name.

    Returns:
        The DataFrame that was written.
    """
    frame = pd.DataFrame(rows, columns=output_cols)
    frame['location_id'] = location_ids
    frame.to_parquet(f'/content/drive/MyDrive/snocast/eval/data/modis/modis_{satellite}_{run_date}.parquet')
    return frame


# Start from empty accumulators so that re-running this cell does not append a
# second copy of every row to lists left over from a previous run.
terra_list, terra_ids = [], []
aqua_list, aqua_ids = [], []

for idx, row in unique_ids.iterrows():
    if idx % PROGRESS_EVERY == 0:
        print(idx)

    # Define a region of interest with a buffer zone of 500 m
    poi = ee.Geometry.Point(row['longitude'], row['latitude'])
    roi = poi.buffer(500)

    # The first element of each getRegion result is skipped; the rest are the
    # data rows. One location id is recorded per row to keep the lists aligned.
    # NOTE: a failed getInfo() call aborts the loop; only the most recent
    # checkpoint will be on disk in that case.
    terra_data = terra_snow_cover.getRegion(roi, scale=500).getInfo()[1:]
    terra_ids.extend([row['location_id']] * len(terra_data))
    terra_list.extend(terra_data)

    aqua_data = aqua_snow_cover.getRegion(roi, scale=500).getInfo()[1:]
    aqua_ids.extend([row['location_id']] * len(aqua_data))
    aqua_list.extend(aqua_data)

    # Periodic checkpoint: overwrite the output files with everything so far.
    if idx % CHECKPOINT_EVERY == 0 and idx != 0:
        print(idx)
        print(f'Saving output for {run_date} ...')
        terra_df = _save_modis_rows(terra_list, terra_ids, 'terra')
        aqua_df = _save_modis_rows(aqua_list, aqua_ids, 'aqua')

# Final save with the complete result set.
print(idx)
print(f'Saving output for {run_date} ...')

terra_df = _save_modis_rows(terra_list, terra_ids, 'terra')
aqua_df = _save_modis_rows(aqua_list, aqua_ids, 'aqua')