Extract monthly climate variables (ERA5-Land) for the RGI O3 subregions

In [1]:
import ee
import numpy as np
import json
import pandas as pd
import geopandas as gpd
import os
import numpy as np

In [2]:
#%%
# Trigger the Earth Engine authentication flow.
# Left disabled; uncomment the next line to run it.
# ee.Authenticate()

# Initialize the Earth Engine client library so the ee.* calls below can be used.
ee.Initialize()

In [3]:
# local project root (AGVA) and the climate subfolder the CSVs are written under
folder_AGVA = os.path.join(
    'C:',
    os.sep,
    'Users',
    'lzell',
    'OneDrive - Colostate',
    'Desktop',
    'AGVA',
)
folder_climate = os.path.join(folder_AGVA, 'Climate')

In [4]:
### get convex hull of the glacier in each O3Region

# glacier outlines, read from an Earth Engine table asset
asset_rgi = ee.FeatureCollection('projects/lzeller/assets/rgi_2km_o3regions')

# sorted list of the distinct "O3Region" values, with the value 0 removed
subregions = (
    asset_rgi
    .aggregate_array("O3Region")
    .distinct()
    .sort()
    .remove(0)
)

# define function that gets convex hull of glaciers
def get_chull(region):
    """Build a feature holding the convex hull of the glaciers in one O3Region.

    region: value matched against the 'O3Region' property of `asset_rgi`.
    Returns an ee.Feature whose geometry is the convex hull and whose only
    property is "O3Region" (set to `region`).
    """
    # combined geometry of every outline that belongs to this region
    in_region = ee.Filter.eq('O3Region', region)
    glacier_geometry = asset_rgi.filter(in_region).geometry()

    # wrap the hull in a feature tagged with the region it was built from
    return ee.Feature(glacier_geometry.convexHull(), {"O3Region": region})

# build one convex-hull feature per subregion and gather them into a collection
hull_features = subregions.map(get_chull)
o3_chulls = ee.FeatureCollection(hull_features)

In [5]:
### load in ERA5Land monthly data
# Earth Engine image collection 'ECMWF/ERA5_LAND/MONTHLY_AGGR'; get_year_month_era5
# below filters this collection by calendar year and month.
era5land_monthly = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')

# define a function that will grab the era5land data for a specific year/month
def get_year_month_era5(y1, y2, m1, m2):
    """Subset the ERA5-Land monthly collection by calendar year and month.

    y1, y2: start and end passed to the 'year' calendarRange filter.
    m1, m2: start and end passed to the 'month' calendarRange filter.
    Returns `era5land_monthly` with both filters applied.
    """
    in_years = ee.Filter.calendarRange(y1, y2, 'year')
    in_months = ee.Filter.calendarRange(m1, m2, 'month')
    return era5land_monthly.filter(in_years).filter(in_months)

# Create a function to calculate the mean value of a raster within a feature geometry
def calculate_mean(raster, feature, *, scale=100, max_pixels=1e13):
    """Reduce a raster to its mean value(s) inside a feature's geometry.

    raster: image to reduce; its reduceRegion method is called with a mean reducer.
    feature: feature whose geometry() bounds the reduction.
    scale: value forwarded as reduceRegion's `scale` argument (nominal pixel
        size of the projection to work in). Defaults to 100, the value that
        used to be hard-coded here.
    max_pixels: value forwarded as reduceRegion's `maxPixels` argument.
        Defaults to 1e13, the value that used to be hard-coded here.

    Returns whatever reduceRegion returns (the per-band means).
    """
    # NOTE(review): 100 is far finer than the grid the Earth Engine catalog lists
    # for ERA5-Land (~11 km); callers can now pass a coarser scale to cut the
    # pixel count - confirm the results are acceptable before changing the default.
    return raster.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=feature.geometry(),
        scale=scale,
        maxPixels=max_pixels,
    )
        
# define function that will get mean ERA5 value of given variables, given a year, month, geometry, variables
# def get_ERA5_data(variables, year, month, feature):
    
#     # subset to just this year/month
#     subset_data = ee.Image(get_year_month_era5(year, year, month, month).first())
    
#     # select just the variable in question
#     subset_data = subset_data.select(variables)
    
#     # get the average of this data within the feature geometry
#     subset_average = calculate_mean(ee.Image(subset_data), feature)
    
#     return subset_average

def get_ERA5_daterange(variable, year_start, year_end, month_start, month_end, feature):
    """Mean of an ERA5-Land variable inside a feature, for a range of years/months.

    variable: band selector handed to the collection's select().
    year_start, year_end, month_start, month_end: calendar limits forwarded to
        get_year_month_era5.
    feature: feature whose geometry the mean is taken over.

    Returns the result of calculate_mean on the stacked image (one entry per band).
    """
    # every monthly image inside the requested year/month window
    monthly_images = get_year_month_era5(year_start, year_end, month_start, month_end)

    # keep only the requested variable and stack the images into one multi-band image
    stacked = ee.Image(monthly_images.select(variable).toBands())

    # average each band within the feature geometry
    return calculate_mean(stacked, feature)

In [6]:
# for each subregion, send it off to get the data
era5_variables = ['temperature_2m', 'total_precipitation_sum', 'snowfall_sum']

# create the output folder up front so to_csv cannot fail on a missing directory
# only after the Earth Engine requests have completed
out_folder = os.path.join(folder_climate, "O3Regions")
os.makedirs(out_folder, exist_ok=True)

for region_n in range(1, 17):

    print(f"Starting Region {region_n}")

    # get just this region geometry (resolved once and reused for every variable)
    region_feature = o3_chulls.filter(ee.Filter.eq('O3Region', region_n)).first()

    # download the regional means of each variable, keyed by the first six characters
    # of the band name (parsed below as YYYYMM), so the variables are matched by date
    # rather than by row position
    means_by_date = {}
    for variable in era5_variables:
        band_means = get_ERA5_daterange(variable, 2018, 2022, 1, 12, region_feature).getInfo()
        means_by_date[variable] = {band[:6]: value for band, value in band_means.items()}

    # the temperature record defines which dates appear in the table, and their order
    dates = list(means_by_date['temperature_2m'])

    # construct a new df combining all the data, starting with the date columns
    df_all = pd.DataFrame({
        'date': dates,
        'year': [int(date[:4]) for date in dates],
        'month': [int(date[4:6]) for date in dates],
    })

    # add variables (a date missing from a variable is left empty instead of shifting rows)
    for variable in era5_variables:
        df_all[variable] = [means_by_date[variable].get(date) for date in dates]

    # save to computer
    out_path = os.path.join(out_folder, f'Region_{str(region_n).zfill(2)}_monthly.csv')
    df_all.to_csv(out_path, index=False)

print("Done!")

Starting Region 1
Starting Region 2
Starting Region 3
Starting Region 4
Starting Region 5
Starting Region 6
Starting Region 7
Starting Region 8
Starting Region 9
Starting Region 10
Starting Region 11
Starting Region 12
Starting Region 13
Starting Region 14
Starting Region 15
Starting Region 16
Done!
