# Code to reprocess the reanalysis/climate datasets used in Aguayo et al. (in review)
Developed by Rodrigo Aguayo (2020-2023)

In [None]:
import pandas as pd
import xarray as xr
import regionmask
import rioxarray as rioxr
import geopandas as gpd
import numpy as np
import os

# All relative output paths below are resolved against this working directory.
os.chdir('/home/rooda/Dropbox/Patagonia/Data/')

# Root folder holding the raw (downloaded) datasets.
local = "/media/rooda/Local Disk/Datasets"

# Length of each calendar month, January..December, for a non-leap year.
days = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])

# NetCDF encodings passed to `to_netcdf`.
# Precipitation: compressed int16.
encode_pp = {
    "pp": {"zlib": True, "complevel": 1, "dtype": "int16"},
}
# Temperatures: int16 with a 0.01 scale factor and -9999 as fill value.
encode_t2m = {
    't2m': {'dtype': 'int16', 'scale_factor': 0.01, '_FillValue': -9999},
}
encode_tmax = {
    'tmax': {'dtype': 'int16', 'scale_factor': 0.01, '_FillValue': -9999},
}
encode_tmin = {
    'tmin': {'dtype': 'int16', 'scale_factor': 0.01, '_FillValue': -9999},
}

# Polygon layer used to clip the hourly ERA5 fields.
mask = gpd.read_file("/home/rooda/Dropbox/Patagonia/GIS South/dem_mask.shp")

## ERA5

In [None]:
# ERA5 monthly-averaged precipitation -> monthly totals over the study domain.
dict_pp  = {'tp':'pp', 'longitude':'lon', 'latitude':'lat'} # monthly averaged reanalysis (ok)
stack_pp = xr.open_dataset(os.path.join(local + "/ERA5/Monthly/ERA5_1959_2021m.nc"), chunks ="auto").rename(dict_pp)[["pp"]]
stack_pp = stack_pp.where((stack_pp.lon >= -76) & (stack_pp.lon <= -65) & (stack_pp.lat >= -56) & (stack_pp.lat <= -40), drop=True)

# Number of days of every time step, read from the time axis itself.
# The former `days.repeat(n_years)` repeats each element n_years times in a
# row ([31, 31, ..., 28, 28, ...]), so it was not in calendar order, and a
# fixed 28-day February also ignored leap years.
months   = stack_pp.time.dt.days_in_month.rename('month_length')

# value * days-in-month * 1000 (presumably m/day -> mm/month; confirm units of `tp`).
stack_pp = (stack_pp*months*1000).astype("int16")
stack_pp.to_netcdf("Precipitation/PP_ERA5_1959_2021m.nc", encoding = encode_pp)

# ERA5 monthly-averaged 2 m temperature: crop to the study domain, subtract
# 273.15, round to 2 decimals and store with the scaled-int16 encoding.
dict_t2m = {'longitude': 'lon', 'latitude': 'lat'}
era5_monthly = xr.open_dataset(
    os.path.join(local + "/ERA5/Monthly/ERA5_1959_2021m.nc"),
    chunks="auto",
)
stack_t2m = era5_monthly.rename(dict_t2m)[["t2m"]]

lon_ok = (stack_t2m.lon >= -76) & (stack_t2m.lon <= -65)
lat_ok = (stack_t2m.lat >= -56) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True)

stack_t2m = (stack_t2m - 273.15).round(2)
stack_t2m.to_netcdf("Temperature/Tavg_ERA5_1959_2021m.nc", encoding=encode_t2m)

In [None]:
# ERA5 3-hourly precipitation aggregated to daily sums inside the mask polygons.
dict_pp = {'tp': 'pp', 'longitude': 'lon', 'latitude': 'lat'}
era5_hourly = xr.open_mfdataset(
    os.path.join(local + "/ERA5/Hourly/ERA5*.nc"),
    concat_dim='time',
    combine='nested',
    chunks="auto",
)
stack_pp = era5_hourly.rename(dict_pp)["pp"]

# Keep only grid cells assigned to a polygon of `mask` (region index >= 0).
mask_pp = regionmask.mask_geopandas(mask, stack_pp)
stack_pp = stack_pp.where(mask_pp >= 0, drop=True)

# Files are concatenated in nested order, so sort by time before resampling.
# skipna=False: a day with any missing step stays missing instead of
# producing a partial sum.
pp_sorted = stack_pp.sortby("time")
stack_pp = pp_sorted.resample(time='1D').sum(skipna=False)

# x3 presumably scales the 3-hourly samples to the full day and x1000
# converts m -> mm (confirm against the download settings).
stack_pp = (stack_pp*3*1000).round(0)
stack_pp.to_netcdf("Precipitation/PP_ERA5_1959_2021d.nc")

In [None]:
# ERA5 sub-daily 2 m temperature -> daily maximum and minimum inside the mask
# polygons, shifted by -273.15 and rounded to 2 decimals.
dict_t2m = {'longitude': 'lon', 'latitude': 'lat'}
era5_hourly = xr.open_mfdataset(
    os.path.join(local + "/ERA5/Hourly/ERA5*.nc"),
    concat_dim='time',
    combine='nested',
    chunks="auto",
)
stack_t2m = era5_hourly.rename(dict_t2m)["t2m"]

# Keep only grid cells assigned to a polygon of `mask` (region index >= 0).
mask_t2m = regionmask.mask_geopandas(mask, stack_t2m)
stack_t2m = stack_t2m.where(mask_t2m >= 0, drop=True)

# Daily extremes are taken on the time-sorted series.
t2m_sorted = stack_t2m.sortby("time")
stack_t2m_max = (t2m_sorted.resample(time='1D').max() - 273.15).round(2)
stack_t2m_min = (t2m_sorted.resample(time='1D').min() - 273.15).round(2)

stack_t2m_max.to_netcdf("Temperature/Tmax_ERA5_1959_2021d.nc")
stack_t2m_min.to_netcdf("Temperature/Tmin_ERA5_1959_2021d.nc")

In [None]:
# Monthly alternative: load the sub-daily ERA5 temperature, crop it to the
# rectangular study domain (instead of the polygon mask) and subtract 273.15.
dict_t2m = {'longitude': 'lon', 'latitude': 'lat'}
era5_hourly = xr.open_mfdataset(
    os.path.join(local + "/ERA5/Hourly/ERA5*.nc"),
    concat_dim='time',
    combine='nested',
    chunks="auto",
)
stack_t2m = era5_hourly.rename(dict_t2m)["t2m"]

lon_ok = (stack_t2m.lon >= -76) & (stack_t2m.lon <= -65)
lat_ok = (stack_t2m.lat >= -56) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True) - 273.15

In [None]:
# Monthly mean of the daily maximum temperature.
daily_max = stack_t2m.sortby("time").resample(time='1D').max()
stack_t2m_max = daily_max.resample(time='MS').mean()
stack_t2m_max.to_netcdf("Temperature/Tmax_ERA5_1959_2021m.nc")

# Monthly mean of the daily minimum temperature.
daily_min = stack_t2m.sortby("time").resample(time='1D').min()
stack_t2m_min = daily_min.resample(time='MS').mean()
stack_t2m_min.to_netcdf("Temperature/Tmin_ERA5_1959_2021m.nc")

## ERA5-LAND



In [None]:
 # monthly averaged reanalysis
# ERA5-Land monthly precipitation: crop to the study domain, multiply by 1000
# and truncate to int32.
# NOTE(review): unlike the ERA5 monthly block, no days-in-month factor is
# applied here -- confirm the intended output units.
dict_pp = {'tp': 'pp', 'longitude': 'lon', 'latitude': 'lat'}
era5l_monthly = xr.open_dataset(
    os.path.join(local + "/ERA5_LAND/Monthly/PP_ERA5L_1950_2021m.nc"),
    chunks="auto",
)
stack_pp = era5l_monthly.rename(dict_pp)

lon_ok = (stack_pp.lon >= -79) & (stack_pp.lon <= -64)
lat_ok = (stack_pp.lat >= -57) & (stack_pp.lat <= -40)
stack_pp = stack_pp.where(lon_ok & lat_ok, drop=True)

stack_pp = (stack_pp*1000).astype("int32")
stack_pp.to_netcdf("Precipitation/PP_ERA5L_1950_2021m.nc")

# ERA5-Land monthly 2 m temperature: crop to the study domain, subtract
# 273.15 and round to 2 decimals.
dict_t2m  = {'longitude':'lon', 'latitude':'lat'}

# The path is built by string concatenation. The former
# os.path.join(local, "/ERA5_LAND/...") silently dropped `local`, because
# os.path.join discards all earlier components when a later one is absolute.
stack_t2m = xr.open_dataset(os.path.join(local + "/ERA5_LAND/Monthly/T2M_ERA5L_1950_2021m.nc"), chunks ="auto").rename(dict_t2m)
stack_t2m = stack_t2m.where((stack_t2m.lon >= -79) & (stack_t2m.lon <= -64) & (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40), drop=True)
stack_t2m = (stack_t2m-273.15).round(2)

# NOTE(review): the output name contains "ERA5__" where the sibling outputs
# use "ERA5L_"; it looks like a typo but is left unchanged in case other
# scripts read this exact file name.
stack_t2m.to_netcdf("Temperature/T2M_ERA5__1950_2021m.nc")

 # reanalysis 3-hourly data
# ERA5-Land sub-daily precipitation -> daily mean over the study domain.
# Two fixes relative to the previous version:
#   * the x1000 factor was applied twice (once at load time and once before
#     rounding); it is now applied once;
#   * longitude/latitude are renamed to lon/lat, as every other ERA5-Land
#     block does, so that the `.lon` / `.lat` crop below can resolve.
# NOTE(review): a daily *mean* (not sum) is kept from the original -- confirm
# this matches how the accumulated `tp` field was downloaded.
stack_pp  = xr.open_mfdataset(os.path.join(local + "/ERA5_LAND/Hourly/PP_ERA5L_*.nc"), concat_dim='time', combine='nested', chunks ="auto").rename({'longitude':'lon', 'latitude':'lat'})
stack_pp  = stack_pp.where((stack_pp.lon >= -79) & (stack_pp.lon <= -64) & (stack_pp.lat >= -57) & (stack_pp.lat <= -40), drop=True)
stack_pp  = stack_pp.resample(time='1D').mean()
stack_pp  = (stack_pp*1000).round(1)
stack_pp.to_netcdf("Precipitation/PP_ERA5L_1950_2021d.nc")

# ERA5-Land sub-daily 2 m temperature -> daily maximum and minimum over the
# study domain.
# Two fixes relative to the previous version:
#   * 273.15 was subtracted twice (at load time and again on the daily
#     extremes); it is now subtracted once, at load time;
#   * longitude/latitude are renamed to lon/lat (the relative-humidity block
#     applies the same rename to these T2M files), so that the `.lon` /
#     `.lat` crop below can resolve.
# NOTE(review): the glob "T2M_ERA5L.*.nc" requires a literal dot after
# "ERA5L" -- confirm it matches the actual file names.
stack_t2m  = xr.open_mfdataset(os.path.join(local + "/ERA5_LAND/Hourly/T2M_ERA5L.*.nc"), concat_dim='time', combine='nested', chunks ="auto").rename({'longitude':'lon', 'latitude':'lat'})-273.15
stack_t2m  = stack_t2m.where((stack_t2m.lon >= -79) & (stack_t2m.lon <= -64) & (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40), drop=True)
stack_t2m_max = stack_t2m.resample(time='1D').max().round(2)
stack_t2m_min = stack_t2m.resample(time='1D').min().round(2)
stack_t2m_max.to_netcdf("Temperature/T2M_max_ERA5L_1950_2021d.nc")
stack_t2m_min.to_netcdf("Temperature/T2M_min_ERA5L_1950_2021d.nc")

# ERA5-Land wind speed: magnitude of (u10, v10), scaled from 10 m to 2 m and
# averaged to daily values over the study domain.
dict_ws = {'longitude': 'lon', 'latitude': 'lat'}
factor = (2/10)**0.25  # Correction from 10m to 2m

era5l_wind = xr.open_mfdataset(
    os.path.join(local + "/ERA5_LAND/Hourly/WS_ERA5L*.nc"),
    concat_dim='time',
    combine='nested',
    chunks="auto",
)
stack_ws = era5l_wind.rename(dict_ws)

lon_ok = (stack_ws.lon >= -79) & (stack_ws.lon <= -64)
lat_ok = (stack_ws.lat >= -57) & (stack_ws.lat <= -40)
stack_ws = stack_ws.where(lon_ok & lat_ok, drop=True).sortby("time")

# Speed from the two wind components, then the height correction.
stack_ws["ws"] = (stack_ws.u10**2 + stack_ws.v10**2)**0.5
stack_ws["ws"] = stack_ws.ws*factor

stack_ws = stack_ws["ws"].resample(time='1D').mean()
stack_ws.to_netcdf("Wind_speed/WS_ERA5L_1950_2021d.nc")

# ERA5-Land relative humidity from dew-point (d2m) and air temperature (t2m),
# averaged to daily values over the study domain.
dict_hr   = {'longitude':'lon', 'latitude':'lat'}
stack_d2m = xr.open_mfdataset(os.path.join(local + "/ERA5_LAND/Hourly/T2Md*.nc"), concat_dim='time', combine='nested', chunks ="auto")-273.15
# NOTE(review): the glob "T2M*.nc" also matches files starting with "T2Md";
# confirm the file naming keeps dew-point files out of this stack.
stack_t2m = xr.open_mfdataset(os.path.join(local + "/ERA5_LAND/Hourly/T2M*.nc"), concat_dim='time', combine='nested', chunks ="auto")-273.15

# Merge dew point WITH air temperature. The previous version merged
# stack_d2m with itself, so `t2m` was never available below.
stack_rh  = xr.merge([stack_d2m, stack_t2m]).rename(dict_hr)
stack_rh  = stack_rh.where((stack_rh.lon >= -79) & (stack_rh.lon <= -64) & (stack_rh.lat >= -57) & (stack_rh.lat <= -40), drop=True).sortby("time")

# Magnus-type ratio exp(a*Td/(b+Td)) / exp(a*T/(b+T)) with a = 17.625 and
# b = 243.04; the result is a fraction (it is not multiplied by 100).
# `np.exp` replaces the bare `exp`, which was never imported.
stack_rh["hr"] = np.exp((17.625*stack_rh.d2m)/(243.04+stack_rh.d2m)) / np.exp((17.625*stack_rh.t2m)/(243.04+stack_rh.t2m))
stack_rh = stack_rh["hr"].resample(time='1D').mean()
stack_rh.to_netcdf("Relative_humidity/RH_ERA5L_1950_2021d.nc")

## MERRA2

In [None]:
# Monthly data
# MERRA2 precipitation (PRECTOT) -> monthly totals over the study domain.
dict_pp   = {'PRECTOT':'pp'}
stack_pp  = xr.open_mfdataset(os.path.join(local + "/MERRA2/*"), concat_dim='time', combine='nested', chunks ="auto").rename(dict_pp)[["pp"]]
stack_pp  = stack_pp.where((stack_pp.lon >= -79) & (stack_pp.lon <= -64) & (stack_pp.lat >= -57) & (stack_pp.lat <= -40), drop=True).sortby("time")

# Put the series on a clean month-start axis first, then read the month
# lengths from that axis. This replaces the tiled fixed-length calendar
# (February always 28 days), which ignored leap years.
stack_pp["time"] = pd.date_range(start='1980/01/01', end='2021/12/01', freq='MS')
months    = stack_pp.time.dt.days_in_month.rename('month_length')

# value * days-in-month * 86400 s/day (presumably kg m-2 s-1 -> mm/month).
stack_pp  = (stack_pp*months*86400).astype("int32")
stack_pp.to_netcdf("Precipitation/PP_MERRA2_1980_2021m.nc", encoding = encode_pp)

# MERRA2 temperature (TLML): crop to the study domain, subtract 273.15, round
# to 2 decimals and relabel the time axis with month-start dates.
dict_t2m = {'TLML': 't2m'}
merra2 = xr.open_mfdataset(
    os.path.join(local + "/MERRA2/*"),
    combine='by_coords',
    chunks="auto",
)
stack_t2m = merra2.sortby("time").rename(dict_t2m)[["t2m"]]

lon_ok = (stack_t2m.lon >= -79) & (stack_t2m.lon <= -64)
lat_ok = (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True)

stack_t2m = (stack_t2m - 273.15).round(2)
stack_t2m["time"] = pd.date_range(start='1980/01/01', end='2021/12/01', freq='MS')
stack_t2m.to_netcdf("Temperature/Tavg_MERRA2_1980_2021m.nc", encoding=encode_t2m)

## CFSR

In [None]:
# From here on the raw datasets are read from the Dropbox folder.
local  = "/home/rooda/Dropbox/"

# Monthly Mean (4 per day) of 6-hour Accumulation
dict_pp  = {'A_PCP_L1_AccumAvg':'pp'}

stack_pp1  = xr.open_mfdataset(os.path.join(local + "/CSFR/1979_2010/pgbh*.nc"), concat_dim='time', combine='nested').rename(dict_pp)[["pp"]]
stack_pp2  = xr.open_mfdataset(os.path.join(local + "/CSFR/2011_2019/PP/pgbh*.nc"), concat_dim='time', combine='nested').rename(dict_pp)[["pp"]]

# Join both periods; x4 turns the mean 6-hour accumulation into a daily value.
stack_pp   = xr.concat([stack_pp1, stack_pp2], dim = "time").pp*4 # Problem during union

# Wrap longitudes from 0..360 to -180..180, restore ascending order and crop
# to the study domain. The previous version applied the sort and the crop to
# `stack_pp2` only, so the file written below was neither sorted nor cropped.
stack_pp  = stack_pp.assign_coords(lon = (stack_pp.lon + 180) % 360 - 180)
stack_pp  = stack_pp.sortby("lon")
stack_pp  = stack_pp.where((stack_pp.lon >= -79) & (stack_pp.lon <= -64) & (stack_pp.lat >= -57) & (stack_pp.lat <= -40), drop=True)

# Daily value * days per month; fixed 28-day February (leap years ignored).
months    = xr.DataArray(np.tile(days, 2019-1979+1), coords=[stack_pp.time], name='month_length') # fix !!
stack_pp  = (stack_pp*months).astype("int32")

# NOTE(review): the time axis is not reset to month-start dates here (an
# earlier assignment of pd.date_range('1979/01/01', '2019/12/01') was disabled).
stack_pp.to_netcdf("Precipitation/PP_CSFR_1979_2019m.nc", encoding = encode_pp)

# CFSR monthly temperature (TMP_L103_Avg renamed to t2m), written for 1979-2010.
dict_t2m    = { 'TMP_L103_Avg':'t2m'}
stack_t2m1  = xr.open_mfdataset(os.path.join(local + "/CSFR/1979_2010/flxf*.nc"), concat_dim='time', combine='nested').sortby("time").rename(dict_t2m)[["t2m"]]
stack_t2m2  = xr.open_mfdataset(os.path.join(local + "/CSFR/2011_2019/T2M/flxf*.nc"), concat_dim='time', combine='nested').sortby("time").rename(dict_t2m)[["t2m"]]
# Interpolate the 2011-2019 fields onto the grid of the first 1979-2010 time
# step and concatenate both periods along time.
stack_t2m2  = stack_t2m2.interp_like(stack_t2m1.t2m[0], method='linear')
stack_t2m   = xr.concat([stack_t2m1, stack_t2m2], dim = "time")
# NOTE(review): the concatenated series is discarded right here -- only the
# 1979-2010 period is processed and saved (the output name agrees with that).
# Confirm whether dropping 2011-2019 is intentional.
stack_t2m = stack_t2m1
# Wrap longitudes from 0..360 to -180..180 (in place; stack_t2m1 is the same object).
stack_t2m.coords['lon'] = (stack_t2m.coords['lon'] + 180) % 360 - 180
# Crop to the study domain.
stack_t2m = stack_t2m.where((stack_t2m.lon >= -79) & (stack_t2m.lon <= -64) & (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40), drop=True)
# Relabel the time axis with month-start dates.
stack_t2m["time"] = pd.date_range(start='1979/01/01', end='2010/12/01', freq='MS') 
# Subtract 273.15 and round to 2 decimals before writing.
stack_t2m = (stack_t2m-273.15).round(2)                  
stack_t2m.to_netcdf("Temperature/Tavg_CSFR_1979_2010m.nc", encoding = encode_t2m)

## REGCR2: RegCM4-CR2

In [None]:
# RegCM4-CR2 precipitation (pr) -> monthly totals over the study domain.
dict_pp  = {'pr':'pp'}
stack_pp = xr.open_mfdataset(os.path.join(local + "/REGCR2/pr_*.nc"), combine='by_coords').rename(dict_pp).sortby("time")[["pp"]]
stack_pp = stack_pp.where((stack_pp.lon >= -79) & (stack_pp.lon <= -64) & (stack_pp.lat >= -57) & (stack_pp.lat <= -40), drop=True)
stack_pp["time"] = pd.date_range(start='1980/01/01', end='2015/12/01', freq='MS')

# Month lengths in calendar order, one 12-month cycle per year. `np.tile`
# repeats the whole Jan..Dec sequence; the former `days.repeat(n)` repeated
# each element n times in a row ([31]*n, [28]*n, ...), which assigned the
# wrong length to almost every month.
# NOTE(review): February is fixed at 28 days -- switch to
# `stack_pp.time.dt.days_in_month` if the model uses a calendar with leap years.
months   = xr.DataArray(np.tile(days, 2015-1980+1), coords=[stack_pp.time], name='month_length')

# value * days-in-month * 86400 s/day (presumably kg m-2 s-1 -> mm/month).
stack_pp = (stack_pp*months*86400).astype("int32")
stack_pp.to_netcdf("Precipitation/PP_REGCR2_1980_2015m.nc", encoding = encode_pp)

# RegCM4-CR2 temperature (tas): crop to the study domain, relabel the time
# axis with month-start dates, subtract 273.15 and round to 2 decimals.
dict_t2m = {'tas': 't2m'}
regcr2_tas = xr.open_mfdataset(
    os.path.join(local + "/REGCR2/tas_*.nc"),
    combine='by_coords',
)
stack_t2m = regcr2_tas.rename(dict_t2m).sortby("time")["t2m"]

lon_ok = (stack_t2m.lon >= -79) & (stack_t2m.lon <= -64)
lat_ok = (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True)

stack_t2m["time"] = pd.date_range(start='1980/01/01', end='2015/12/01', freq='MS')
stack_t2m = (stack_t2m - 273.15).round(2)
stack_t2m.to_netcdf("Temperature/T2M_REGCR2_1980_2015m.nc", encoding=encode_t2m)

## MSWEP v2.8 and MSWX

In [None]:
# Daily timestep
# MSWEP daily precipitation cropped to the study domain and cast to int16.
mswep_daily = xr.open_mfdataset(
    os.path.join(local + "/MSWEP-MX/pr/Daily/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_pp = mswep_daily.rename({'precipitation': 'pp'})

lon_ok = (stack_pp.lon >= -79) & (stack_pp.lon <= -64)
lat_ok = (stack_pp.lat >= -57) & (stack_pp.lat <= -40)
stack_pp = stack_pp.where(lon_ok & lat_ok, drop=True)

stack_pp = stack_pp.astype("int16")
stack_pp.to_netcdf("Precipitation/PP_MSWEPv28_1979_2020d.nc", encoding=encode_pp)

In [None]:
# MSWX daily mean air temperature cropped to the study domain.
mswx_tas = xr.open_mfdataset(
    os.path.join(local + "/MSWEP-MX/tas/Daily/*/Daily/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_t2m = mswx_tas.rename({'air_temperature': 't2m'})

lon_ok = (stack_t2m.lon >= -79) & (stack_t2m.lon <= -64)
lat_ok = (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True)

stack_t2m.to_netcdf("Temperature/Tavg_MSWX_1979_2019d.nc", encoding=encode_t2m)

In [None]:
# MSWX daily maximum air temperature cropped to the study domain.
# The variable is stored under the name 't2m', hence the t2m encoding.
mswx_tmax = xr.open_mfdataset(
    os.path.join(local + "/MSWEP-MX/tmax/Daily/*/Daily/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_tmax = mswx_tmax.rename({'air_temperature': 't2m'})

lon_ok = (stack_tmax.lon >= -79) & (stack_tmax.lon <= -64)
lat_ok = (stack_tmax.lat >= -57) & (stack_tmax.lat <= -40)
stack_tmax = stack_tmax.where(lon_ok & lat_ok, drop=True)

stack_tmax.to_netcdf("Temperature/Tmax_MSWX_1979_2019d.nc", encoding=encode_t2m)

In [None]:
# MSWX daily minimum air temperature cropped to the study domain.
# The variable is stored under the name 't2m', hence the t2m encoding.
mswx_tmin = xr.open_mfdataset(
    os.path.join(local + "/MSWEP-MX/tmin/Daily/*/Daily/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_tmin = mswx_tmin.rename({'air_temperature': 't2m'})

lon_ok = (stack_tmin.lon >= -79) & (stack_tmin.lon <= -64)
lat_ok = (stack_tmin.lat >= -57) & (stack_tmin.lat <= -40)
stack_tmin = stack_tmin.where(lon_ok & lat_ok, drop=True)

stack_tmin.to_netcdf("Temperature/Tmin_MSWX_1979_2019d.nc", encoding=encode_t2m)

In [None]:
# To monthly timestep
# MSWEP monthly precipitation cropped to the study domain and cast to int16.
dict_pp = {'precipitation': 'pp'}
mswep_monthly = xr.open_mfdataset(
    os.path.join(local + "/MSWEP-MX/pr/Monthly/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_pp = mswep_monthly.rename(dict_pp)

lon_ok = (stack_pp.lon >= -79) & (stack_pp.lon <= -64)
lat_ok = (stack_pp.lat >= -57) & (stack_pp.lat <= -40)
stack_pp = stack_pp.where(lon_ok & lat_ok, drop=True)

stack_pp = stack_pp.astype("int16")
stack_pp.to_netcdf("Precipitation/PP_MSWEPv28_1979_2020m.nc", encoding=encode_pp)

# MSWX monthly mean air temperature cropped to the study domain.
dict_t2m = {'air_temperature': 't2m'}
mswx_monthly = xr.open_mfdataset(
    os.path.join(local + "/MSWEP-MX/tas/Monthly/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_t2m = mswx_monthly.rename(dict_t2m)

lon_ok = (stack_t2m.lon >= -79) & (stack_t2m.lon <= -64)
lat_ok = (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True)

stack_t2m.to_netcdf("Temperature/Tavg_MSWX_1979_2021m.nc", encoding=encode_t2m)

## CR2MET v2.5

In [None]:
# Daily timestep
# CR2MET daily precipitation (pr) cropped to the study domain, cast to int16.
dict_pp = {'pr': 'pp'}
cr2met_pp = xr.open_mfdataset(
    os.path.join(local + "/CR2MET/PP/*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_pp = cr2met_pp[["pr"]].rename(dict_pp)

lon_ok = (stack_pp.lon >= -79) & (stack_pp.lon <= -64)
lat_ok = (stack_pp.lat >= -57) & (stack_pp.lat <= -40)
stack_pp = stack_pp.where(lon_ok & lat_ok, drop=True)

stack_pp = stack_pp.astype("int16")
stack_pp.to_netcdf("Precipitation/PP_CR2MET_1960_2021d.nc", encoding=encode_pp)

# CR2MET daily maximum temperature cropped to the study domain.
cr2met_tmax = xr.open_mfdataset(
    os.path.join(local + "/CR2MET/T2M/*tmin_tmax*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_t2m_max = cr2met_tmax[["tmax"]]
lon_ok = (stack_t2m_max.lon >= -79) & (stack_t2m_max.lon <= -64)
lat_ok = (stack_t2m_max.lat >= -57) & (stack_t2m_max.lat <= -40)
stack_t2m_max = stack_t2m_max.where(lon_ok & lat_ok, drop=True)
stack_t2m_max.to_netcdf("Temperature/Tmax_CR2MET_1960_2021d.nc", encoding=encode_tmax)

# CR2MET daily minimum temperature cropped to the study domain.
cr2met_tmin = xr.open_mfdataset(
    os.path.join(local + "/CR2MET/T2M/*tmin*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_t2m_min = cr2met_tmin[["tmin"]]
lon_ok = (stack_t2m_min.lon >= -79) & (stack_t2m_min.lon <= -64)
lat_ok = (stack_t2m_min.lat >= -57) & (stack_t2m_min.lat <= -40)
stack_t2m_min = stack_t2m_min.where(lon_ok & lat_ok, drop=True)
stack_t2m_min.to_netcdf("Temperature/Tmin_CR2MET_1960_2021d.nc", encoding=encode_tmin)

# Daily mean temperature taken as the mean of the maximum and the minimum.
tavg = (stack_t2m_max["tmax"] + stack_t2m_min["tmin"])/2
stack_t2m = tavg.to_dataset(name="t2m")
stack_t2m.to_netcdf("Temperature/Tavg_CR2MET_1960_2021d.nc", encoding=encode_t2m)

In [None]:
# To monthly timestep
# Uses the daily CR2MET stacks (stack_pp, stack_t2m_max, stack_t2m_min)
# produced by the daily cell: monthly sum for precipitation, monthly mean for
# the temperature extremes.
stack_pp = stack_pp.resample(time='MS').sum()
stack_t2m_max = stack_t2m_max.resample(time='MS').mean()
stack_t2m_min = stack_t2m_min.resample(time='MS').mean()

# Monthly mean temperature taken as the mean of the maximum and the minimum.
tavg = (stack_t2m_max["tmax"] + stack_t2m_min["tmin"])/2
stack_t2m = tavg.to_dataset(name="t2m")

stack_pp.to_netcdf("Precipitation/PP_CR2MET_1960_2021m.nc", encoding=encode_pp)
stack_t2m.to_netcdf("Temperature/Tavg_CR2MET_1960_2021m.nc", encoding=encode_t2m)

## W5E5 v2.0

In [None]:
# Daily timestep
# W5E5 daily precipitation (pr): crop to the study domain, multiply by
# 86400 (presumably a per-second flux -> per-day total) and cast to int16.
w5e5_pr = xr.open_mfdataset(
    os.path.join(local + "/W5E5/pr_*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_pp = w5e5_pr.rename({'pr': 'pp'})

lon_ok = (stack_pp.lon >= -79) & (stack_pp.lon <= -64)
lat_ok = (stack_pp.lat >= -57) & (stack_pp.lat <= -40)
stack_pp = stack_pp.where(lon_ok & lat_ok, drop=True)

stack_pp = (stack_pp * 86400).astype("int16")
stack_pp.to_netcdf("Precipitation/PP_W5E5_1979_2019d.nc", encoding=encode_pp)

# W5E5 daily mean temperature (tas): crop to the study domain, subtract 273.15.
w5e5_tas = xr.open_mfdataset(
    os.path.join(local + "/W5E5/tas_*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_t2m = w5e5_tas.rename({'tas': 't2m'})
lon_ok = (stack_t2m.lon >= -79) & (stack_t2m.lon <= -64)
lat_ok = (stack_t2m.lat >= -57) & (stack_t2m.lat <= -40)
stack_t2m = stack_t2m.where(lon_ok & lat_ok, drop=True)
stack_t2m = stack_t2m - 273.15
stack_t2m.to_netcdf("Temperature/Tavg_W5E5_1979_2019d.nc", encoding=encode_t2m)

# W5E5 daily maximum temperature (tasmax), stored under the name 't2m'.
w5e5_tasmax = xr.open_mfdataset(
    os.path.join(local + "/W5E5/tasmax_*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_tmax = w5e5_tasmax.rename({'tasmax': 't2m'})
lon_ok = (stack_tmax.lon >= -79) & (stack_tmax.lon <= -64)
lat_ok = (stack_tmax.lat >= -57) & (stack_tmax.lat <= -40)
stack_tmax = stack_tmax.where(lon_ok & lat_ok, drop=True)
stack_tmax = stack_tmax - 273.15
stack_tmax.to_netcdf("Temperature/Tmax_W5E5_1979_2019d.nc", encoding=encode_t2m)

# W5E5 daily minimum temperature (tasmin), stored under the name 't2m'.
w5e5_tasmin = xr.open_mfdataset(
    os.path.join(local + "/W5E5/tasmin_*.nc"),
    combine='by_coords',
    chunks="auto",
)
stack_tmin = w5e5_tasmin.rename({'tasmin': 't2m'})
lon_ok = (stack_tmin.lon >= -79) & (stack_tmin.lon <= -64)
lat_ok = (stack_tmin.lat >= -57) & (stack_tmin.lat <= -40)
stack_tmin = stack_tmin.where(lon_ok & lat_ok, drop=True)
stack_tmin = stack_tmin - 273.15
stack_tmin.to_netcdf("Temperature/Tmin_W5E5_1979_2019d.nc", encoding=encode_t2m)

In [None]:
# To monthly timestep
# Uses the daily W5E5 stacks (stack_pp, stack_t2m) produced by the daily cell.

# Precipitation: monthly sum of the daily values.
monthly_pp = stack_pp.resample(time='MS')
stack_pp = monthly_pp.sum()
stack_pp.to_netcdf("Precipitation/PP_W5E5_1979_2019m.nc", encoding=encode_pp)

# Temperature: monthly mean of the daily values.
monthly_t2m = stack_t2m.resample(time='MS')
stack_t2m = monthly_t2m.mean()
stack_t2m.to_netcdf("Temperature/Tavg_W5E5_1979_2019m.nc", encoding=encode_t2m)

## GLEAM v3.6a

In [None]:
# Daily timestep
# GLEAM potential evaporation (Ep, renamed to pet) cropped to the study domain.
pet_stack = xr.open_mfdataset(os.path.join(local + "/GLEAM/Daily/Ep_*.nc"), combine='by_coords', chunks ="auto").rename({'Ep':'pet'})["pet"]
pet_stack = pet_stack.where((pet_stack.lon >= -79) & (pet_stack.lon <= -64) & (pet_stack.lat >= -57) & (pet_stack.lat <= -40), drop=True)
# Fill gaps along longitude by linear interpolation, at most one consecutive missing cell.
pet_stack = pet_stack.interpolate_na(dim="lon", method="linear", limit=1)
# Everything that is not strictly positive (including NaN) becomes 0 ...
pet_stack = pet_stack.where(pet_stack > 0, 0)
# ... so the first time step can define the valid-cell mask (cells with pet > 0),
# which is reused by the monthly and annual products below.
pet_mask  = pet_stack[0] > 0
# Cells outside the mask are set to NaN for every time step.
pet_stack = pet_stack.where(pet_mask)
pet_stack = pet_stack.round(2)
pet_stack.lon.attrs['long_name'] = 'longitude'
pet_stack.lat.attrs["long_name"] = "latitude"
pet_stack.to_netcdf("Evapotranspiration/PET_GLEAM36a_1980_2021d.nc", encoding = {"pet": {"zlib": True, "complevel": 9, "dtype": "float32"}})

# Monthly timestep
pet_stack = pet_stack.resample(time='MS').sum()  # to monthly timestep
# Re-apply the mask after the monthly sum.
pet_stack = pet_stack.where(pet_mask)
pet_stack = pet_stack.round(0)
# NOTE(review): written as int16 without a _FillValue although masked cells are NaN -- confirm.
pet_stack.to_netcdf("Evapotranspiration/PET_GLEAM36a_1980_2021m.nc", encoding = {"pet": {"zlib": True, "complevel": 1, "dtype": "int16"}})

# Annual timestep
# Mean of the annual sums (computed from the rounded monthly values above).
pet_stack = pet_stack.resample(time='YS').sum().mean(dim = "time")
pet_stack = pet_stack.where(pet_mask)
pet_stack = pet_stack.round(0)
# Rename the spatial dimensions to x/y before the raster export.
pet_stack = pet_stack.rename({'lon':'x', 'lat':'y'})
pet_stack.rio.to_raster("Evapotranspiration/PET_GLEAM36a_1980_2021.tif")

## WATER BALANCE III & IV

In [None]:
# Reference grid: first time step of the CR2MET daily precipitation, with x/y dimension names.
# NOTE(review): this reads "PP_CR2MET_1979_2020d.nc", while the CR2MET cell of this
# notebook writes "PP_CR2MET_1960_2021d.nc" -- confirm which file is intended.
sample  = xr.open_dataset("Precipitation/PP_CR2MET_1979_2020d.nc").pp[0].rename({'lat':'y','lon':'x'})

# --- Water balance III (BH3): monthly NetCDF ---
dict_bh3  = {'dim_lon':'x', 'dim_lat':'y', 'dim_time':'time'} # monthly averaged reanalysis (ok)
bh3_stack = xr.open_dataset(os.path.join(local + "/DGA_BH/BH3/netcdf/1_Historico/regionalizacion_1979_2015.nc")).rename(dict_bh3)
# Attach month-start dates and use the lon/lat variables as x/y coordinates.
bh3_stack = bh3_stack.assign_coords(time = pd.date_range(start='1979/01/01', end='2015/12/01', freq='MS'))
bh3_stack = bh3_stack.assign_coords(x  = bh3_stack.lon)
bh3_stack = bh3_stack.assign_coords(y  = bh3_stack.lat)
bh3_stack = bh3_stack.transpose("time", "y", "x")

# Crop to the BH3 sub-domain and keep 1985 onwards.
bh3_stack = bh3_stack.where((bh3_stack.x >= -75) & (bh3_stack.x <= -71) & (bh3_stack.y >= -45.8) & (bh3_stack.y <= -40), drop=True)
bh3_stack = bh3_stack.sel(time = slice("1985-01-01", "2016-01-01"))
# Annual sums of pr, ET and PET, then the mean over all years.
# NOTE(review): the '1Y' frequency alias is deprecated in recent pandas releases.
bh3_stack = bh3_stack[["pr", "ET", "PET"]].resample(time='1Y').sum()
bh3_stack = bh3_stack.mean("time")
# Cells whose mean annual precipitation is exactly 0 are set to NaN.
bh3_stack = bh3_stack.where(bh3_stack.pr != 0)
# NOTE(review): x30 presumably converts per-day values to approximate monthly totals -- TODO confirm.
bh3_stack["ET"] = bh3_stack.ET*30
bh3_stack["PET"] = bh3_stack.PET*30
bh3_stack = bh3_stack.interp_like(sample, method='nearest') # BH3 and BH4 are not aligned

# --- Water balance IV (BH4): annual GeoTIFF rasters ---
bh4_stack  = xr.Dataset(dict(pr = xr.open_dataset(local + "/DGA_BH/BH4/Archivos_raster/BH_85-15/Forzantes/1_Historico/pr_Anual_LatLon.tif").band_data,
                             PET = xr.open_dataset(local + "/DGA_BH/BH4/Archivos_raster/BH_85-15/VIC/1_Historico/pet_Anual_LatLon.tif").band_data,
                             ET = xr.open_dataset(local + "/DGA_BH/BH4/Archivos_raster/BH_85-15/VIC/1_Historico/et_Anual_LatLon.tif").band_data))
# Keep band 1 only and drop the CRS helper coordinate.
# NOTE(review): `.drop` is deprecated in xarray in favour of `.drop_vars`.
bh4_stack  = bh4_stack.sel(band=1, drop=True).drop("spatial_ref")
bh4_stack  = bh4_stack.interp_like(sample, method='nearest')
# BH4 values take precedence; BH3 fills the cells where BH4 is missing.
bh4_stack = bh4_stack.combine_first(bh3_stack)
bh4_stack = bh4_stack.rio.write_crs("epsg:4326", inplace=True)

# Export each variable as a GeoTIFF.
bh4_stack.pr.rio.to_raster("Precipitation/PP_WB_DGA_1985_2015.tif")
bh4_stack.PET.rio.to_raster("Evapotranspiration/PET_WB_DGA_1985_2015.tif")
bh4_stack.ET.rio.to_raster("Evapotranspiration/ET_WB_DGA_1985_2015.tif")