In [99]:
import xarray as xr
import datetime as dt
import rioxarray as rio
from rasterio.enums import Resampling
from pathlib import Path
import requests
import geopandas as gpd

In [100]:
# Directory where the downloaded CRU TS archives are stored.
data_dir = Path("./data/reference/")

#clip geometry
# Country outline used to clip the global grids.
# NOTE(review): the recorded cell output echoes this line the way a
# deprecation warning would; `gpd.datasets` appears to be deprecated in
# recent GeoPandas releases -- confirm and pin a source for the boundary.
gdf = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
gdf = gdf[gdf['name']=='Argentina'].reset_index(drop=True)
if gdf.empty:
    # Fail with a readable message instead of an IndexError on the next line.
    raise ValueError("No feature named 'Argentina' found in the boundary dataset")
# Single GeoJSON-like geometry dict wrapped in a list (used later for clipping).
clip_geojson = [gdf.geometry.__geo_interface__['features'][0]['geometry']]

#variables
# Variable codes substituted into `endpoint` and used to select the data
# variable inside each file.
variables = ['pre','tmp','tmn','tmx']
# Decadal file periods covering 1981-2020 without gaps. The 1991.2000 decade
# was previously missing, so the "1981_2020" average silently skipped ten years.
year_period = ['1981.1990','1991.2000','2001.2010','2011.2020']
# URL template for one gzipped file per (variable, decade).
endpoint = "https://crudata.uea.ac.uk/cru/data/hrg/cru_ts_4.07/cruts.2304141047.v4.07/{variable}/cru_ts4.07.{year}.{variable}.dat.nc.gz"


  gdf = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))


In [101]:
#download files
# Fetch every (variable, decade) archive that is not already in `data_dir`.
# The payload is written exactly as sent (no content decoding) so the .gz
# file on disk is byte-identical to the one on the server.
data_dir.mkdir(parents=True, exist_ok=True)  # open() below fails if the folder is missing

with requests.Session() as s:
    for variable in variables:
        for year in year_period:
            gzip_url = endpoint.format(variable=variable, year=year)
            fname = data_dir / gzip_url.split("/")[-1]

            if fname.exists():
                continue

            # Download to a temporary sibling first: an interrupted transfer
            # must not leave a truncated file that the exists() check above
            # would treat as complete on the next run.
            partial = fname.with_name(fname.name + ".part")

            with s.get(gzip_url, stream=True, timeout=60) as r:
                # Without this an HTTP error page would be saved as the .gz file.
                r.raise_for_status()
                with open(partial, 'wb') as f:
                    for chunk in r.raw.stream(1024 * 1024, decode_content=False):
                        if chunk:
                            f.write(chunk)

            # Rename only after the whole body has been written.
            partial.replace(fname)

In [103]:
#process data
# For each variable: open its decadal files as one time series, clip to the
# country geometry, average per calendar month, reproject to EPSG:3857 at the
# requested resolution, and write the result as a Cloud Optimized GeoTIFF.
for var in variables:
    # Build the input list from `year_period` (same naming as the download
    # step) rather than globbing the folder: a glob would silently include
    # any stale or extra decade file lying in `data_dir` and change the
    # average. A missing file now fails loudly when the dataset is opened.
    timeseries_files = [
        data_dir / endpoint.format(variable=var, year=year).split("/")[-1]
        for year in year_period
    ]
    fname = f"CRU_TS_4_07_Avg_Monthly_{var.upper()}_1981_2020.tif"

    # Context manager closes the underlying files once the raster is written.
    with xr.open_mfdataset(timeseries_files, cache=False) as ds:
        img = (ds[var]
               .rename({'lon':'x','lat':'y'})   # rioxarray expects x/y spatial dims
               .rio.write_crs(4326)
               .rio.clip(clip_geojson, from_disk=True, all_touched=True)
               .groupby('time.month').mean()    # one layer per calendar month
               .rio.reproject(dst_crs="epsg:3857", resolution=4000, resampling=Resampling.bilinear)
               )

        img.rio.to_raster(fname, driver="COG", windowed=True)

    print(f"Saved as {fname}")



Saved as CRU_TS_4_07_Avg_Monthly_PRE_1981_2020.tif


: 