In [48]:
import glob
import geopandas as gp
import numpy as np
import pandas as pd
import xarray as xr

# Names of the Argentinian provinces that together account for 90% of the
# country's wheat production; used to select province geometries below.
relevant_areas = [
    "Buenos Aires",
    "Córdoba",
    "Entre Ríos",
    "La Pampa",
    "Santa Fe",
    "Santiago del Estero",
]

In [49]:
# Load the province shapefile, keep only the rows whose "ADM1_ES" name is
# listed in `relevant_areas` (name + geometry columns only), renumber the
# rows, and reproject the geometries to EPSG:4326.
adm01_shape = (
    gp.read_file("data/SHAPE/arg_admbnda_adm1_unhcr2017.shp")
    .loc[lambda shp: shp["ADM1_ES"].isin(relevant_areas), ["ADM1_ES", "geometry"]]
    .reset_index(drop=True)
    .to_crs("EPSG:4326")
)

In [50]:
# Load the ERA5 GRIB file into a flat DataFrame, with the dataset's index
# levels turned into ordinary columns by `reset_index()`.
# `to_dataframe()` materialises the values in memory, so the dataset is opened
# in a `with` block and closed right away instead of keeping the file handle
# open for the lifetime of the kernel.
with xr.open_dataset("data/ERA5/era5_tmean_argentina.grib") as era5_ds:
    df = era5_ds.to_dataframe().reset_index()

In [51]:
# NOTE: this cell reads `df` and overwrites it in place. Re-running it without
# first re-running the loading cell would subtract 273.15 from `t2m` a second
# time, so always re-load `df` before executing this cell again.

# Collect every distinct (latitude, longitude) pair present in `df` and turn
# it into a point geometry (x = longitude, y = latitude, CRS EPSG:4326).
# The variable name `hindcasts_coordinates` is kept as-is because other cells
# may refer to it.
hindcasts_coordinates = (
    df[["latitude", "longitude"]]
    .drop_duplicates()
    .sort_values(["latitude", "longitude"])
    .reset_index(drop=True)
)
hindcasts_coordinates = gp.GeoDataFrame(
    hindcasts_coordinates,
    geometry=gp.points_from_xy(
        hindcasts_coordinates["longitude"],
        hindcasts_coordinates["latitude"],
        crs=4326,
    ),
)

# Keep only the grid points that lie within one of the selected provinces.
# An inner spatial join discards unmatched points directly, so no
# left-join-then-dropna round trip is needed. The result is indexed by
# (latitude, longitude) and carries the matching province name.
hindcasts_coordinates = (
    gp.sjoin(hindcasts_coordinates, adm01_shape, how="inner", predicate="within")
    .loc[:, ["latitude", "longitude", "ADM1_ES"]]
    .set_index(["latitude", "longitude"])
)

# Restrict the records in `df` to those grid points via an inner merge on the
# (latitude, longitude) index.
df = (
    df.set_index(["latitude", "longitude"])
    .merge(hindcasts_coordinates, left_index=True, right_index=True, how="inner")
    .reset_index()
)

# Drop records without a temperature value.
df = df.dropna(subset=["t2m"]).reset_index(drop=True)

# Kelvin to Celsius as a single vectorised operation instead of a per-element
# Python lambda. The explicit float64 cast keeps the subtraction in double
# precision even if the GRIB values are stored as float32 (dtype not visible
# here).
df["t2m"] = df["t2m"].astype("float64") - 273.15

# Make sure `time` is a datetime column. `valid_time` is intentionally not
# converted: it is not part of the columns kept below.
df["time"] = pd.to_datetime(df["time"])

# Keep only the analysis columns, order rows by time and then grid position,
# renumber them, and round the numeric columns to 2 decimals (`round` leaves
# the datetime column untouched).
df = (
    df.loc[:, ["time", "latitude", "longitude", "t2m"]]
    .sort_values(by=["time", "latitude", "longitude"])
    .reset_index(drop=True)
    .round(2)
)

In [52]:
# Display the filtered, sorted and rounded frame (time, latitude, longitude, t2m).
df

Unnamed: 0,time,latitude,longitude,t2m
0,1993-01-01,-40.9,-62.8,20.79
1,1993-01-01,-40.9,-62.7,20.59
2,1993-01-01,-40.9,-62.6,20.45
3,1993-01-01,-40.9,-62.5,20.29
4,1993-01-01,-40.9,-62.4,20.11
...,...,...,...,...
2697979,2016-12-01,-25.7,-62.2,27.10
2697980,2016-12-01,-25.7,-62.1,26.98
2697981,2016-12-01,-25.7,-62.0,26.84
2697982,2016-12-01,-25.7,-61.9,26.83
