In [1]:
# Standard libraries
import os

# Third-party libraries
import pandas as pd
import xarray as xr

# Local libraries
from utilities import netcdf


In [2]:
# Input and output directories, both resolved against the current working directory
DATA_DIR, OUT_DIR = (
    os.path.join(os.getcwd(), subdir)
    for subdir in ("data/intermediate/hadukgrid", "data/processed/variable")
)


In [3]:
# Input NetCDF files found under DATA_DIR. Each entry is later handed straight
# to xr.open_dataarray, so path=True presumably makes list_files return openable
# paths rather than bare file names — TODO confirm in utilities.netcdf.
NC_FILES = netcdf.list_files(DATA_DIR, path=True)


In [4]:
# Display name for each variable, keyed by the DataArray name
names = dict(
    groundfrost="Frost",
    sun="Sun",
    tas="Temperature",
    snowLying="Snow",
    tasmax="Maximum temperature",
    hurs="Humidity",
    rainfall="Rain",
    sfcWind="Wind",
    tasmin="Minimum temperature",
)

# Short code for each variable, keyed the same way as `names`
# NOTE(review): no use of `codes` is visible in this part of the notebook
codes = dict(
    groundfrost="frost",
    sun="sun",
    tas="tas",
    snowLying="snow",
    tasmax="tasmax",
    hurs="hurs",
    rainfall="rain",
    sfcWind="wind",
    tasmin="tasmin",
)


def get_attributes(data_array, to_drop=None):
    """
    Returns a dict describing the attributes of a DataArray
    Optionally, filters the attributes with to_drop

    Parameters
    ----------
    data_array : xr.DataArray
        Array whose ``attrs`` are copied. Its ``name`` must be a key of the
        module-level ``names`` mapping.
    to_drop : list or None, optional
        Attribute keys to leave out of the result. ``None`` (the default)
        keeps every attribute.

    Returns
    -------
    dict
        The retained attributes plus two extra entries: ``"id"`` (the array's
        name) and ``"name"`` (its display name looked up in ``names``). These
        two entries overwrite any attribute stored under the same key.

    Raises
    ------
    AssertionError
        If ``to_drop`` is neither a list nor None, or ``data_array`` is not an
        ``xr.DataArray``.
    KeyError
        If ``data_array.name`` is not a key of ``names``.
    """
    assert isinstance(to_drop, list) or to_drop is None
    assert isinstance(data_array, xr.DataArray)
    # None means "drop nothing"; a membership test against None itself would
    # raise TypeError, which made the default argument unusable.
    excluded = () if to_drop is None else to_drop
    attrs_dict = {
        key: value for key, value in data_array.attrs.items() if key not in excluded
    }
    attrs_dict["id"] = data_array.name
    attrs_dict["name"] = names[data_array.name]
    return attrs_dict


In [5]:
# Collect one attribute record per NetCDF file, then build the table in one go
rows_list = []
for path in NC_FILES:
    with xr.open_dataarray(path, decode_coords="all", chunks="auto") as da:
        attrs_dict = get_attributes(
            da,
            to_drop=["units", "cell_methods", "level", "standard_name"],
        )
    # The record is a plain dict, so it can be stored once the file is closed
    rows_list.append(attrs_dict)

df = pd.DataFrame(rows_list)
df


Unnamed: 0,long_name,description,label_units,plot_label,id,name
0,Number of days with ground frost (minimum gras...,Days with ground frost,days,Ground frost (days),groundfrost,Frost
1,Sunshine hours,Sunshine hours,h,Sunshine hours (h),sun,Sun
2,Mean air temperature,Mean air temperature,C,Mean air temperature at 1.5m (C),tas,Temperature
3,Number of days with snow lying at 0900,Snow lying,days,Snow lying (days),snowLying,Snow
4,Maximum air temperature,Maximum air temperature,C,Maximum air temperature at 1.5m (C),tasmax,Maximum temperature
5,Relative humidity,Relative humidity,%,Relative humidity at 1.5m (%),hurs,Humidity
6,Total precipitation amount,Total rainfall,mm,Total rainfall (mm),rainfall,Rain
7,Wind speed at 10m,Wind speed,m s-1,Wind speed at 10m (m s-1),sfcWind,Wind
8,Minimum air temperature,Minimum air temperature,C,Minimum air temperature at 1.5m (C),tasmin,Minimum temperature


In [6]:
# Rebuilt from rows_list rather than from df: the mapping below swaps
# "description" and "long_name", so renaming df onto itself would swap the two
# columns back every time this cell is re-run.
df = (
    pd.DataFrame(rows_list)
    .rename(
        columns={
            "description": "long_name",
            "long_name": "description",
            "label_units": "units",
            "plot_label": "label",
        }
    )
    .sort_values("name")
    .reset_index(drop=True)
)

df


Unnamed: 0,description,long_name,units,label,id,name
0,Number of days with ground frost (minimum gras...,Days with ground frost,days,Ground frost (days),groundfrost,Frost
1,Relative humidity,Relative humidity,%,Relative humidity at 1.5m (%),hurs,Humidity
2,Maximum air temperature,Maximum air temperature,C,Maximum air temperature at 1.5m (C),tasmax,Maximum temperature
3,Minimum air temperature,Minimum air temperature,C,Minimum air temperature at 1.5m (C),tasmin,Minimum temperature
4,Total precipitation amount,Total rainfall,mm,Total rainfall (mm),rainfall,Rain
5,Number of days with snow lying at 0900,Snow lying,days,Snow lying (days),snowLying,Snow
6,Sunshine hours,Sunshine hours,h,Sunshine hours (h),sun,Sun
7,Mean air temperature,Mean air temperature,C,Mean air temperature at 1.5m (C),tas,Temperature
8,Wind speed at 10m,Wind speed,m s-1,Wind speed at 10m (m s-1),sfcWind,Wind


In [7]:
# to_csv does not create missing parent directories, so make sure OUT_DIR
# exists before writing (e.g. on a fresh checkout).
os.makedirs(OUT_DIR, exist_ok=True)
df.to_csv(os.path.join(OUT_DIR, "variable.csv"), index=False)
