In [15]:
# Standard libraries
import os

# Third-party libraries
import pandas as pd
import xarray as xr

# Local libraries
from get_nc_files import get_nc_files


In [16]:
# Input and output locations, both resolved against the current working directory.
DATA_DIR, OUT_DIR = (
    os.path.join(os.getcwd(), relative_path)
    for relative_path in ("data/intermediate/hadukgrid", "data/processed")
)


In [17]:
# Inputs for the attribute scan; each element is later passed to xr.open_dataarray.
# NOTE(review): get_nc_files is a project-local helper not shown here; presumably
# it returns the paths of the NetCDF files found under DATA_DIR -- confirm.
NC_FILES = get_nc_files(DATA_DIR)

In [18]:
# Build one record per input file from the DataArray's attributes plus its
# variable name. Attributes missing from a given file become NaN in the frame.
rows_list = []
for path in NC_FILES:
    # The context manager closes the file even if reading the metadata raises,
    # which a trailing da.close() call would not guarantee.
    with xr.open_dataarray(path, decode_coords="all", chunks="auto") as da:
        # Unpacking copies the attrs, so the record does not alias da.attrs;
        # "name" is set last and therefore wins over an attribute called "name".
        rows_list.append({**da.attrs, "name": da.name})
df = pd.DataFrame(rows_list)
df

Unnamed: 0,long_name,units,description,label_units,plot_label,cell_methods,name,standard_name,level
0,Number of days with ground frost (minimum gras...,1.0,Days with ground frost,days,Ground frost (days),time: minimum within days time: sum over days,groundfrost,,
1,Sunshine hours,hour,Sunshine hours,h,Sunshine hours (h),time: sum,sun,duration_of_sunshine,
2,Mean air temperature,degC,Mean air temperature,C,Mean air temperature at 1.5m (C),time: mid_range within days time: mean over days,tas,air_temperature,1.5m
3,Number of days with snow lying at 0900,1.0,Snow lying,days,Snow lying (days),time: sum (interval: 1 day),snowLying,surface_snow_binary_mask,
4,Maximum air temperature,degC,Maximum air temperature,C,Maximum air temperature at 1.5m (C),time: maximum within days time: mean over days,tasmax,air_temperature,1.5m
5,Relative humidity,0.01,Relative humidity,%,Relative humidity at 1.5m (%),time: mean,hurs,relative_humidity,1.5m
6,Total precipitation amount,mm,Total rainfall,mm,Total rainfall (mm),time: sum,rainfall,lwe_thickness_of_precipitation_amount,
7,Wind speed at 10m,m s-1,Wind speed,m s-1,Wind speed at 10m (m s-1),time: mean,sfcWind,wind_speed,10m
8,Minimum air temperature,degC,Minimum air temperature,C,Minimum air temperature at 1.5m (C),time: minimum within days time: mean over days,tasmin,air_temperature,1.5m


In [19]:
# Reshape the raw attribute table into the schema written to variable.csv.
# The frame is rebuilt from rows_list instead of being derived from df itself,
# so the cell can be re-run: reassigning df from df made a second run fail with
# a KeyError because the columns to drop were already gone.
df = (
    pd.DataFrame(rows_list)
    # Drop before renaming: the raw "units" column has to be removed before
    # "label_units" takes over its name.
    .drop(columns=["units", "cell_methods", "level", "standard_name"])
    .rename(
        columns={
            "description": "longName",
            "long_name": "description",
            "label_units": "units",
            "plot_label": "label",
        }
    )
    .sort_values("name")
    .reset_index(drop=True)
)
# 1-based identifier that follows the sorted order of "name".
df.insert(0, "id", range(1, 1 + len(df)))

df

Unnamed: 0,id,description,longName,units,label,name
0,1,Number of days with ground frost (minimum gras...,Days with ground frost,days,Ground frost (days),groundfrost
1,2,Relative humidity,Relative humidity,%,Relative humidity at 1.5m (%),hurs
2,3,Total precipitation amount,Total rainfall,mm,Total rainfall (mm),rainfall
3,4,Wind speed at 10m,Wind speed,m s-1,Wind speed at 10m (m s-1),sfcWind
4,5,Number of days with snow lying at 0900,Snow lying,days,Snow lying (days),snowLying
5,6,Sunshine hours,Sunshine hours,h,Sunshine hours (h),sun
6,7,Mean air temperature,Mean air temperature,C,Mean air temperature at 1.5m (C),tas
7,8,Maximum air temperature,Maximum air temperature,C,Maximum air temperature at 1.5m (C),tasmax
8,9,Minimum air temperature,Minimum air temperature,C,Minimum air temperature at 1.5m (C),tasmin


In [20]:
# Create the output directory first: to_csv does not create missing parent
# directories and raises OSError when OUT_DIR is absent (e.g. a fresh checkout).
os.makedirs(OUT_DIR, exist_ok=True)
df.to_csv(os.path.join(OUT_DIR, "variable.csv"), index=False)
