# HadUK-Grid pre-processing 
The HadUK-Grid data from the CEDA Data Catalogue are distributed as a separate netCDF file for each year-month-variable combination.
This script merges those files into a single netCDF file per climatic variable, then combines the temperature variables (tas, tasmin, tasmax) into one file.

In [None]:
# Standard libraries
import datetime as dt
import os

# Third-party libraries
import xarray as xr

In [None]:
# Constants
# All paths are built from the current working directory, so the notebook is
# expected to be run from a folder that contains the 'data' directory.
DATA_DIR = os.path.join(os.getcwd(), 'data')
OUTPUT_DIR = os.path.join(DATA_DIR, 'intermediate')
HAD_DIR = os.path.join(DATA_DIR, 'external/observations/ceda')

# Every sub-directory of HAD_DIR is taken as one parameter name; plain files
# are ignored. os.listdir returns entries in arbitrary order, so the names are
# sorted to make the processing order reproducible between runs and machines.
PARAMETER_NAMES = sorted(
  entry for entry in os.listdir(HAD_DIR)
  if os.path.isdir(os.path.join(HAD_DIR, entry))
)

In [None]:
# Load, merge, and write to disk
# Create the output folder up front: writing a netCDF file into a directory
# that does not exist fails, which would abort the run on a fresh checkout.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for param in PARAMETER_NAMES:
  # Open every '*.nc' file in this parameter's sub-directory as one lazily
  # loaded dataset. The 'with' block guarantees the underlying files are
  # closed even if the write below raises.
  with xr.open_mfdataset(
      os.path.join(HAD_DIR, param, '*.nc'),
      parallel=True,
      decode_coords="all",
      chunks='auto',
  ) as ds:
    # One merged file per parameter: '<param>-hadgriduk-1km.nc'.
    ds.to_netcdf(os.path.join(OUTPUT_DIR, param + '-hadgriduk-1km.nc'))

In [None]:
# Merge the temperature-related variables
# Combines the per-variable files named '<variable>-hadgriduk-1km.nc' in
# OUTPUT_DIR into a single dataset holding all three temperature variables.
tempParams = ['tas', 'tasmin', 'tasmax']
tempFiles = [
  os.path.join(OUTPUT_DIR, temp + '-hadgriduk-1km.nc') for temp in tempParams
]

# The 'with' block closes the source files even if writing the merged file
# fails part-way through.
with xr.open_mfdataset(
    tempFiles,
    parallel=True,
    decode_coords="all",
    chunks='auto',
) as dsTemp:
  dsTemp.to_netcdf(
    os.path.join(OUTPUT_DIR, 'tas-tasmin-tasmax-hadgriduk-1km.nc'),
    engine='netcdf4',
  )