### data preprocessing (ERA5 MSL)

Notebook by Maria J. Molina (NCAR) and Taylor Dolan (UND/ASP)

This notebook reads ERA5 mean sea level pressure (MSL) data month by month, bilinearly regrids it onto the CESM2 nominal 1-degree grid, and writes one regridded netCDF file per month.

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import xesmf as xe

In [2]:
# Month-start ('MS') timestamps from January 1979 through February 2022,
# inclusive; one entry per month to be processed.
full_dates = pd.date_range('1979-01-01', '2022-02-01', freq='MS')

In [3]:
# Open a CESM2 file and keep only its first time step; the latitude and
# longitude coordinates are pulled out for use by later cells.
cesm_path = '/glade/scratch/molina/b.e21.Z500.1031.002.nc'
ds_cesm = xr.open_dataset(cesm_path).isel(time=0)

ds_lats = ds_cesm['lat']
ds_lons = ds_cesm['lon']

In [None]:
# Regrid ERA5 mean sea level pressure (MSL) onto the grid described by
# ds_lats / ds_lons, one month at a time, writing one netCDF file per month.

# The target grid depends only on ds_lats / ds_lons, so it is built once
# here rather than inside the loop.

# Rounded extent of the target latitude/longitude coordinates.
lat0_bnd = int(np.around(ds_lats.min(skipna=True).values))
lat1_bnd = int(np.around(ds_lats.max(skipna=True).values))
lon0_bnd = int(np.around(ds_lons.min(skipna=True).values))
lon1_bnd = int(np.around(ds_lons.max(skipna=True).values)) - 1

# Grid spacing is taken from two neighbouring coordinate values.
# NOTE(review): the 0.625 and 0.47120419 offsets are presumably half of the
# longitude and latitude spacing, shifting cell centres to cell edges;
# lat1_b has no matching offset -- TODO confirm this is intentional.
ds_out = xe.util.grid_2d(lon0_b=lon0_bnd - 0.625,
                         lon1_b=lon1_bnd + 0.625,
                         d_lon=(ds_lons[5] - ds_lons[4]).values,
                         lat0_b=lat0_bnd - 0.47120419,
                         lat1_b=lat1_bnd,
                         d_lat=(ds_lats[5] - ds_lats[4]).values)

for indx, i in enumerate(full_dates):

    yyyymm = i.strftime('%Y%m')

    # Open this month's ERA5 MSL file(s). The context manager closes them at
    # the end of the iteration so file handles do not pile up over the loop.
    with xr.open_mfdataset(
            f"/gpfs/fs1/collections/rda/data/ds633.0/e5.oper.an.sfc/{yyyymm}/e5.oper.an.sfc.128_151_msl.*.nc") as ds_era5:

        if indx == 0:
            # Compute the bilinear weights once, from the first month's grid,
            # and reuse the same regridder for every later month.
            # NOTE(review): the positional False maps to a different keyword
            # depending on the xESMF version (periodic vs. locstream_in) --
            # confirm against the installed release.
            regridder = xe.Regridder(ds_era5, ds_out, 'bilinear', False)

            # Save the weights to disk under xESMF's default file name.
            regridder.to_netcdf()

        dr_out = regridder(ds_era5['MSL'], keep_attrs=True)

        # Replace the 2-D lon/lat coordinates with 1-D ones taken from the
        # first row / first column, then rename the x/y dims to lon/lat.
        dr_out = dr_out.assign_coords(
            lon=('x', dr_out.coords['lon'][0, :].values),
            lat=('y', dr_out.coords['lat'][:, 0].values))
        x = dr_out.rename(y='lat', x='lon')

        # Write while the source files are still open; the regridded data may
        # be lazily evaluated and need to read from them.
        x.to_netcdf(
            f"/glade/scratch/molina/data_for_taylor/e5.MSL_{yyyymm}_regridded.nc")

