# Prep data for training
1. Load & append years (ERA/GPCP)
2. Regrid: 
    a. z500 --> 2.5x2.5 (coarse grid - high spatial correlation)
    b. precip --> 2.5x2.5, 20S-20N (coarser grid to use less memory)

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import datetime as dt

## LOAD TRAINING DATA

### Reanalysis

In [2]:
# Open every daily z500 file matching the glob below, concatenate them along
# 'time', and keep only the 'Z' variable.
z500_era_path = '/glade/derecho/scratch/kjmayer/DATA/ERA5/z500/daily/'
z500_era_finame = 'e5.oper.an.pl.128_129_z.ll025sc.*.nc'

# NOTE(review): the division by 10 presumably approximates dividing the
# geopotential by g to obtain geopotential height in metres -- confirm.
# A year >= 1996 filter used to follow the division and is left disabled.
z500_era = xr.open_mfdataset(
    f"{z500_era_path}{z500_era_finame}",
    combine="nested",
    concat_dim='time',
)['Z'] / 10.

# Force the time coordinate to nanosecond-precision numpy datetimes.
z500_era = z500_era.assign_coords(
    time=z500_era["time"].astype("datetime64[ns]")
)

In [5]:
# Open every daily precipitation file matching the glob below, concatenate
# them along 'time', and keep only the 'precip' variable.
precip_gpcp_path = '/glade/derecho/scratch/kjmayer/DATA/GPCP/PRECT/daily/'
precip_gpcp_finame = 'precip_30S-30N_*.nc'

precip_gpcp_ds = xr.open_mfdataset(
    f"{precip_gpcp_path}{precip_gpcp_finame}",
    combine="nested",
    concat_dim='time',
)
precip_gpcp = precip_gpcp_ds['precip']

## Regrid Data

In [3]:
# ----- regrid z500 to 2.5x2.5 (bilinear interpolation) -----
# Source grid noted by the author as 0.25 x 0.25. The target grid spans
# latitude 20..90 and longitude 0..357.5 in 2.5-degree steps.
z500_target_grid = {
    "latitude": np.arange(20, 92.5, 2.5),
    "longitude": np.arange(0, 360., 2.5),
}

# Linear interpolation along each target axis, then cast the values to float32.
# NOTE(review): the original comment says the cast was added because the
# lat/lon came out as object dtype; astype() here casts the data values --
# confirm it actually addresses the coordinate dtype that was observed.
z500_era_regrid = z500_era.interp(
    z500_target_grid, method='linear'
).astype(np.float32)

In [8]:
# ----- regrid precip to 2.5x2.5 (bilinear interpolation) & 20S-20N = lower res to use less memory -----
# Source grid noted by the author as 1.0 x 1.0.

# Keep only the latitudes inside [-20, 20] before interpolating.
precip_lat = precip_gpcp['latitude']
in_tropics = (precip_lat >= -20) & (precip_lat <= 20)
precip_gpcp = precip_gpcp.where(in_tropics, drop=True)

# Target grid: latitude -20..20 and longitude 0..357.5 in 2.5-degree steps.
# NOTE(review): if the source coordinates are cell-centred (e.g. +/-19.5,
# 0.5..359.5), the target points at +/-20 and longitude 0 lie outside the
# clipped source range and interp() will return NaN there -- confirm the
# source grid actually contains these edge values.
precip_target_grid = {
    "latitude": np.arange(-20, 20.5, 2.5),
    "longitude": np.arange(0, 360.0, 2.5),
}
precip_gpcp_regrid = precip_gpcp.interp(precip_target_grid, method='linear')

## Save Combined & Regridded Data

In [13]:
# Write each regridded field to a single netCDF file in the same directory
# as its source files.
# NOTE(review): the filenames state 1996-2023, but no year filter is active
# in the visible code (the one on the z500 load line is commented out) --
# confirm the input files cover exactly that period.
precip_out_file = precip_gpcp_path + 'precip_daily_1996-2023_20S-20N_regrid2.5x2.5.nc'
z500_out_file = z500_era_path + 'z500_daily_1996-2023_20-90N_regrid2.5x2.5.nc'

precip_gpcp_regrid.to_netcdf(precip_out_file)
z500_era_regrid.to_netcdf(z500_out_file)

In [45]:
# Compare xarray interp and xesmf regridder (they're the same)

# import xesmf as xe
# z500_erads = xr.open_mfdataset(z500_era_path+z500_era_finame,
#                               concat_dim = 'time',
#                               combine="nested",
#                              )
# ds_out = xr.Dataset({"latitude": (["latitude"], np.arange(20,92.5,2.5), {"units": "degrees_north"}),
#                      "longitude": (["longitude"], np.arange(0,360.,2.5), {"units": "degrees_east"}),})
# regridder = xe.Regridder(z500_erads, ds_out, "bilinear")

# regrid_check = regridder(z500_erads['Z'],keep_attrs=True)

# check = z500_erads['Z'].where(z500_erads['Z'].time.dt.year >= 1997, drop = True)/10.
# regrid_check = regrid_check.where(regrid_check.time.dt.year >= 1997, drop = True)/10.

# # plot and take difference between methods --> xarray interp and xesmf regridder are the same!
# check[0].plot(levels=np.arange(3500,5600,100))
# plt.show()
# regrid_check[0].plot(levels=np.arange(3500,5600,100))
# plt.show()
# z500_era_regrid[0].plot(levels=np.arange(3500,5600,100))
# plt.show()

# diff_regridtest = regrid_check[0] - z500_era_regrid[0]
# diff_regridtest.plot(levels=np.arange(-1,1.1,.1))
# plt.show()