### Importing packages and data_utils.py

In [None]:
from climsim_utils.data_utils import *

### Instantiating class

In [None]:
# Locations of the grid description file and of the normalization statistics.
grid_path = './grid_info/ClimSim_low-res_grid-info.nc'
norm_path = './preprocessing/normalizations/'

# Open the grid metadata and the normalization datasets via xr.open_dataset.
grid_info = xr.open_dataset(grid_path)
input_mean = xr.open_dataset(norm_path + 'inputs/input_mean.nc')
input_max = xr.open_dataset(norm_path + 'inputs/input_max.nc')
input_min = xr.open_dataset(norm_path + 'inputs/input_min.nc')
output_scale = xr.open_dataset(norm_path + 'outputs/output_scale.nc')

# Collect the constructor arguments by keyword, then build the helper object
# that the remaining cells configure and use to write the data splits.
data_utils_kwargs = dict(
    grid_info=grid_info,
    input_mean=input_mean,
    input_max=input_max,
    input_min=input_min,
    output_scale=output_scale,
)
data = data_utils(**data_utils_kwargs)

### Set variable list

In [None]:
# set inputs and outputs to the v2 variable set (listed below)
data.set_to_v2_vars()

# v2 inputs (name :: description :: dimension :: units): 

# 'state_t' :: air temperature :: 60 :: K 
# 'state_q0001' :: specific humidity :: 60 :: kg/kg
# 'state_q0002' :: cloud liquid mixing ratio :: 60 :: kg/kg
# 'state_q0003' :: cloud ice mixing ratio :: 60 :: kg/kg
# 'state_u' :: zonal wind speed :: 60 :: m/s
# 'state_v' :: meridional wind speed :: 60 :: m/s
# 'state_ps' :: surface pressure :: 1 :: Pa
# 'pbuf_SOLIN' :: solar insolation :: 1 :: W/m^2
# 'pbuf_LHFLX' :: surface latent heat flux :: 1 :: W/m^2
# 'pbuf_SHFLX' :: surface sensible heat flux :: 1 :: W/m^2
# 'pbuf_TAUX' :: zonal surface stress :: 1 :: N/m^2
# 'pbuf_TAUY' :: meridional surface stress :: 1 :: N/m^2
# 'pbuf_COSZRS' :: cosine of solar zenith angle :: 1
# 'cam_in_ALDIF' :: albedo for diffuse longwave radiation :: 1
# 'cam_in_ALDIR' :: albedo for direct longwave radiation :: 1
# 'cam_in_ASDIF' :: albedo for diffuse shortwave radiation :: 1
# 'cam_in_ASDIR' :: albedo for direct shortwave radiation :: 1
# 'cam_in_LWUP' :: upward longwave flux :: 1 :: W/m^2
# 'cam_in_ICEFRAC' :: sea-ice areal fraction :: 1
# 'cam_in_LANDFRAC' :: land areal fraction :: 1
# 'cam_in_OCNFRAC' :: ocean areal fraction :: 1
# 'cam_in_SNOWHICE' :: snow depth over ice :: 1 :: m
# 'cam_in_SNOWHLAND' :: snow depth over land :: 1 :: m
# 'pbuf_ozone' :: ozone volume mixing ratio :: 60 :: mol/mol
# 'pbuf_CH4' :: methane volume mixing ratio :: 60 :: mol/mol
# 'pbuf_N2O' :: nitrous oxide volume mixing ratio :: 60 :: mol/mol

# v2 outputs (name :: description :: dimension :: units): 

# 'ptend_t' :: heating tendency :: 60 :: K/s 
# 'ptend_q0001' :: moistening tendency :: 60 :: kg/kg/s
# 'ptend_q0002' :: cloud liquid mixing ratio change over time :: 60 :: kg/kg/s
# 'ptend_q0003' :: cloud ice mixing ratio change over time :: 60 :: kg/kg/s
# 'ptend_u' :: zonal wind acceleration :: 60 :: m/s^2
# 'ptend_v' :: meridional wind acceleration :: 60 :: m/s^2
# 'cam_out_NETSW' :: net shortwave flux at surface :: 1 :: W/m^2
# 'cam_out_FLWDS' :: downward longwave flux at surface :: 1 :: W/m^2 
# 'cam_out_PRECSC' :: snow rate (liquid water equivalent) :: 1 :: m/s 
# 'cam_out_PRECC' :: rain rate :: 1 :: m/s
# 'cam_out_SOLS' :: downward visible direct solar flux to surface :: 1 :: W/m^2
# 'cam_out_SOLL' :: downward near-infrared direct solar flux to surface :: 1 :: W/m^2
# 'cam_out_SOLSD' :: downward diffuse solar flux to surface :: 1 :: W/m^2
# 'cam_out_SOLLD' :: downward diffuse near-infrared solar flux to surface :: 1 :: W/m^2

### Create training data

In [None]:
# Directory holding the simulation output that the training split is drawn from.
data.data_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/e3sm_train/'

# Filename patterns that select the training period.
train_patterns = [
    'E3SM-MMF.mli.000[1234567]-*-*-*.nc',  # years 1 through 7
    'E3SM-MMF.mli.0008-01-*-*.nc',  # first month of year 8
]
data.set_regexps(data_split='train', regexps=train_patterns)

# Temporal subsampling for this split.
data.set_stride_sample(data_split='train', stride_sample=7)

# Build the list of files to extract data from.
data.set_filelist(data_split='train')

# Leave the data unnormalized.
data.normalize = False

# Write the training data out as numpy files.
# NOTE(review): save_path is the empty string -- presumably output lands
# relative to the current working directory; confirm against save_as_npy.
data.save_as_npy(data_split='train', save_path='')

### Create validation data

In [None]:
# Directory holding the simulation output that the validation split is drawn
# from (same directory as the training split).
data.data_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/e3sm_train/'

# Filename patterns that select the validation period.
val_patterns = [
    'E3SM-MMF.mli.0008-0[23456789]-*-*.nc',  # months 2 through 9 of year 8
    'E3SM-MMF.mli.0008-1[012]-*-*.nc',  # months 10 through 12 of year 8
    'E3SM-MMF.mli.0009-01-*-*.nc',  # first month of year 9
]
data.set_regexps(data_split='val', regexps=val_patterns)

# Temporal subsampling for this split.
data.set_stride_sample(data_split='val', stride_sample=7)

# Build the list of files to extract data from.
data.set_filelist(data_split='val')

# Leave the data unnormalized.
data.normalize = False

# Write the validation data out as numpy files.
# NOTE(review): save_path is the empty string -- presumably output lands
# relative to the current working directory; confirm against save_as_npy.
data.save_as_npy(data_split='val', save_path='')

### Create test data

In [None]:
# Directory holding the simulation output that the test (scoring) split is
# drawn from.
data.data_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/e3sm_test/'

# Filename patterns that select the scoring period.
# NOTE(review): the year-10 pattern has one fewer "-*" field than the others.
# If matching is shell-style globbing, "*" also matches "-", so the same files
# should be selected -- confirm against set_filelist.
test_patterns = [
    'E3SM-MMF.mli.0009-0[3456789]-*-*.nc',  # months 3 through 9 of year 9
    'E3SM-MMF.mli.0009-1[012]-*-*.nc',  # months 10 through 12 of year 9
    'E3SM-MMF.mli.0010-*-*.nc',  # all months of year 10
    'E3SM-MMF.mli.0011-0[12]-*-*.nc',  # months 1 and 2 of year 11
]
data.set_regexps(data_split='test', regexps=test_patterns)

# Temporal subsampling (a stride of 6 is needed for daily averaging).
data.set_stride_sample(data_split='test', stride_sample=6)

# Build the list of files to extract data from.
data.set_filelist(data_split='test')

# Leave the data unnormalized.
data.normalize = False

# Write the scoring data out as numpy files.
# NOTE(review): save_path is the empty string -- presumably output lands
# relative to the current working directory; confirm against save_as_npy.
data.save_as_npy(data_split='test', save_path='')

In [None]:
# completion marker: printed once execution reaches the last cell
print('finished')