### Importing packages and data_utils.py

In [1]:
import os

from data_utils import *

2023-06-14 03:20:54.457087: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Shell escape: print the directory the notebook kernel is running in.
!pwd

/ocean/projects/atm200007p/jlin96/neurips_proj/ClimSim_release


### Initialize folder paths

In [3]:
# Folder/file locations used by the rest of the notebook.
#
# Each value defaults to the original cluster location but can be overridden
# through an environment variable, so the notebook can run on another machine
# without editing this cell:
#   CLIMSIM_DATA_PATH -> data_path (directory handed to data_utils)
#   CLIMSIM_NORM_PATH -> norm_path (directory holding the mli_*/mlo_* .nc files)
#   CLIMSIM_GRID_PATH -> grid_path (grid-info netCDF file)
#
# os.path.join(<dir>, '') appends a trailing separator only when it is missing.
# norm_path is later concatenated directly with file names, so it must end in
# '/'; data_path is normalized the same way to match its original form.
data_path = os.path.join(
    os.environ.get('CLIMSIM_DATA_PATH',
                   '/ocean/projects/atm200007p/walrus/for_jerry/train/'),
    '')
norm_path = os.path.join(
    os.environ.get('CLIMSIM_NORM_PATH',
                   '/ocean/projects/atm200007p/jlin96/neurips_proj/mooers_metrics/norm_factors/'),
    '')
grid_path = os.environ.get(
    'CLIMSIM_GRID_PATH',
    '/ocean/projects/atm200007p/jlin96/neurips_proj/mooers_metrics/test_data/E3SM-MMF_ne4_grid-info.orig.nc')

### Create variables necessary for initialization

In [4]:
# Everything the data_utils constructor needs besides data_path (which is
# defined in the folder-path cell; the former `data_path = data_path` line was a
# no-op and has been dropped).

# Input variable names; the trailing comments give meaning and units.
input_vars = ['state_t', # air temperature, vertically resolved, units: K
              'state_q0001', # specific humidity, vertically resolved, units: kg/kg
              'state_ps', # surface pressure, scalar, units: Pa
              'pbuf_SOLIN', # solar insolation, scalar, units: W/m^2
              'pbuf_LHFLX', # surface latent heat flux, scalar, units: W/m^2
              'pbuf_SHFLX'] # surface sensible heat flux, scalar, units: W/m^2

# Target variable names; the trailing comments give meaning and units.
target_vars = ['ptend_t', # air temperature tendency, vertically resolved, units: K/s
               'ptend_q0001', # specific humidity tendency, vertically resolved, units: kg/kg/s
               'cam_out_NETSW', # net shortwave flux at surface, scalar, units: W/m^2
               'cam_out_FLWDS', # downward longwave flux at surface, scalar, units: W/m^2
               'cam_out_PRECSC', # snow rate (liquid water equivalent), scalar, units: m/s
               'cam_out_PRECC', # rain rate, scalar, units: m/s
               'cam_out_SOLS', # downward visible direct solar flux to surface, scalar, units: W/m^2
               'cam_out_SOLL', # downward near-infrared direct solar flux to surface, scalar, units: W/m^2
               'cam_out_SOLSD', # downward visible diffuse solar flux to surface, scalar, units: W/m^2
               'cam_out_SOLLD'] # downward near-infrared diffuse solar flux to surface, scalar, units: W/m^2

# Grid description and normalization datasets, opened with xarray.
# NOTE(review): `xr` is not imported in this notebook directly; presumably it
# is provided by `from data_utils import *` -- confirm.
grid_info = xr.open_dataset(grid_path)
# norm_path is joined by plain concatenation, so it must end with '/'.
inp_mean = xr.open_dataset(norm_path + 'mli_mean.nc')
inp_max = xr.open_dataset(norm_path + 'mli_max.nc')
inp_min = xr.open_dataset(norm_path + 'mli_min.nc')
out_scale = xr.open_dataset(norm_path + 'mlo_scale.nc')

### Create data_utils object

In [5]:
# Build the data_utils helper from the path, variable lists, grid info and
# normalization datasets prepared in the cells above. The following cells call
# its set_regexps / set_stride_sample / set_filelist / save_as_npy methods.
data = data_utils(
    data_path=data_path,
    input_vars=input_vars,
    target_vars=target_vars,
    grid_info=grid_info,
    inp_mean=inp_mean,
    inp_max=inp_max,
    inp_min=inp_min,
    out_scale=out_scale,
)

### Create training data

In [6]:
# --- training split -------------------------------------------------------
# File-name patterns that select the training files.
train_patterns = [
    'E3SM-MMF.mli.000[1234567]-*-*-*.nc',  # years 1 through 7
    'E3SM-MMF.mli.0008-01-*-*.nc',         # first month of year 8
]
data.set_regexps(data_split='train', regexps=train_patterns)

# Temporal subsampling stride for this split.
data.set_stride_sample(data_split='train', stride_sample=7)

# Build the list of files to extract data from.
data.set_filelist(data_split='train')

# Write the training data out as numpy files.
data.save_as_npy(data_split='train', save_path='')

2023-06-14 03:20:58.359697: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


### Create validation data

In [7]:
# --- validation split -----------------------------------------------------
# File-name patterns that select the validation files.
val_patterns = [
    'E3SM-MMF.mli.0008-0[23456789]-*-*.nc',  # months 2 through 9 of year 8
    'E3SM-MMF.mli.0008-1[012]-*-*.nc',       # months 10 through 12 of year 8
    'E3SM-MMF.mli.0009-01-*-*.nc',           # first month of year 9
]
data.set_regexps(data_split='val', regexps=val_patterns)

# Temporal subsampling stride for this split.
data.set_stride_sample(data_split='val', stride_sample=7)

# Build the list of files to extract data from.
data.set_filelist(data_split='val')

# Write the validation data out as numpy files.
data.save_as_npy(data_split='val', save_path='')

2023-06-14 05:19:01.725068: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


### Create scoring data

In [8]:
# --- scoring split --------------------------------------------------------
# File-name patterns that select the scoring files (the same files as the
# validation split; only the stride below differs).
scoring_patterns = [
    'E3SM-MMF.mli.0008-0[23456789]-*-*.nc',  # months 2 through 9 of year 8
    'E3SM-MMF.mli.0008-1[012]-*-*.nc',       # months 10 through 12 of year 8
    'E3SM-MMF.mli.0009-01-*-*.nc',           # first month of year 9
]
data.set_regexps(data_split='scoring', regexps=scoring_patterns)

# Temporal subsampling: a stride of 6 is needed for daily averaging.
data.set_stride_sample(data_split='scoring', stride_sample=6)

# Build the list of files to extract data from.
data.set_filelist(data_split='scoring')

# Write the scoring data out as numpy files.
data.save_as_npy(data_split='scoring', save_path='')

2023-06-14 05:35:01.140716: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


In [9]:
# Completion marker: printed once every preceding cell has run.
print('finished')

finished
