### Import data_utils

In [1]:
from climsim_utils.data_utils import *

2023-08-20 18:44:56.401614: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Instantiate class

In [2]:
# Locations of the low-resolution grid description and of the normalization files.
grid_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/ClimSim_release/grid_info/ClimSim_low-res_grid-info.nc'
norm_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/ClimSim_release/normalizations/'

# Open the grid description first, then the four normalization datasets.
# norm_path ends in '/', so plain string concatenation yields the full file paths.
grid_info = xr.open_dataset(grid_path)
input_mean, input_max, input_min, output_scale = [
    xr.open_dataset(norm_path + relative_file)
    for relative_file in ('inputs/input_mean.nc',
                          'inputs/input_max.nc',
                          'inputs/input_min.nc',
                          'outputs/output_scale.nc')
]

# Helper object that every later cell works through.
data = data_utils(grid_info = grid_info,
                  input_mean = input_mean, input_max = input_max, input_min = input_min,
                  output_scale = output_scale)

### Load data and set pressure grid

In [3]:
# scoring inputs and targets (.npy)
input_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/e3sm_train_npy/scoring_input.npy'
target_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/e3sm_train_npy/scoring_target.npy'

# prediction files, one per model (.h5 for cVAE/HSR, .npy for the rest)
cvae_pred_path = '/ocean/projects/atm200007p/shared/neurips_proj/final_metrics/predictions/cVAE/cvae_preds_manual.h5'
hsr_pred_path = '/ocean/projects/atm200007p/shared/neurips_proj/final_metrics/predictions/HSR/hsr_preds_bestcrps.h5'
rpn_pred_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/figure_ingredients/rpn_pred_v1_stride6.npy'
cnn_pred_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/figure_ingredients/val_predict_cnn_reshaped_stride6_FINAL.npy'
mlp_pred_path = '/ocean/projects/atm200007p/jlin96/neurips_proj/figure_ingredients/001_backup_phase-7_retrained_models_step2_lot-147_trial_0027.best.h5.npy'

# restrict the variable list to the V1 subset before anything is loaded
data.set_to_v1_vars()

# load the scoring inputs and targets onto the data object
data.input_scoring = np.load(input_path)
data.target_scoring = np.load(target_path)

# derive the pressure grid (used for the vertical weights) from the scoring inputs
data.set_pressure_grid(data.input_scoring)

# One row per model: (name, loader, file). Keeping the three together means the
# names and the loaded predictions cannot drift out of order.
model_sources = [('CNN', data.load_npy_file, cnn_pred_path),
                 ('cVAE', data.load_h5_file, cvae_pred_path),
                 ('HSR', data.load_h5_file, hsr_pred_path),
                 ('MLP', data.load_npy_file, mlp_pred_path),
                 ('RPN', data.load_npy_file, rpn_pred_path)]
data.model_names = [model_name for model_name, _, _ in model_sources]
preds = [loader(load_path = pred_path) for _, loader, pred_path in model_sources]
data.preds_scoring = dict(zip(data.model_names, preds))


### Weight outputs

1. Undo output scaling

2. Weight vertical levels by dp/g

3. Weight each grid cell by its relative horizontal area, a[x]/mean(a[x])

4. Convert units to a common energy unit

In [4]:
# Apply the weighting to the scoring targets first, then to the predictions.
split = 'scoring'
data.reweight_target(data_split = split)
data.reweight_preds(data_split = split)

### Calculate metrics

In [5]:
# Metrics requested from data_utils for the scoring split.
data.metrics_names = ['MAE', 'RMSE', 'R2']
# NOTE(review): in the saved run this call ended in a bare AssertionError (see the
# cell output). The failing assertion lives inside data_utils and is not visible
# from this notebook — confirm which precondition is unmet before relying on it.
data.create_metrics_df(data_split = 'scoring')

AssertionError: 

In [None]:
# Scratch: keep a handle on the MLP entry of the predictions for the inspection cells below.
hmm = data.preds_scoring['MLP']

In [None]:
# Scratch: pull two of the MLP outputs out by name for inspection.
mlp_outputs = data.preds_scoring["MLP"]
wow = mlp_outputs['heating']
wow2 = mlp_outputs["netsw"]

In [None]:
# Shape of the mean over axes 0 and 1; atleast_1d promotes a 0-d result to 1-D so the shape is never empty.
np.atleast_1d(wow.mean(axis = (0,1))).shape

In [None]:
# Length of the axis-(0, 1) mean; atleast_1d keeps shape[0] valid even when the mean is a scalar.
np.atleast_1d(wow2.mean(axis = (0,1))).shape[0]

In [None]:
# Print whatever iterating data.preds_scoring yields — the model names, if it is
# still the dict built when the predictions were loaded (TODO confirm).
for name in data.preds_scoring:
    print(name)

In [None]:
# Shape of the MLP 'heating' entry.
hmm['heating'].shape

In [None]:
# Shape of the MLP 'netsw' entry.
hmm['netsw'].shape

In [None]:
# Scratch: empty 3x3 frame with a named index, used to prototype the metrics table layout.
columns = ['A', 'B', 'C']
index = ['X', 'Y', 'Z']
df = pd.DataFrame(index=index, columns=columns).rename_axis('Variable')
df

In [None]:
# Scratch: empty 12x3 frame indexed by integer position, with the index named 'output_idx'.
columns = ['A', 'B', 'C']
index = range(12)
df = pd.DataFrame(index=index, columns=columns).rename_axis('output_idx')
df

In [None]:
# .loc slices by label and includes both endpoints, so rows 4..7 of column 'A' receive the four values 4..7.
df.loc[4:7,'A'] = np.arange(4,8)

In [None]:
# Show the frame after the partial column assignment.
df

In [None]:
# Run data_utils.calc_MAE on the MLP 'heating' entry against the matching target entry
# (presumably a mean absolute error; the implementation is not visible here).
data.calc_MAE(hmm['heating'], data.target_scoring['heating'])

In [None]:
# Inspect the scoring targets as currently stored on the data object.
data.target_scoring

In [None]:
# Print whatever iterating the MLP entry yields (its keys, if it is a dict — TODO confirm).
for var in hmm:
    print(var)

In [None]:
# Shape that remains after averaging the MLP 'heating' entry over axes 0 and 1.
hmm['heating'].mean(axis = (0,1)).shape

In [None]:
# Tuple of every axis index except the last, e.g. (0, 1) for a 3-D array.
tuple(range(hmm['heating'].ndim-1))

In [None]:
# NOTE(review): `scoring_data` is not defined in any visible cell (the instance created
# above is named `data`), so this raises NameError on a fresh kernel — confirm and rename or remove.
scoring_data.preds_scoring['CNN'].shape

In [None]:

# Passes the CNN predictions and the full scoring targets to calc_MAE.
# NOTE(review): `scoring_data` is not defined in any visible cell (the instance created
# above is named `data`) — confirm and rename or remove.
cnn_ex = scoring_data.calc_MAE(scoring_data.preds_scoring['CNN'], scoring_data.target_scoring)

In [None]:
# Same shape check as two cells earlier.
# NOTE(review): `scoring_data` is not defined in any visible cell — confirm and rename or remove.
scoring_data.preds_scoring['CNN'].shape

## Time to compare

### Weighted output values

In [None]:
# Check that each weighted target array reproduces the reference field stored
# under DS_ENERGY['true'].
# NOTE(review): DS_ENERGY and the weighted arrays (heating, moistening, ...) are
# not defined in any visible cell; this cell relies on state created elsewhere.
weighted_targets = {
    'ptend_t': heating,
    'ptend_q0001': moistening,
    'cam_out_NETSW': netsw,
    'cam_out_FLWDS': flwds,
    'cam_out_PRECSC': precsc,
    'cam_out_PRECC': precc,
    'cam_out_SOLS': sols,
    'cam_out_SOLL': soll,
    'cam_out_SOLSD': solsd,
    'cam_out_SOLLD': solld,
}

for var_name, weighted in weighted_targets.items():
    reference = DS_ENERGY['true'][var_name].values
    # Shape first: otherwise broadcasting in the comparison could hide a mismatch.
    assert reference.shape == weighted.shape, \
        f'{var_name}: shape {weighted.shape} does not match reference {reference.shape}'
    # Element-wise equality. Testing sum(difference) == 0 would let errors of
    # opposite sign cancel and report a false match.
    assert np.array_equal(reference, weighted), \
        f'{var_name}: values differ from the reference'

### Metrics

In [None]:
# Unpack the ten values returned by output_weighting for the MLP predictions,
# in the order of the names on the left.
# NOTE(review): `scoring_data` is not defined in any visible cell (the instance created
# above is named `data`) — confirm and rename or remove.
heating_mlp, moistening_mlp, netsw_mlp, flwds_mlp, precsc_mlp, precc_mlp, sols_mlp, soll_mlp, solsd_mlp, solld_mlp = \
    scoring_data.output_weighting(scoring_data.preds_scoring['MLP'])

In [None]:
# Short aliases for the prediction / truth pair used in the cells below.
pred, actual = heating_mlp, heating

In [None]:
# Squared error of the prediction at every element.
sq_diff = (pred - actual)**2
# Squared deviation of the truth from its own mean along axis 0; the mean is
# re-expanded to 3-D so it broadcasts against `actual`
# (assumes `actual` is 3-D with axis 0 = time — TODO confirm).
var_time = (actual - actual.mean(axis = 0)[np.newaxis, :, :])**2
# 1 - (mean squared error / mean squared deviation), reduced along axis 0, then
# averaged over the next axis; only the resulting shape is displayed.
(1 - sq_diff.mean(axis = 0)/var_time.mean(axis = 0)).mean(axis = 0).shape

In [None]:
# Shape of the element-wise squared error.
sq_diff.shape

In [None]:
# Shape of the squared-deviation array; it should match sq_diff for the ratio to be element-wise.
var_time.shape

In [None]:
# Indexing with a trailing None appends a length-1 axis.
sq_diff[..., None].shape

In [None]:
# NOTE(review): `scoring_data` is not defined in any visible cell (the instance created
# above is named `data`) — confirm and rename or remove.
scoring_data.latlonnum

In [None]:
# Shape of the weighted MLP heating output.
heating_mlp.shape

In [None]:
# Number of dimensions of the weighted MLP heating output.
len(heating_mlp.shape)

In [None]:
# Accept either the exact 3-D shape (4380, 384, 60) or any 2-D array.
# NOTE(review): the dimensions are hard-coded literals; presumably (time, column, level) — confirm.
assert heating_mlp.shape == (4380, 384, 60) or len(heating_mlp.shape) == 2

In [None]:
# Shape left after reducing the truth array along axis 0.
heating.sum(axis = 0).shape

In [None]:
# Axis-0 mean with a leading length-1 axis restored, i.e. the shape that broadcasts against `heating`.
heating.mean(axis = 0)[np.newaxis, :, :].shape

##### MAE

In [None]:
# MAE over every element of the heating arrays.
np.abs(heating_mlp - heating).mean()

In [None]:
# Absolute error averaged over axes 0 and 1, leaving one value per index of the last axis.
# NOTE(review): this rebinds `hmm`, which earlier held the MLP prediction entry;
# re-running the earlier cells after this one will see a different kind of object.
hmm = np.abs(heating_mlp - heating).mean(axis = (0,1))

In [None]:
# Average of the values left after the axis-(0, 1) reduction.
np.mean(hmm)

In [None]:
# MAE over every element of the moistening arrays.
np.abs(moistening_mlp - moistening).mean()

In [None]:
# MAE over every element of the netsw arrays.
np.abs(netsw_mlp - netsw).mean()

In [None]:
# MAE over every element of the flwds arrays.
np.abs(flwds_mlp - flwds).mean()

In [None]:
# MAE over every element of the precsc arrays.
np.abs(precsc_mlp - precsc).mean()

In [None]:
# MAE over every element of the precc arrays.
np.abs(precc_mlp - precc).mean()

In [None]:
# MAE over every element of the sols arrays.
np.abs(sols_mlp - sols).mean()

In [None]:
# MAE over every element of the soll arrays.
np.abs(soll_mlp - soll).mean()

In [None]:
# MAE over every element of the solsd arrays.
np.abs(solsd_mlp - solsd).mean()

In [None]:
# MAE over every element of the solld arrays.
np.abs(solld_mlp - solld).mean()

##### RMSE

In [None]:
# RMSE over every element of the heating arrays.
np.sqrt(((heating_mlp - heating)**2).mean())

In [None]:
# RMSE over every element of the moistening arrays.
np.sqrt(((moistening_mlp - moistening)**2).mean())

In [None]:
# RMSE over every element of the precsc arrays.
np.sqrt(((precsc_mlp - precsc)**2).mean())

In [None]:
# RMSE over every element of the solld arrays.
np.sqrt(((solld_mlp - solld)**2).mean())