# Examples on how to load trained models and predict the error for the last Nordic4-SS run

# Example: NO_OSC

In [1]:
import numpy as np
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import datetime as dt
import pickle
from numba import cuda
import gc

from storm_surge_oper.data_loader.preprocessor_operational_train_and_test import PreprocessInput
from storm_surge_oper.data_loader.preprocessor_operational_predict import PreprocessInput as PreprocessInputPredict
import storm_surge_oper.utils.helpers as hlp
from storm_surge_oper.data_loader.prepare_df_operational import PrepareDataFrames 

2023-08-22 12:58:51.582378: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
%matplotlib inline
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

## Pre-trained models:

Models have been trained with the script main_run_models_for_all_Stations.py. 

Training and test data (features and labels) are saved in a dictionary for each station. 

Keras models have also been saved for each station and lead time.



### 1) Load training data

In [3]:
# Location of the pickled train/test dictionary for station NO_OSC.
x_y_dir = '/lustre/storeB/project/IT/geout/machine-ocean/workspace/paulinast/operational/data/x_y/NO_OSC'
x_y_file = 'x_y_NO_OSC.pickle'

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open(f'{x_y_dir}/{x_y_file}', 'rb') as pickle_fh:
    b = pickle.load(pickle_fh)

In [4]:
# Show which entries the loaded dictionary contains.
b.keys()

dict_keys(['x_train', 'y_train', 'x_test', 'y_test', 'x_train_mean', 'x_train_std', 'y_train_mean', 'y_train_std', 'x_train_norm', 'y_train_norm', 'x_test_norm', 'y_test_norm', 'nan_idx'])

In [5]:
# Pull the training features/labels and the label mean/std entries
# out of the loaded dictionary in one pass.
x_train, y_train, y_train_mean, y_train_std = (
    b[key] for key in ('x_train', 'y_train', 'y_train_mean', 'y_train_std')
)

In [6]:
# One shape per line: x_train first, then y_train.
print(x_train.shape, y_train.shape, sep='\n')

(1454, 2220)
(1454, 60)


### 2) Load pre-trained model for t = t0 + 1

In [7]:
# Directory containing the saved Keras models for station NO_OSC.
model_dir = (
    '/lustre/storeB/project/IT/geout/machine-ocean/workspace/paulinast'
    '/operational/ml_models/NO_OSC'
)
# Model file for lead time t1 (t = t0 + 1).
model_file = "best_model_NO_OSC_t1.h5"

model = keras.models.load_model(f'{model_dir}/{model_file}')

2023-08-22 12:58:55.226336: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-08-22 12:58:55.226599: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-08-22 12:58:55.226607: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2023-08-22 12:58:55.226624: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c6525-hw8rl83-bn-compute): /proc/driver/nvidia/version does not exist
2023-08-22 12:58:55.226837: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, 

### 3) Preprocess test data

In [8]:
# Station used for the labels, and the stations that supply the features.
station = 'NO_OSC'
feature_stations = ['NO_OSC', 'NO_AES', 'NO_BGO', 'NO_VIK', 'NO_TRG']

# Feature variables and the label expression handed to the preprocessor.
feature_vars = ['obs', 'tide_py', 'stormsurge_corrected', 'u10', 'v10']
label_var = 'stormsurge_corrected - (obs - tide_py)'

# Window widths; presumably one entry per feature variable, in the same
# order as feature_vars -- TODO confirm against the preprocessor.
window_width_past = [24, 24, 0, 0, 0]
window_width_future = [-1, 60, 60, 60, 60]  # -1 does not include time t
horizon = 60

# Start time (UTC). Both names refer to the same datetime object.
datetime_split = dt.datetime(
    year=2020, month=3, day=31, hour=12, tzinfo=dt.timezone.utc
)
datetime_start_hourly = datetime_split

In [9]:
# Preprocessor for a single time: start and end are the same timestamp.
pp = PreprocessInputPredict(
    # What to build
    label_station=station,
    label_var=label_var,
    feature_stations=feature_stations,
    feature_vars=feature_vars,
    # Time windows
    horizon=horizon,
    window_width_past=window_width_past,
    window_width_future=window_width_future,
    past_forecast=24,
    datetime_start_hourly=datetime_start_hourly,
    datetime_end_hourly=datetime_start_hourly,
    # Processing switches
    normalize=True,
    remove_nans=False,
    use_existing_files=False,
)
    
    

In [10]:
# Run the preprocessing; the results are read from pp.x_y_dict in the cells below.
pp.preprocess_data()

Call PrepareDataFrames...
Preparing feature and label df...
Preparing auxiliary DataFrame...
list_all_variables:  ['stormsurge_corrected', '(obs - tide_py)', 'obs', 'tide_py']
Generating obs and tide_py DataFrame form operational data for the past...
dt_start:  2020-03-30 12:00:00+00:00
dt_end:  2020-03-31 12:00:00+00:00
station:  NO_AES
                           obs_t-24_NO_AES  obs_t-23_NO_AES  obs_t-22_NO_AES  \
2020-03-31 12:00:00+00:00             1.31             1.43             1.41   
2020-03-31 13:00:00+00:00             1.43             1.41             1.30   
2020-03-31 14:00:00+00:00             1.41             1.30             1.10   
2020-03-31 15:00:00+00:00             1.30             1.10             0.82   
2020-03-31 16:00:00+00:00             1.10             0.82             0.59   
2020-03-31 17:00:00+00:00             0.82             0.59             0.46   
2020-03-31 18:00:00+00:00             0.59             0.46             0.46   
2020-03-31 19:00:00+

  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_nam

Added (obs-tide) data.
Generating operational DataFrame...
station:  NO_AES  var:  stormsurge_corrected
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033012
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033100
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033112
big_data_array.shape:  (3, 85)
station:  NO_VIK  var:  stormsurge_corrected
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033012
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033100
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033112
big_data_array.shape:  (3, 85)
station:  NO_TRG  var:  stormsurge_corrected
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033012
/lustre/storeB/project/fou/hi/stormsurge_eps/2dEPS_archive/kyststasjoner_norge.nc2020033100
/lustre/

  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_nam

Added (obs-tide) data.
Generating station forcing df...
Generate AROME df...
self.dt_start:  2020-03-31 12:00:00+00:00
self.dt_end:  2020-03-31 12:00:00+00:00
len(date_list)  1
date_list[0]:  2020-03-31 12:00:00+00:00
date_list[-1]:  2020-03-31 12:00:00+00:00
Generating path list...
dt_start.hou r == 12
dt_end.hour == 12
dt_start_arome:  2020-03-31 06:00:00+00:00
n_hours:  0
last hr:  0
last arome date:  2020-03-31 06:00:00+00:00
url_list[0]:  /lustre/storeB/immutable/archive/projects/metproduction/MEPS/2020/03/31/meps_det_2_5km_20200331T06Z.nc
url_list[-1]:  /lustre/storeB/immutable/archive/projects/metproduction/MEPS/2020/03/31/meps_det_2_5km_20200331T06Z.nc
len(url_list):  1
station:  NO_AES  var:  v10
Generating AROME arrays...
Generating  /lustre/storeB/immutable/archive/projects/metproduction/MEPS/2020/03/31/meps_det_2_5km_20200331T06Z.nc ...
data.shape:  (1, 61)
len(col_names):  61
station:  NO_AES  var:  u10
Generating AROME arrays...
Generating  /lustre/storeB/immutable/archiv

  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_name] = (
  df[col_nam

Added stormsurge - (obs-tide) data.
df:                             obs_t-24_NO_AES  obs_t-23_NO_AES  obs_t-22_NO_AES  \
2020-03-31 12:00:00+00:00             1.31             1.43             1.41   

                           obs_t-21_NO_AES  obs_t-20_NO_AES  obs_t-19_NO_AES  \
2020-03-31 12:00:00+00:00              1.3              1.1             0.82   

                           obs_t-18_NO_AES  obs_t-17_NO_AES  obs_t-16_NO_AES  \
2020-03-31 12:00:00+00:00             0.59             0.46             0.46   

                           obs_t-15_NO_AES  ...  tide_py_t51_NO_OSC  \
2020-03-31 12:00:00+00:00             0.55  ...                0.54   

                           tide_py_t52_NO_OSC  tide_py_t53_NO_OSC  \
2020-03-31 12:00:00+00:00                0.45                 0.4   

                           tide_py_t54_NO_OSC  tide_py_t55_NO_OSC  \
2020-03-31 12:00:00+00:00                 0.4                0.44   

                           tide_py_t56_NO_OSC  tide_py_

If data is missing in either x or y, we do not run the ML models. To run them anyway, we would first need to decide which imputation method to use.

In [16]:
# Show which entries the preprocessor's x_y_dict contains.
pp.x_y_dict.keys()


dict_keys(['x', 'y', 'x_train_mean', 'x_train_std', 'y_train_mean', 'y_train_std', 'x_norm', 'y_norm'])

In [26]:
# Raw and normalized features/labels from the preprocessor.
x_test, y_test, x_test_norm, y_test_norm = (
    pp.x_y_dict[key] for key in ('x', 'y', 'x_norm', 'y_norm')
)
# Label mean/std as stored by the preprocessor; these rebind the names
# that the unnormalization step reads.
y_train_mean, y_train_std = (
    pp.x_y_dict[key] for key in ('y_train_mean', 'y_train_std')
)

In [14]:
# Shape of the feature array for the prediction time.
x_test.shape

(1, 2220)

In [15]:
# Shape of the label array for the prediction time.
y_test.shape

(1, 60)

### 4) Predict the error on the test data

In [18]:
# Feed the normalized features to the loaded model; the output is still in normalized units.
y_test_norm_pred = model.predict(x_test_norm)
# Display the normalized prediction.
y_test_norm_pred

array([[-1.0656439]], dtype=float32)

In [21]:
# Normalized label at [0, 0], for comparison with the normalized prediction.
y_test_norm[0, 0]


-0.9363869062794208

### 5) Unnormalize predicted value

In [23]:
# Undo the normalization with element 0 of the label mean and std
# (mean + std * normalized value).
y_test_pred = y_train_mean[0] + y_train_std[0] * y_test_norm_pred
y_test_pred

array([[-0.06498285]], dtype=float32)

In [28]:
# Unnormalized label at [0, 0], for comparison with the unnormalized prediction.
y_test[0, 0]

-0.05661480873823166