## Hyperparameters

In [None]:
# Input feature sets: keep exactly one of the following lines active.
# InputFeature = ['Wind_Vel', 'Wind_Dir_deg'] # CaseNum = 1
InputFeature = ['Cur_Dir_deg', 'Wind_Vel', 'Wind_Dir_deg'] # CaseNum = 2
# InputFeature = ['Cur_Vel', 'Cur_Dir_deg', 'Water_Temp', 'Salty', 'Wind_Vel', 'Wind_Dir_deg', 'Air_Temp', 'Air_Pressure'] # CaseNum = 3

NodeNum = 30 # Number of nodes in each hidden layer
LayerNum = 3 # Number of hidden layers
BatchNum = 10 # Mini-batch size
epochNum = 500 # Maximum number of training epochs
patienceNum = epochNum # Early-stopping patience, kept equal to epochNum
Act_hid_fn = 'relu' # Hidden-layer activation: 'relu', 'tanh' or 'sigmoid'
# Act_out_fn = 'sigmoid'
Opti_fn = 'adam'
loss_fn = 'mae'  # 'mae', 'mse' or 'logcosh'

# Case name used for the result folder and the figure/file names.
# The input count is taken from InputFeature so that cases with a different
# number of features do not end up sharing one folder.
case_name = (
    'Input' + str(len(InputFeature))
    + '_node' + str(NodeNum)
    + '_layer' + str(LayerNum)
    + '_batch' + str(BatchNum)
    + '_epoch' + str(epochNum)
    + '_' + loss_fn
)


## Import

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]='-1'
# Device selection (set before TensorFlow is imported):
#   CPU only  : os.environ["CUDA_VISIBLE_DEVICES"]='-1'
#   first GPU : os.environ["CUDA_VISIBLE_DEVICES"]='0'

# To use dataframe
import pandas as pd

# Machine learning library and API
import tensorflow as tf
from tensorflow import keras

# To use numpy array
import numpy as np

# To plot
import matplotlib.pyplot as plt

# Fix the NumPy and TensorFlow global seeds for repeatable runs
np.random.seed(777)
tf.random.set_seed(777)

## Check availability of GPU

In [3]:
# Displays the GPU device name seen by TensorFlow; an empty string means no GPU is in use.
tf.test.gpu_device_name()

2022-03-23 13:12:21.620303: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-23 13:12:21.637421: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-03-23 13:12:21.637449: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist


''

## Load data

In [4]:
# Read the full observation table; the path is relative to the notebook's working directory.
data = pd.read_csv('../data/data_KS.csv')

## Check data type

In [5]:
# Show the dtype of every column (Date_Time is read as a string/object column).
data.dtypes

Date_Time        object
Cur_Vel         float64
Cur_Dir_deg       int64
Water_Temp      float64
Salty           float64
Sig_Wave_H      float64
Sig_Wave_P      float64
Max_Wave_H      float64
Max_Wave_P      float64
Wave_Dir_deg      int64
Wind_Vel        float64
Wind_Dir_deg      int64
Air_Temp        float64
Air_Pressure    float64
dtype: object

## Check for NaN data

In [6]:
not data.isna().to_numpy().any() # True: the table contains no NaN values

True

## Type conversion (int --> float)

In [7]:
# Cast the 13 measurement columns (positions 1-13, i.e. every column after Date_Time) to float64.
# The frame is re-bound through astype() because assigning the converted values back with
# data.iloc[:, cols] = ... may write into the existing int64 columns in place (recent pandas
# versions always try this first), which would leave their dtype unchanged.
data = data.astype(dict.fromkeys(data.columns[1:14], 'float64'))

## Data slicing

In [8]:
# Chronological split on the Date_Time strings (bounds are inclusive):
# 2012-2018 -> training, 2019 -> validation, 2020 -> test.
data_train = data.loc[data['Date_Time'].between('2012-01-01 00:00:00', '2018-12-31 23:30:00')]
data_val = data.loc[data['Date_Time'].between('2019-01-01 00:00:00', '2019-12-31 23:30:00')]
data_test = data.loc[data['Date_Time'].between('2020-01-01 00:00:00', '2020-12-31 23:30:00')]

## Neural Network

In [9]:
# Library versions used for this run
tf.__version__

keras.__version__

# Feature matrices (rows = samples, columns = InputFeature) for each split
X_data_train = data_train[InputFeature].to_numpy()
X_data_val = data_val[InputFeature].to_numpy()
X_data_test = data_test[InputFeature].to_numpy()

## Standardization

In [10]:
# Per-feature mean and standard deviation are computed on the training split only
# and then applied unchanged to the validation and test splits.
X_mean_train = X_data_train.mean(axis=0)
std_train = X_data_train.std(axis=0)

X_data_train_ = (X_data_train - X_mean_train) / std_train
X_data_val_ = (X_data_val - X_mean_train) / std_train
X_data_test_ = (X_data_test - X_mean_train) / std_train

## Input data set-up

In [11]:
# Candidate target (Y) values:
#   1) Significant wave height  <- used here
#   2) Wave period
#   3) Wave direction
Y_data_train_ = data_train['Sig_Wave_H'].to_numpy()
Y_data_val_ = data_val['Sig_Wave_H'].to_numpy()
Y_data_test_ = data_test['Sig_Wave_H'].to_numpy()

X_data_train_

# Short aliases used by the training and evaluation cells
X_train, Y_train = X_data_train_, Y_data_train_
X_valid, Y_valid = X_data_val_, Y_data_val_
X_test, Y_test = X_data_test_, Y_data_test_

## Directory set-up for result data

In [12]:
# Result folder for this case: checkpoints, figures and the prediction file are written here.
MODEL_SAVE_FOLDER_PATH = './model/'+case_name+'/'
# makedirs also creates the parent './model/' folder when it is missing and is a no-op
# when the folder already exists (os.mkdir would raise if the parent did not exist).
os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)
# Checkpoint file name template; {epoch} and {val_loss} are filled in when a checkpoint is saved.
model_path = MODEL_SAVE_FOLDER_PATH + 'ocean-' + '{epoch:02d}-{val_loss:.4f}.hdf5'

## Neural network model

In [13]:
# Checkpoint callback: writes the model to model_path whenever val_loss improves
# (save_best_only=True), so the most recently written file is the best one of the run.
cb_checkpoint = keras.callbacks.ModelCheckpoint(filepath=model_path, monitor='val_loss', verbose=1, save_best_only=True)

# Early-stopping callback: stops training once val_loss has not improved for patienceNum epochs.
cb_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=patienceNum)

# Build a fully connected network: LayerNum hidden layers of NodeNum nodes each,
# followed by a single output node without activation (regression output).
model = keras.models.Sequential()
# First hidden layer; also fixes the input width to the number of feature columns.
model.add(keras.layers.Dense(NodeNum, activation= Act_hid_fn, input_shape=(X_train.shape[1],)))
# Remaining LayerNum-1 hidden layers.
for i in range(LayerNum-1):
    model.add(keras.layers.Dense(NodeNum, activation=Act_hid_fn))
model.add(keras.layers.Dense(1)) #, activation=Act_out_fn
# With these metrics, evaluate() returns [loss, mae, mse] in that order.
model.compile(optimizer = Opti_fn, loss = loss_fn, metrics=['mae','mse'])   # mae --> mse

# model.layers
# model.summary()

# keras.utils.plot_model(model, "real_simple_model.png", show_shapes=True)

## Learning process

In [None]:
# Train the network; the validation split drives both checkpointing and early stopping.
history = model.fit(
    X_train,
    Y_train,
    epochs=epochNum,
    batch_size=BatchNum,
    validation_data=(X_valid, Y_valid),
    callbacks=[cb_checkpoint, cb_early_stopping],
)

2022-03-23 13:12:22.172923: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/500

Epoch 00001: val_loss improved from inf to 0.39919, saving model to ./model/Input3_node30_layer3_batch10_epoch500_mae/ocean-01-0.3992.hdf5
Epoch 2/500

Epoch 00002: val_loss improved from 0.39919 to 0.39091, saving model to ./model/Input3_node30_layer3_batch10_epoch500_mae/ocean-02-0.3909.hdf5
Epoch 3/500

Epoch 00003: val_loss did not improve from 0.39091
Epoch 4/500

Epoch 00004: val_loss improved from 0.39091 to 0.38421, saving model to ./model/Input3_node30_layer3_batch10_epoch500_mae/ocean-04-0.3842.hdf5
Epoch 5/500

Epoch 00005: val_loss did not improve from 0.38421
Epoch 6/500

Epoch 00006: val_loss did not improve from 0.38421
Epoch 7/500

Epoch 00007: val_loss did not improve from 0.38421
Epoch 8/500

Epoch 00008: val_loss did not improve from 0.38421
Epoch 9/500

Epoch 00009: val_loss did not improve from 0.38421
Epoch 10/500

Epoch 00010: val_loss did not improve from 0.38421
Epoch 11/500

Epoch 00011: val_loss improved from 0.38421 to 0.37377, saving model to .

## Plot: Learning curve

In [None]:
# Figure size and font must be configured before the first plotting call:
# plt.plot() creates the figure, and settings changed afterwards only affect later figures.
plt.rcParams['figure.figsize'] = [15, 6]
font = {'weight' : 'normal',
       'size' : 15}
plt.rc('font', **font)

# Training and validation loss per epoch
plt.plot(history.history['loss'], 'b', label='training')
plt.plot(history.history['val_loss'], 'g', label='validation')
plt.xlabel("Epoch")
# Label follows the configured loss function ('mae' -> "Loss(MAE)")
plt.ylabel("Loss(" + str(loss_fn).upper() + ")")
plt.title('<Learning curve> Node #:' + str(NodeNum)+' / Layer #:'+str(LayerNum))
plt.grid(True)
plt.legend()
# Show the whole configured training range instead of a fixed 300 epochs
plt.xlim(-1, epochNum + 1)
plt.ylim(0, 1)
plt.savefig(MODEL_SAVE_FOLDER_PATH+'learningcurve_'+case_name+'.png', dpi=300)
plt.show()

# Predictions of the final-epoch model on the test split, next to the measured targets
test_targets = Y_test
predict_targets = model.predict(X_test)

# evaluate() returns the loss followed by the compiled metrics; index 1 is the MAE.
a = model.evaluate(X_test, Y_test)
test_mae_score = a[1]


from pandas import Timestamp

# Test-split timestamps parsed to datetime (for the date-based x axis)
Test_Date_Time = pd.to_datetime(data_test['Date_Time'])

## Plot: Time-series (Final model)

In [None]:
# Measured vs. predicted significant wave height of the final model, plotted against
# the sample index. The date-based variant is kept here for reference:
# plt.plot(Test_Date_Time,test_targets, 'b.',label='measured', markersize=2)
# plt.plot(Test_Date_Time,predict_targets,'r.',label='predicted', markersize=2)
plt.plot(test_targets, 'b.', markersize=2, label='measured')
plt.plot(predict_targets, 'r.', markersize=2, label='predicted')

plt.xlabel('Time')
plt.ylabel('Significant wave height(m)')
plt.title(f'<Time series> Node #:{NodeNum} / Layer #:{LayerNum} / Batch #:{BatchNum}')
plt.xticks(rotation=20)
plt.grid(True)
plt.legend()

# Test-set MAE annotation, placed in data coordinates
plt.text(0, 7.3, f' MAE_test: {test_mae_score}')
# plt.text(0, 7.3, ' MSE_test: '+str(test_mse_score))

plt.rcParams['figure.figsize'] = [15, 6]
plt.savefig(f'{MODEL_SAVE_FOLDER_PATH}timeseries{case_name}.png', dpi=300)
plt.show()

## Load best model

In [None]:
# Checkpoints are saved as 'ocean-<epoch>-<val_loss>.hdf5' and only when val_loss improves,
# so the file with the highest epoch number is the best model of a run.
# NOTE: the result folder is never cleared, so checkpoints left over from an earlier run
# with the same case_name are considered here as well.
model_list = os.listdir(MODEL_SAVE_FOLDER_PATH)

# (epoch, list index) for every file whose second '-'-separated field is an epoch number;
# figures, text files and any other stray file are ignored instead of crashing int().
checkpoint_epochs = [
    (int(file_name.split('-')[1]), file_idx)
    for file_idx, file_name in enumerate(model_list)
    if len(file_name.split('-')) > 1 and file_name.split('-')[1].isdigit()
]
if not checkpoint_epochs:
    raise FileNotFoundError('No model checkpoint found in ' + MODEL_SAVE_FOLDER_PATH)

# Highest epoch wins; on equal epochs the later list entry wins, as with the previous '>=' scan.
maxNum, maxIdx = max(checkpoint_epochs)

model_load_path = MODEL_SAVE_FOLDER_PATH+model_list[maxIdx]
best_model = keras.models.load_model(model_load_path)

# evaluate() returns the loss followed by the compiled metrics; index 1 is the MAE.
b = best_model.evaluate(X_test, Y_test)
test_mae_score_best = b[1]
best_predict_targets = best_model.predict(X_test)

In [None]:
# Shape of the best model's prediction array for the test split
best_predict_targets.shape

In [None]:
# Test-set MAE of the best checkpoint
test_mae_score_best #test_mse_score_best

## Plot: Time-series (Best model)

In [None]:
from pandas import Timestamp

# Test-split timestamps parsed to datetime (for the date-based x axis)
Test_Date_Time = pd.to_datetime(data_test['Date_Time'])

# Measured vs. predicted significant wave height of the best checkpoint, plotted against
# the sample index. The date-based variant is kept here for reference:
# plt.plot(Test_Date_Time,test_targets, 'b.',label='measured', markersize=2)
# plt.plot(Test_Date_Time,best_predict_targets,'r.',label='predicted', markersize=2)
plt.plot(test_targets, 'b.', markersize=2, label='measured')
plt.plot(best_predict_targets, 'r.', markersize=2, label='predicted')

plt.xlabel('Time')
plt.ylabel('Significant wave height(m)')
plt.title(f'<Time series> Node #:{NodeNum} / Layer #:{LayerNum}')
plt.xticks(rotation=20)
plt.grid(True)
plt.legend()

# Test-set MAE annotation, placed in data coordinates
plt.text(0, 7.3, f' MAE_test: {test_mae_score_best}')
# plt.text(0, 7.3, ' MSE_test: '+str(test_mse_score_best))

plt.rcParams['figure.figsize'] = [15, 6]
plt.savefig(f'{MODEL_SAVE_FOLDER_PATH}timeseries_best{case_name}.png', dpi=300)
plt.show()

In [None]:
# Write the best model's test-split predictions to a text file in the result folder
np.savetxt(
    MODEL_SAVE_FOLDER_PATH + case_name + '.txt',
    best_predict_targets,
    header='Sig.wave height (m)',
)

In [None]:
# Path of the prediction file written by the previous cell
MODEL_SAVE_FOLDER_PATH+case_name+'.txt'