In [1]:
# %%
import os
import numpy as np
import pandas as pd
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, BatchNormalization, Dropout, Masking
from keras.layers.convolutional import Conv1D, MaxPooling1D, AveragePooling1D
from tensorflow.keras.optimizers import RMSprop, Adam
import tensorflow.keras.backend as K
import time

# %%
seed = 42
# Seed NumPy's and TensorFlow's global random generators with the same value.
np.random.seed(seed)
tf.random.set_seed(seed)

# %%
#### Change inputs
modelID = '93ks'
## NOAA stations, listed in the same order as the model outputs
NOAAstations = ['Duck', 'Oregon', 'Hatteras', 'Beaufort', 'Wilmington', 'Wrightsville', 'Albemarle', 'Pamlico', 'Neuse']

#### Define paths and load data
# The defaults are the absolute locations used on the original workstation.
# To run elsewhere without editing this cell, set the environment variables
# NN_PATH_DATA and/or NN_PATH_COL_SAMPLE (for example to a repo-relative
# layout such as ../../../data/random_split and its parent directory).
# An unset or empty variable falls back to the default.
pathData = Path(
    os.environ.get('NN_PATH_DATA')
    or r'/mnt/drive1/Insyncs/NCSU/thesis/models/NNmodel/inputs/random_split'
)
pathColSample = Path(
    os.environ.get('NN_PATH_COL_SAMPLE')
    or r'/mnt/drive1/Insyncs/NCSU/thesis/models/adcirc/concorde/batch02/_postprocessing/_preprocessForNN'
)
X_train_file = 'X_train_standardScaled_allInputs_augmentedAllX50_ALL.npy'
Y_train_file = 'y_train_augmentedAllX50_ALL.npy'
X_test_file = 'X_test_standardScaled_allInputs_augmentedAllX50_ALL.npy'
Y_test_file = 'y_test_augmentedAllX50_ALL.npy'

#### some hyperparameters
batch_size = 100
epochs = 950
fold = 1 ## no cross validation

2024-01-19 09:34:24.172306: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-19 09:34:25.160711: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/tacuevas/miniconda3/envs/tf2/lib/
2024-01-19 09:34:25.160847: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/tacuevas/miniconda3/envs/tf2/lib/


In [2]:
#### path to store outputs
# Every artefact of this run goes into a sub-folder named after the model ID.
pathOut = Path('../models/NNmodel/1DCNN_final_architecture/fftAndOffshoreTides') / modelID

#### class to save best model
class CustomCallback(tf.keras.callbacks.Callback):
    """Save the model each time the validation loss reaches a new minimum.

    At the end of every epoch the callback reads ``val_loss`` from the epoch
    logs. When it is lower than the lowest value seen so far, the model is
    saved to ``pathout / f'bestModel_{modelID}_noVal.tf'`` and one line is
    appended to ``pathout / 'best_model_noVal.txt'``.

    Parameters
    ----------
    pathout : str or pathlib.Path
        Directory that receives the saved model and the text log.
    fold : int
        Fold identifier; only written into the text log.
    modelID : str
        Identifier embedded in the saved model's file name.

    Attributes
    ----------
    previous_val_loss : float
        Lowest validation loss seen so far (starts at +inf).
    best_epoch : int or None
        Epoch index, as passed by Keras to ``on_epoch_end``, of the last save.
    best_model : tf.keras.Model or None
        Reference to ``self.model`` taken at the last save. This is the live
        model object, not a snapshot, so its weights keep changing as training
        continues; reload the saved file to recover the best weights.
    """

    def __init__(self, pathout, fold, modelID):
        super().__init__()
        # Accept plain strings as well: the `/` joins below need a Path.
        self.pathout = Path(pathout)
        self.fold = fold
        self.modelID = modelID
        self.previous_val_loss = float('inf')  # Initialize with a high value
        self.best_epoch = None
        self.best_model = None

    def on_epoch_end(self, epoch, logs=None):
        """Save the model and log the epoch if ``val_loss`` improved.

        Does nothing when ``logs`` is missing or has no ``val_loss`` entry
        (for instance when ``fit`` is run without validation data).
        """
        # `logs` defaults to None; guard before calling .get on it.
        current_val_loss = (logs or {}).get('val_loss')
        if current_val_loss is None or not current_val_loss < self.previous_val_loss:
            return

        self.model.save(self.pathout / f'bestModel_{self.modelID}_noVal.tf')
        self.previous_val_loss = current_val_loss
        self.best_epoch = epoch
        self.best_model = self.model  # alias of the live model, see class docstring
        # Append so that the file keeps the history of every improvement.
        with open(self.pathout / 'best_model_noVal.txt', 'a') as fout:
            fout.write(f'Best model saved for fold {self.fold}: epoch {self.best_epoch}, val_loss: {current_val_loss:0.3f}\n')

#### load data
# The four arrays are read from pathData in the order X_train, y_train, X_test, y_test.
X_train, y_train, X_test, y_test = (
    np.load(pathData / file_name)
    for file_name in (X_train_file, Y_train_file, X_test_file, Y_test_file)
)

# Sample table whose column labels are used to look up input positions by name.
columns_sample = pd.read_csv(
    pathColSample / 'dct_tracksAll_batch02_ALL_lengthCorr_tides_resampled_SAMPLE.csv',
    index_col=0,
)

## inputs
cols = [
    # raw track variables
    'wind_speed', 'pressure', 'rad_to_max_ws', 'forward_speed_u', 'forward_speed_v',
    # "_fft" counterparts of the same variables
    'wind_speed_fft', 'pressure_fft', 'rad_to_max_ws_fft',
    'forward_speed_u_fft', 'forward_speed_v_fft',
    # "dist_to_*" columns
    'dist_to_duck', 'dist_to_oregon', 'dist_to_hatteras',
    'dist_to_beaufort', 'dist_to_wilmington', 'dist_to_wrightsville',
    'dist_to_Albemarle', 'dist_to_Pamlico', 'dist_to_Neuse',
    # remaining input
    'Boundary',
]

In [3]:
## extract inputs idx from the full input array
# Map each requested input name to its position among the sample table's
# column labels, then keep only those positions along the last axis.
# NOTE(review): this cell overwrites X_train / X_test, so running it twice
# applies the selection to the already-reduced arrays.
all_column_names = list(columns_sample)
idx_cols = []
for col_name in cols:
    idx_cols.append(all_column_names.index(col_name))
X_train = X_train[..., idx_cols]
X_test = X_test[..., idx_cols]

In [4]:
# Create the output directory together with any missing parents. An existing
# directory is fine; any other failure (e.g. permissions) is raised here,
# before the long training run starts, instead of being silently ignored.
pathOut.mkdir(parents=True, exist_ok=True)

#### Define model
# 1D CNN: three Conv1D -> BatchNormalization -> MaxPooling1D stages with 16, 32
# and 64 filters, followed by a dense head with dropout.
# The output layer has one unit per entry in NOAAstations so that it always
# matches the columns used for the prediction DataFrame.
# NOTE(review): Masking(-9999) flags timesteps whose features all equal -9999;
# Conv1D is not documented as mask-aware, so confirm how padded timesteps are
# actually treated by the convolutions.
n_outputs = len(NOAAstations)
model = Sequential([
    Masking(-9999, input_shape=(X_train.shape[1:])),
    Conv1D(16, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Conv1D(32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    # ReLU on the output restricts predictions to non-negative values.
    Dense(n_outputs, activation='relu'),
])

# Huber loss is optimized; MSE, MAE and RMSE are tracked as metrics only.
optimizer = RMSprop(learning_rate = 1e-4)
model.compile(optimizer=optimizer, loss=tf.keras.losses.Huber(),
              metrics=['mse', 'mae', tf.keras.metrics.RootMeanSquaredError(name='rmse')])
model.summary()

#### train the model
# fit() is called without validation data or callbacks, so the model saved
# below holds the weights from the final epoch.
t0 = time.time()
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
# Take the elapsed time right after fit() so that saving is not counted as training.
training_hours = (time.time() - t0) / 3600
model.save(pathOut / f'bestModel_{modelID}_nonVal.tf')
print(f'Training time: {training_hours:0.3f} hrs')

####  predictions
# Predict on the test inputs and write the targets next to the predictions:
# one column per station for the targets, plus a matching "<station>_pred" column.
predictions = model.predict(X_test)
dfPred = pd.DataFrame(predictions, columns=NOAAstations).add_suffix('_pred')

# Keep only the first two axes of y_test so it forms a 2-D table.
n_rows, n_targets = y_test.shape[:2]
dfTest = pd.DataFrame(y_test.reshape(n_rows, n_targets), columns=NOAAstations)

dfAll = pd.concat([dfTest, dfPred], axis=1)
dfAll.to_csv(pathOut / f'predTestSet_{modelID}.csv')

# for i in range(6):
#     fig, ax = plt.subplots(figsize = (4,4))
#     sns.regplot(x = dfTest.iloc[:, i], y = dfPred.iloc[:, i], ax = ax, fit_reg = False)
#     ax.plot(np.arange(0, 5, 0.5), np.arange(0, 5, 0.5), ls = '--', c = 'k')
#     ax.set_title(f'{NOAAstations[i]}')
#     fig.savefig(pathOut/f'predTestSet_{modelID}_{NOAAstations[i]}.png',
#                 dpi = 100, bbox_inches = 'tight')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking (Masking)           (None, 235, 20)           0         
                                                                 
 conv1d (Conv1D)             (None, 233, 16)           976       
                                                                 
 batch_normalization (BatchN  (None, 233, 16)          64        
 ormalization)                                                   
                                                                 
 max_pooling1d (MaxPooling1D  (None, 116, 16)          0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 114, 32)           1568      
                                                                 
 batch_normalization_1 (Batc  (None, 114, 32)          1

2024-01-19 09:34:34.128574: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-19 09:34:35.451338: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4356 MB memory:  -> device: 0, name: Quadro P2000, pci bus id: 0000:5b:00.0, compute capability: 6.1
2024-01-19 09:34:35.452235: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 3588 MB memory:  -> device: 1, name: Quadro P2000, pci bus id: 0000:9e:00.0, compute capability: 6.1


 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 55, 64)            6208      
                                                                 
 batch_normalization_2 (Batc  (None, 55, 64)           256       
 hNormalization)                                                 
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 27, 64)           0         
 1D)                                                             
                                                                 
 flatten (Flatten)           (None, 1728)              0         
                                                                 
 dense (Dense)               (None, 64)                110656    
                                                                 
 dropout (Dropout)           (None, 64)                0         
          

2024-01-19 09:34:42.119422: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2024-01-19 09:34:42.952093: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-01-19 09:34:43.818635: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f0ee800cfb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-01-19 09:34:43.818726: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Quadro P2000, Compute Capability 6.1
2024-01-19 09:34:43.818751: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (1): Quadro P2000, Compute Capability 6.1
2024-01-19 09:34:43.849450: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-01-19 09:34:44.045371: I tensorflow/tsl/platform/default/subprocess.cc

Epoch 2/950
Epoch 3/950
Epoch 4/950
Epoch 5/950
Epoch 6/950
Epoch 7/950
Epoch 8/950
Epoch 9/950
Epoch 10/950
Epoch 11/950
Epoch 12/950
Epoch 13/950
Epoch 14/950
Epoch 15/950
Epoch 16/950
Epoch 17/950
Epoch 18/950
Epoch 19/950
Epoch 20/950
Epoch 21/950
Epoch 22/950
Epoch 23/950
Epoch 24/950
Epoch 25/950
Epoch 26/950
Epoch 27/950
Epoch 28/950
Epoch 29/950
Epoch 30/950
Epoch 31/950
Epoch 32/950
Epoch 33/950
Epoch 34/950
Epoch 35/950
Epoch 36/950
Epoch 37/950
Epoch 38/950
Epoch 39/950
Epoch 40/950
Epoch 41/950
Epoch 42/950
Epoch 43/950
Epoch 44/950
Epoch 45/950
Epoch 46/950
Epoch 47/950
Epoch 48/950
Epoch 49/950
Epoch 50/950
Epoch 51/950
Epoch 52/950
Epoch 53/950
Epoch 54/950
Epoch 55/950
Epoch 56/950
Epoch 57/950
Epoch 58/950
Epoch 59/950
Epoch 60/950
Epoch 61/950
Epoch 62/950
Epoch 63/950
Epoch 64/950
Epoch 65/950
Epoch 66/950
Epoch 67/950
Epoch 68/950
Epoch 69/950
Epoch 70/950
Epoch 71/950
Epoch 72/950
Epoch 73/950
Epoch 74/950
Epoch 75/950
Epoch 76/950
Epoch 77/950
Epoch 78/950
Epoch 7



INFO:tensorflow:Assets written to: ../models/NNmodel/1DCNN_final_architecture/fftAndOffshoreTides/93ks/bestModel_93ks_nonVal.tf/assets


INFO:tensorflow:Assets written to: ../models/NNmodel/1DCNN_final_architecture/fftAndOffshoreTides/93ks/bestModel_93ks_nonVal.tf/assets


Training time: 3.845 hrs
