In [1]:
import os

# Expose a single GPU to CUDA. This cell runs before the cell that imports
# TensorFlow, so the framework only ever sees the device chosen here.
os.environ.update(CUDA_VISIBLE_DEVICES="0")  # Replace with the GPU index you want to use

# Echo the value back from the environment as confirmation.
print(f"Using GPU {os.environ['CUDA_VISIBLE_DEVICES']} for the experiment.")

Using GPU 0 for the experiment.


In [2]:
import json
import math

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
import seaborn as sns

2023-11-17 21:23:41.227291: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# import keras; print(keras.__version__)

In [6]:
# print(tf.__version__)

In [3]:
# Prepare data: load the normalised arrays and hold out 20% as a test set.
path_to_X = "../Normalised/X.npy"
path_to_Y = "../Normalised/Y.npy"
X = np.load(path_to_X)
Y = np.load(path_to_Y)
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Sanity-check the split: lengths and shapes, one value per line.
print(
    len(X_train), X_train.shape,
    len(X_test), Y_train.shape,
    len(Y_train), X_test.shape,
    len(Y_test), Y_test.shape,
    sep="\n",
)

# Append a trailing channel axis so each sample becomes (length, 1),
# e.g. (No of data, 50) -> (No of data, 50, 1).
X_train_reshaped = X_train[:, :, np.newaxis]
print("X_train_reshaped shape: ", X_train_reshaped.shape)
Y_train_reshaped = Y_train[:, :, np.newaxis]
print("Y_train_resahped shape: ", Y_train_reshaped.shape)

# Per-sample input shape (sequence_length, 1), i.e. everything but the batch axis.
input_shape = X_train_reshaped.shape[1:]
print("Input_shape: ", input_shape)

# Same channel-axis treatment for the held-out split.
X_test_reshaped = X_test[:, :, np.newaxis]
Y_test_reshaped = Y_test[:, :, np.newaxis]
print("X_test_reshaped",X_test_reshaped.shape)
print("Y_test_reshaped",Y_test_reshaped.shape)

49730
(49730, 50)
12433
(49730, 81)
49730
(12433, 50)
12433
(12433, 81)
X_train_reshaped shape:  (49730, 50, 1)
Y_train_resahped shape:  (49730, 81, 1)
Input_shape:  (50, 1)
X_test_reshaped (12433, 50, 1)
Y_test_reshaped (12433, 81, 1)


In [4]:
# Positional encoding tells the transformer where each element sits in the
# sequence; attention by itself has no notion of order.
def positional_encoding(seq_length, d_model):
    """Build a sinusoidal positional-encoding tensor.

    Args:
        seq_length: number of sequence positions to encode.
        d_model: number of encoding channels per position.

    Returns:
        A float32 tensor of shape (1, seq_length, d_model). Along the last
        axis all sine channels come first, followed by all cosine channels
        (the two halves are concatenated, not interleaved).
    """
    # Column vector of positions, shape (seq_length, 1).
    positions = tf.range(seq_length, dtype=tf.float32)[:, tf.newaxis]

    # Row vector of channel indices, shape (1, d_model). Consecutive channel
    # pairs (2k, 2k+1) share one frequency via the floor division by 2.
    channels = tf.range(d_model, dtype=tf.float32)[tf.newaxis, :]
    exponents = (2 * (channels // 2)) / tf.cast(d_model, tf.float32)
    angle_rads = positions * (1 / tf.pow(10000, exponents))

    # Sine of the even-indexed channels, cosine of the odd-indexed channels,
    # joined along the channel axis.
    encoding = tf.concat(
        [tf.math.sin(angle_rads[:, 0::2]), tf.math.cos(angle_rads[:, 1::2])],
        axis=-1,
    )

    # Leading batch axis of size 1 so the encoding broadcasts over a batch.
    return tf.cast(encoding[tf.newaxis, ...], tf.float32)

def transformer_block(units, heads, dropout, ff_dim, name):
    """Build one transformer encoder block as a standalone Keras model.

    The block maps a (batch, steps, units) tensor to a tensor of the same
    shape: multi-head self-attention followed by a position-wise feed-forward
    network, each followed by dropout, a residual connection and layer
    normalization.

    Args:
        units: channel width of the block's input and output.
        heads: number of attention heads.
        dropout: dropout rate used on the attention weights and after each
            sub-layer.
        ff_dim: number of filters in the hidden feed-forward layer.
        name: name given to the returned Keras model.

    Returns:
        A tf.keras.Model implementing the block.
    """
    # build_model calls this block on tensors that already have `units`
    # channels (the positional encoding broadcasts the single input channel
    # up to `units`), and stacked blocks receive the previous block's
    # `units`-wide output. The input must therefore be declared with `units`
    # channels; a hard-coded (50, 1) input is rejected by Keras with a shape
    # mismatch as soon as the block is called. The step axis is left as None
    # so the block does not depend on the sequence length.
    inputs = layers.Input(shape=(None, units))

    # Self-attention: the same tensor is passed as query and value (the key
    # defaults to the value). Each head works in a units // heads sub-space;
    # the max() keeps key_dim valid if heads exceeds units.
    key_dim = max(units // heads, 1)
    attention = layers.MultiHeadAttention(
        key_dim=key_dim, num_heads=heads, dropout=dropout
    )(inputs, inputs)

    # Dropout, residual connection and layer normalization around attention
    # help with regularization and stabilize training.
    attention = layers.Dropout(rate=dropout)(attention)
    attention = layers.LayerNormalization(epsilon=1e-6)(inputs + attention)

    # Position-wise feed-forward network built from two kernel-size-1
    # convolutions: the first projects to ff_dim channels with a ReLU
    # non-linearity, the second projects back to `units` channels so the
    # residual addition below is shape-compatible.
    ff = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(attention)
    ff = layers.Dropout(rate=dropout)(ff)
    ff = layers.Conv1D(filters=units, kernel_size=1)(ff)

    # Dropout, residual connection and layer normalization around the
    # feed-forward network.
    ff = layers.Dropout(rate=dropout)(ff)
    ff = layers.LayerNormalization(epsilon=1e-6)(attention + ff)

    return tf.keras.Model(inputs=inputs, outputs=ff, name=name)

def build_model(units, heads, dropout, ff_dim, num_blocks, input_length=50, output_length=81):
    """Assemble the full sequence-to-vector transformer model.

    Args:
        units: channel width of the positional encoding and transformer blocks.
        heads: number of attention heads per block.
        dropout: dropout rate forwarded to every transformer block.
        ff_dim: feed-forward width forwarded to every transformer block.
        num_blocks: number of transformer blocks stacked in sequence.
        input_length: number of steps in each input sequence.
        output_length: number of values predicted per sample.

    Returns:
        A tf.keras.Model taking (batch, input_length, 1) inputs and producing
        (batch, output_length) outputs in the range (0, 1).
    """
    inputs = layers.Input(shape=(input_length, 1))

    # The encoding has shape (1, input_length, units); adding it to the
    # single-channel input broadcasts the result to `units` channels.
    encoding = positional_encoding(input_length, units)
    x = inputs + encoding[:, :input_length, :]

    # Stack the transformer blocks, each applied to the previous output.
    for block_idx in range(num_blocks):
        block = transformer_block(
            units=units,
            heads=heads,
            dropout=dropout,
            ff_dim=ff_dim,
            name=f"transformer_block_{block_idx}",
        )
        x = block(x)

    # Head: average over the step axis, then dropout -> Dense(ReLU) -> dropout.
    head_layers = (
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.1),
        layers.Dense(20, activation="relu"),
        layers.Dropout(0.1),
    )
    for head_layer in head_layers:
        x = head_layer(x)

    # Sigmoid output keeps every predicted value between 0 and 1.
    outputs = layers.Dense(output_length, activation="sigmoid")(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [9]:
# Hyperparameters handed to build_model when the model is created.
units = 128      # channel width of the positional encoding / transformer blocks
heads = 4        # attention heads per transformer block
dropout = 0.2    # dropout rate inside each transformer block
ff_dim = 128     # filters in the hidden feed-forward layer of each block
num_blocks = 3   # number of stacked transformer blocks

In [10]:
# Output directory (relative to the notebook) that later cells prefix to the
# model checkpoint, training-history JSON and saved prediction arrays.
save_path = "./Tr_P10_OA/"

In [6]:
# print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]


In [11]:
# Build, compile and train the model on the first visible GPU, checkpointing
# the best weights and saving the training history as JSON.

# Device-placement logging stays off; pass True to debug where ops run.
tf.debugging.set_log_device_placement(False)

# Verify the available GPUs
gpus = tf.config.list_physical_devices('GPU')
print("GPUs:", gpus)
if gpus:
    # The checkpoint and the history file are both written under save_path,
    # so make sure the directory exists before training starts.
    os.makedirs(save_path, exist_ok=True)

    # Use the first GPU for training
    with tf.device('/GPU:0'):
        # Create the model
        model = build_model(units=units, heads=heads, dropout=dropout, ff_dim=ff_dim, num_blocks=num_blocks)
        # Compile the model
        model.compile(optimizer='adam', loss='mean_squared_error', metrics = ['mae'])
        # Print model summary
        model.summary()

        # Stop after 10 epochs without val_loss improvement (restoring the
        # best weights) and keep only the best model on disk.
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
        model_checkpoint = tf.keras.callbacks.ModelCheckpoint(save_path+'best_tr_model.h5', save_best_only=True, verbose=1)

        # Training loop
        num_epochs = 200

        history = model.fit(
            X_train_reshaped, Y_train_reshaped,
            epochs=num_epochs,
            batch_size = 32,
            validation_split=0.2,  # 20% of the data will be used for validation
            callbacks=[early_stopping, model_checkpoint]
        )

    # Save the history as a JSON file. This lives inside the GPU branch
    # because `history` only exists when training actually ran; dumping it
    # unconditionally raised NameError when no GPU was available.
    with open(save_path+'history.json','w') as file:
        json.dump(history.history, file)
else:
    print("No GPU available.")



GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-11-15 19:58:37.122566: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78946 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:01:00.0, compute capability: 8.0


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 1)]           0         
                                                                 
 tf.__operators__.add (TFOp  (None, 50, 128)           0         
 Lambda)                                                         
                                                                 
 transformer_block_0 (Funct  (None, None, 128)         99584     
 ional)                                                          
                                                                 
 transformer_block_1 (Funct  (None, None, 128)         99584     
 ional)                                                          
                                                                 
 transformer_block_2 (Funct  (None, None, 128)         99584     
 ional)                                                      

2023-11-15 19:58:42.117605: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-11-15 19:58:42.504617: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-11-15 19:58:43.121925: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f1af0007310 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-15 19:58:43.122000: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA A100 80GB PCIe, Compute Capability 8.0
2023-11-15 19:58:43.131073: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-15 19:58:43.255192: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the pro

Epoch 1: val_loss improved from inf to 0.05665, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 2/200
   4/1244 [..............................] - ETA: 22s - loss: 0.0578 - mae: 0.1135

  saving_api.save_model(


Epoch 2: val_loss improved from 0.05665 to 0.04961, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 3/200
Epoch 3: val_loss improved from 0.04961 to 0.04755, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 4/200
Epoch 4: val_loss improved from 0.04755 to 0.04590, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 5/200
Epoch 5: val_loss improved from 0.04590 to 0.04455, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 6/200
Epoch 6: val_loss improved from 0.04455 to 0.04240, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 7/200
Epoch 7: val_loss did not improve from 0.04240
Epoch 8/200
Epoch 8: val_loss improved from 0.04240 to 0.04239, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 9/200
Epoch 9: val_loss did not improve from 0.04239
Epoch 10/200
Epoch 10: val_loss improved from 0.04239 to 0.04187, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 11/200
Epoch 11: val_loss improved from 0.04187 to 0.04071, saving model to ./Tr_P10_OA/best_tr_model.h5
Epoch 12/200
Epoch

In [None]:
# Reload the saved training history and plot training vs. validation loss.
# The context manager closes the file handle once the JSON has been read.
with open(save_path+"history.json") as f:
    history = json.load(f)

# summarize history for loss
mod_name = "Tr_P10_OA"
plt.plot(history['loss'], label='Training Loss')
plt.plot(history['val_loss'], label='Validation Loss')
plt.title('Transformer '+mod_name+' training and validation loss over time')
plt.ylabel('Loss (MSE)')
plt.xlabel('Epochs')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Evaluate the best checkpoint on the held-out test split and save its
# predictions next to the ground truth.

# Load the best model
best_model = tf.keras.models.load_model(save_path+'best_tr_model.h5')

# Evaluation on the test set. The model takes (samples, length, 1) inputs, so
# the reshaped test inputs from the data-preparation cell are used; the
# targets are the 2-D test array, which matches the model's output shape.
test_metrics = best_model.evaluate(X_test_reshaped, Y_test, return_dict=True)
print(test_metrics)

# Generate predictions on the test data with the loaded checkpoint, so the
# cell works from the saved file rather than from an in-memory `model`.
predictions = best_model.predict(X_test_reshaped)

# Save the predictions and ground_truth as a NumPy array
np.save(save_path+'predictions.npy', predictions)
#Save the ground truth:
np.save(save_path+"ground-truth.npy", Y_test)

# Calculate evaluation metrics
mse = mean_squared_error(Y_test, predictions)
mae = mean_absolute_error(Y_test, predictions)
rmse = np.sqrt(mse)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Root Mean Squared Error (RMSE): {rmse}')


In [None]:
# Print a few saved predictions next to their ground-truth values.
y_pred = np.load(save_path+"predictions.npy")
y_true = np.load(save_path +"ground-truth.npy")

print(y_pred.shape)
print(y_true.shape)

# Take the loop bounds from the loaded arrays instead of hard-coding them, so
# the cell still works if the test set has fewer than 5 samples or the model
# is built with a different output length.
num_samples = min(5, len(y_true), len(y_pred))
num_outputs = min(y_true.shape[1], y_pred.shape[1])

#Print some samples:
for sample in range(num_samples):
    print(f"SAMPLE: {sample}")
    for i in range(num_outputs):
        print(f"Index {i} Ground truth: {y_true[sample][i]} Predicted: {y_pred[sample][i]}")

In [None]:
#Benesh convert
#Error Array
#Box Plot
#MAPE
#R2