In [1]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
import sys 
sys.path.insert(1, os.path.dirname(os.getcwd()))
from functions import *
import numpy as np 
import h5py
import tensorflow as tf
tf.keras.utils.set_random_seed(1)

In [2]:
# MODEL HYPERPARAMETERS
epochs = 5 #20000
train_size = 80 
val_size = 20
kernel_size = 3
nodes = [64,64,64,64]
activation = 'linear'
batch_size = 16
lr = 0.001
min_lr = 0.0001

# LOADING TRAINING AND VALIDATION DATA
const_dict = load_constants() 
Uf, P, T_h, T_0, Pr, Ra = get_model_constants(const_dict) 
ae_train, ae_val, x, z = load_ae_data(train_size, val_size, batch_size, Uf, P, T_h, T_0)


# LOSS FUNCTION (MSE)
# 4 components are separated for loss dynamic loss balancing
@tf.function(input_signature=[tf.TensorSpec(shape=[batch_size,256,256,4], dtype=tf.float32),
                              tf.TensorSpec(shape=[batch_size,256,256,4], dtype=tf.float32)])
def my_loss(U_true, U_pred):
  losses = tf.reduce_mean(tf.math.square(U_pred-U_true), axis=[0,1,2])
  return losses[0], losses[1], losses[2], losses[3]
  
# BUILD MODEL
optimizer = tf.keras.optimizers.Adam(lr)
autoencoder = build_ae(nodes, kernel_size, activation)

In [3]:
# TRAIN STEP FUNCTION
## For each batch we compute balanced loss
@tf.function(input_signature=[tf.TensorSpec(shape=[batch_size,256,256,4], dtype=tf.float32),
                              tf.TensorSpec(shape=[batch_size,256,256,4], dtype=tf.float32),
                              tf.TensorSpec(shape=[4], dtype=tf.float32)])
def train_step(x_batch, U_batch, lambdas):
  with tf.GradientTape(persistent=True) as tape:
    U_pred = autoencoder(x_batch, training=True)
    losses = my_loss(U_batch, U_pred) 
    loss_u, loss_w, loss_p, loss_T = losses[0], losses[1], losses[2], losses[3] 
    loss = loss_u*lambdas[0] + loss_w*lambdas[1] + loss_p*lambdas[2] + loss_T*lambdas[3]
    
  loss_data = tf.stack([loss_u, loss_w, loss_p, loss_T], axis=0)
  gradients = tape.gradient(loss, autoencoder.trainable_variables)
  optimizer.apply_gradients(zip(gradients, autoencoder.trainable_variables)) 
  return loss_data

# VALIDATION STEP FUNCTION
## Tracking validation loss
@tf.function(input_signature=[tf.TensorSpec(shape=[batch_size,256,256,4], dtype=tf.float32),
                              tf.TensorSpec(shape=[batch_size,256,256,4], dtype=tf.float32)])
def val_step(x_batch, U_batch):
  U_pred = autoencoder(x_batch, training=False)
  losses = my_loss(U_batch, U_pred) 
  loss_u, loss_w, loss_p, loss_T = losses[0], losses[1], losses[2], losses[3] 
  return (loss_u + loss_w + loss_p + loss_T) / 4.

In [4]:
# HELPER VARIABLES DURING TRAINING
learning_rate = lr
best_loss = float('inf') 
lambdas = tf.Variable(tf.ones([4], tf.float32) / 4, trainable=False)
loss_history = tf.Variable(tf.zeros([4]), dtype=tf.float32, trainable=False)

Ld = tf.Variable(tf.ones([4]), dtype=tf.float32, trainable=False)
step_tf = tf.Variable(0., dtype=tf.float32, trainable=False) 
step_val_tf = tf.Variable(0., dtype=tf.float32, trainable=False) 
w = tf.Variable(tf.zeros([4]), dtype=tf.float32, trainable=False) 
val_loss = tf.Variable(0., dtype=tf.float32, trainable=False)

In [5]:
# RUN TRAINING
for epoch in range(epochs):

  # loop over the training data
  for step, (x_batch, U_batch) in enumerate(ae_train):
    step_tf.assign(step)
    Ld_temp = train_step(x_batch, U_batch, lambdas)
    Ld.assign_add(Ld_temp)
       
  Ld.assign( Ld / (step_tf+1.) )
  Ldata = tf.math.reduce_mean(Ld)
  
  if epoch >= 1:
    w.assign(Ld / loss_history)  
    lambdas.assign(tf.nn.softmax(w))
  loss_history.assign(Ld)

  # loop over validation data
  for step_val, (x_batch_val, U_batch_val) in enumerate(ae_val):
    step_val_tf.assign(step_val)
    val_loss.assign_add( val_step(x_batch_val, U_batch_val) )

  val_loss.assign( val_loss / (step_val_tf+1.) )

  # LEARNING RATE SCHEDULER IF VALIDATION LOSS PLATEAUS
  if epoch >= 100:
    if (best_loss - val_loss) > 1e-4: 
      best_loss = val_loss
      wait = 0
    else:
      wait += 1
      
    if wait >= 20:
      new_lr = max(learning_rate * 0.8, min_lr)
      if new_lr < learning_rate:
        learning_rate = new_lr
        optimizer.learning_rate.assign(learning_rate)
      wait = 0
      
  log = f"Epoch {epoch+1}, Loss: {Ldata.numpy():.2e}, Val Loss: {val_loss.numpy():.2e}, "
  log2 = f"(u): {Ld[0].numpy():.2e}, (w): {Ld[1].numpy():.2e}, (p): {Ld[2].numpy():.2e}, (T): {Ld[3].numpy():.2e}"  
  print(log+log2)
   
  Ld.assign(tf.zeros_like(Ld))  
  val_loss.assign(tf.constant(0.))  
    
autoencoder.save_weights('cae.weights.h5', overwrite=True)
autoencoder.save('cae.keras', overwrite=True)

Epoch 1, Loss: 4.47e-01, Val Loss: 1.03e-01, (u): 4.67e-01, (w): 4.13e-01, (p): 4.60e-01, (T): 4.46e-01
Epoch 2, Loss: 6.98e-02, Val Loss: 4.44e-02, (u): 7.78e-02, (w): 6.78e-02, (p): 6.17e-02, (T): 7.19e-02
Epoch 3, Loss: 3.70e-02, Val Loss: 3.06e-02, (u): 4.26e-02, (w): 4.49e-02, (p): 1.98e-02, (T): 4.07e-02
Epoch 4, Loss: 2.73e-02, Val Loss: 2.39e-02, (u): 2.53e-02, (w): 3.86e-02, (p): 1.23e-02, (T): 3.30e-02
Epoch 5, Loss: 2.22e-02, Val Loss: 2.03e-02, (u): 1.69e-02, (w): 3.35e-02, (p): 9.37e-03, (T): 2.90e-02
