# Collaborative RUL Estimation of Turbofan Engines using NCMAPSS

In [None]:
# Datasets
import h5py
import math
import pandas as pd
import operator
from pandas import DataFrame
# Helper libraries
import numpy as np
from numpy.lib.stride_tricks import as_strided
from itertools import chain
import os
import random
import time
import itertools
import pyarrow as pa
import pyarrow.parquet as pq
# Import TensorFlow
import tensorflow as tf
#Keras
from keras.models import Sequential,load_model
from keras.models import model_from_json
from keras import optimizers
import keras.backend as K
from tensorflow import keras
from keras.optimizers import SGD
from keras import layers
from keras import models
from keras import regularizers
from keras import layers
import keras.backend as K
#Matplot
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import gridspec
#sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from itertools import product
from sklearn.utils import shuffle

for pcidev in $(lspci -D|grep 'VGA compatible controller: NVIDIA'|sed -e 's/[[:space:]].*//'); do echo 0 > /sys/bus/pci/devices/${pcidev}/numa_node; done

for a in /sys/bus/pci/devices/*; do echo 0 | sudo tee -a $a/numa_node; done

In [None]:
# List the GPUs TensorFlow can see and print them for reference.
gpus = tf.config.list_physical_devices(device_type = 'GPU')
print(gpus)
# Turn on memory growth for every listed GPU.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

## Create a strategy to distribute the variables and the graph

In [None]:
# Mirrored strategy over the explicitly listed devices.
# NOTE(review): the list names GPU:0, GPU:1 and GPU:3 (GPU:2 is skipped) —
# confirm this matches the intended hardware.
strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1","GPU:3"])

In [None]:
# Report how many replicas the strategy keeps in sync.
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
# NOTE(review): eager execution is presumably already the default in TF 2.x —
# confirm this call is still needed at this point in the notebook.
tf.compat.v1.enable_eager_execution()

# Global Variables

In [None]:
# Hyperparameters
NODES = 3              # number of flight-class partitions (FC1..FC<NODES>) that are loaded
WINDOW_LEN = 50        # rows per sliding window
STRIDE = 6             # step between the starts of consecutive windows
BATCH_SIZE = 128       # windows per batch
LAYERS = [64,32]       # passed to predictor() as its `filter` argument
LEARNING_RATE = 0.001  # learning rate handed to the SGD optimizer
EPOCHS = 3             # passes of the training loop

In [None]:
# Global Sync Variables
# One slot per node; labelDistribution() replaces each slot with that node's
# label windows.
M_LABELS_TRAIN = list(range(NODES))
M_LABELS_EVAL = list(range(NODES))
# Index bookkeeping containers (start empty).
SHUFFLE_IDX = dict()
TRAINING_IDX = dict()
VAL_IDX = dict()
# Largest per-node dataset sizes, updated by labelDistribution().
MAX_DATASET_SIZE_TRAIN = 0
MAX_DATASET_SIZE_EVAL = 0

## Auxiliary Functions

In [None]:
def split_sequences(input_data, sequence_length, stride = 1, option = None):
    """
    Slice a 2-D array into sliding windows along its first axis.

    Parameters:
    - input_data: array indexed as [row, column]
    - sequence_length: number of consecutive rows in each window
    - stride: step between the first rows of consecutive windows
    - option: None   -> keep every full window (sequence_length rows each)
              'last' -> keep only the last row of each window
              'next' -> keep only the row that immediately follows each window

    Returns:
    - NumPy array with one entry per window. When no window fits, an empty
      array with the same trailing dimensions as a non-empty result is
      returned, so results from different inputs can still be stacked.
    """
    input_data = np.asarray(input_data)
    n_rows = len(input_data)

    # 'next' reads row `end_ix` itself, so a window that ends on the final row
    # has no successor and must not be emitted.
    max_end = n_rows - 1 if option == 'next' else n_rows

    X = []
    for i in range(0, n_rows, stride):
        # find the end of this pattern
        end_ix = i + sequence_length

        # check if we are beyond the dataset
        if end_ix > max_end:
            break

        # gather the requested part of the pattern
        if option == 'last':
            seq_x = input_data[end_ix-1, :]
        elif option == 'next':
            seq_x = input_data[end_ix, :]
        else:
            seq_x = input_data[i:end_ix, :]
        X.append(seq_x)

    if not X:
        # Keep the per-window shape even when there are no windows.
        trailing = input_data.shape[1:]
        leading = (0,) if option in ('last', 'next') else (0, sequence_length)
        return np.empty(leading + trailing, dtype=input_data.dtype)

    return np.array(X)
def sequence_generator(input_data, units, cycles, sequence_length=10,stride = 1, option=None):
    """
    Window the data separately for every (unit, cycle) pair.

    Rows are first grouped by the value in `units`, then by the value in
    `cycles`; split_sequences() is applied to each group, so no window mixes
    rows from two groups.

    Returns a tuple of
    - all windows stacked with np.vstack,
    - a column vector with the unit value of each window,
    - a column vector with the cycle value of each window.
    """
    window_chunks = []
    unit_ids = []
    cycle_ids = []

    for unit_id in np.unique(units):
        unit_rows = np.ravel(units == unit_id)
        unit_cycles = cycles[unit_rows]
        unit_data = input_data[unit_rows]

        for cycle_id in np.unique(unit_cycles):
            cycle_rows = np.ravel(unit_cycles == cycle_id)
            chunk = split_sequences(unit_data[cycle_rows], sequence_length, stride, option)
            window_chunks.append(chunk)

            # Tag every window of this group with its unit and cycle.
            count = len(chunk)
            unit_ids.extend(np.ones(count, dtype = int) * unit_id)
            cycle_ids.extend(np.ones(count, dtype = int) * cycle_id)

    stacked = np.vstack(window_chunks)
    unit_column = np.array(unit_ids).reshape(-1, 1)
    cycle_column = np.array(cycle_ids).reshape(-1, 1)
    return stacked, unit_column, cycle_column

In [None]:
def labelDistribution(NODES):
    """
    Collect the shuffled label windows of every node and split them 85/15.

    For node k (1-based) the file "FC<k>/FC<k>_dev.h5" is read, the labels are
    windowed per unit/cycle (last row of each window), shuffled with an
    unseeded random permutation and split: the first 85% go to
    M_LABELS_TRAIN[k-1], the rest to M_LABELS_EVAL[k-1].
    MAX_DATASET_SIZE_TRAIN / MAX_DATASET_SIZE_EVAL are raised to the largest
    split seen.
    """
    global M_LABELS_TRAIN, M_LABELS_EVAL
    global MAX_DATASET_SIZE_TRAIN, MAX_DATASET_SIZE_EVAL

    for node in range(NODES):
        fc_name = "FC" + str(node + 1)
        with h5py.File(fc_name + "/" + fc_name + '_dev' + ".h5", 'r') as hdf:
            labels = np.array(hdf.get('Y_dev'), dtype='float32')
            aux = np.array(hdf.get('A_dev'), dtype='float32')

            # Columns 0 and 1 of the auxiliary matrix drive the grouping.
            unit_col = aux[:, 0].reshape(-1, 1)
            cycle_col = aux[:, 1].reshape(-1, 1)

            # Create Windows for Labels
            label_windows, _, _ = sequence_generator(
                labels, unit_col, cycle_col,
                sequence_length=WINDOW_LEN, option='last', stride=STRIDE)

            # Shuffle the windows, writing the result back into the same array.
            order = np.random.permutation(label_windows.shape[0])
            np.take(label_windows, order, axis=0, out=label_windows)

            split_at = int(len(label_windows) * 0.85)
            train_part = label_windows[:split_at].copy()
            eval_part = label_windows[split_at:].copy()

            M_LABELS_TRAIN[node] = train_part
            M_LABELS_EVAL[node] = eval_part

            MAX_DATASET_SIZE_TRAIN = max(MAX_DATASET_SIZE_TRAIN, len(train_part))
            MAX_DATASET_SIZE_EVAL = max(MAX_DATASET_SIZE_EVAL, len(eval_part))
                

## Splitting data by Flight Class

In [None]:
# Global Scaler Variables
# Shared scaler instances; the data-partition functions call fit_transform()
# on them, so each call refits the scaler to the data it is given.
SCALER_X = MinMaxScaler()
SCALER_W = MinMaxScaler()
SCALER_Y = MinMaxScaler(feature_range=(0,1))

### Distribute Indexes (Training and Eval) and Labels per Replica

In [None]:
# Fill M_LABELS_TRAIN / M_LABELS_EVAL and the MAX_DATASET_SIZE_* counters.
labelDistribution(NODES)

In [None]:
# Label matrix with one column per node, padded with NaN below each node's
# last training label.
M_TRAIN = np.full((MAX_DATASET_SIZE_TRAIN, NODES), np.nan)
for x in range(NODES):
    M_TRAIN[:len(M_LABELS_TRAIN[x]), x] = np.ravel(M_LABELS_TRAIN[x])
M_TRAIN

In [None]:
# Label matrix with one column per node, padded with NaN below each node's
# last evaluation label.
M_EVAL = np.full((MAX_DATASET_SIZE_EVAL, NODES), np.nan)
for x in range(NODES):
    M_EVAL[:len(M_LABELS_EVAL[x]), x] = np.ravel(M_LABELS_EVAL[x])
M_EVAL

In [None]:
def labelSynchronization(M, localLab):
    """
    Pair every sample of the local worker with a same-label sample on each
    other worker.

    Parameters:
    - M: 2-D array with one column per worker holding that worker's label
      values. NaN entries never match anything.
    - localLab: column index of the local worker

    Returns:
    - int array shaped like M containing 1-based row indices.
      Column `localLab` is 1..len(M). For any other worker w, entry [row, w]
      is the 1-based row of column w whose label equals M[row, localLab].
      Matching rows are handed out in order and reused cyclically when the
      local worker has more samples of a label than worker w. Entries stay 0
      where no match exists.

    NOTE(review): the candidate labels are taken from column 0 only, so local
    labels that never occur in column 0 are left unmatched — confirm this is
    intended.
    """
    M = np.asarray(M)
    numRows, numWorkers = M.shape

    synchronizedLabels = np.zeros(M.shape, dtype=int)
    synchronizedLabels[:, localLab] = np.arange(1, numRows + 1)
    if numWorkers <= 1:
        return synchronizedLabels

    ID = np.unique(M[:, 0])
    for worker in range(numWorkers):
        if worker == localLab:
            continue
        for label in ID:
            candidates = np.flatnonzero(M[:, worker] == label)
            if candidates.size == 0:
                # This worker has no sample with that label: leave the 0s.
                continue
            localRows = np.flatnonzero(M[:, localLab] == label)
            # k-th local occurrence -> k-th candidate, wrapping around.
            picks = np.arange(localRows.size) % candidates.size
            synchronizedLabels[localRows, worker] = candidates[picks] + 1
    return synchronizedLabels


# Synchronize against the last node; subtracting 1 turns the 1-based result
# into 0-based row indices. Entries the function left at 0 become -1.
# NOTE(review): -1 used as a NumPy index selects the last row — confirm that
# is acceptable for unmatched entries.
SYNCHRONIZED_LABELS_TRAIN=labelSynchronization(M_TRAIN, NODES-1)-1
SYNCHRONIZED_LABELS_EVAL=labelSynchronization(M_EVAL, NODES-1)-1

In [None]:
# Notebook display of the training index matrix.
SYNCHRONIZED_LABELS_TRAIN

In [None]:
# Notebook display of the evaluation index matrix.
SYNCHRONIZED_LABELS_EVAL

In [None]:
def batch_3d_array(arr, batch_size):
    """
    Batch a 3D NumPy array into smaller chunks.

    Parameters:
    - arr: 3D NumPy array
    - batch_size: Size of each batch along the first dimension (positive int)

    Returns:
    - NumPy array of shape (num_batches, batch_size, arr.shape[1],
      arr.shape[2]) with arr's dtype. When the first dimension is not a
      multiple of batch_size, the tail of the last batch is zero-padded.

    Raises:
    - ValueError: if arr is not 3D or batch_size is not positive.
    """
    shape = arr.shape
    if len(shape) != 3:
        raise ValueError("Input array must be 3D.")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer.")

    num_batches = (shape[0] + batch_size - 1) // batch_size
    # zeros, not empty: the unused tail of the last batch must hold defined
    # padding rather than whatever happened to be in memory.
    batches = np.zeros((num_batches, batch_size, shape[1], shape[2]), dtype=arr.dtype)

    # View the batch grid as one flat run of samples and fill its head.
    flat = batches.reshape(num_batches * batch_size, shape[1], shape[2])
    flat[:shape[0]] = arr

    return batches

def batch_2d_array(arr, batch_size):
    """
    Batch a 2D NumPy array into smaller chunks.

    Parameters:
    - arr: 2D NumPy array
    - batch_size: Size of each batch (positive int)

    Returns:
    - NumPy array of shape (num_batches, batch_size, arr.shape[1]) with arr's
      dtype. When the first dimension is not a multiple of batch_size, the
      tail of the last batch is zero-padded.

    Raises:
    - ValueError: if arr is not 2D or batch_size is not positive.
    """
    shape = arr.shape
    if len(shape) != 2:
        raise ValueError("Input array must be 2D.")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer.")

    num_batches = (shape[0] + batch_size - 1) // batch_size
    # zeros, not empty: the unused tail of the last batch must hold defined
    # padding rather than whatever happened to be in memory.
    batches = np.zeros((num_batches, batch_size, shape[1]), dtype=arr.dtype)

    # View the batch grid as one flat run of rows and fill its head.
    flat = batches.reshape(num_batches * batch_size, shape[1])
    flat[:shape[0]] = arr

    return batches
def batch_data_2d(test, batch_size):
    """
    Split a 2-D array into equally sized batches along its first axis.

    Parameters:
    - test: 2-D NumPy array (rows, columns); it does not need to be contiguous
    - batch_size: rows per batch; a falsy value (None/0) means one batch
      holding every row

    Returns:
    - New array of shape (num_batches, batch_size, columns) with test's dtype.
      The last batch is zero-padded when the row count is not a multiple of
      batch_size.
    """
    m, n = test.shape
    if not batch_size:
        batch_size = m
    # Ceiling division; an empty input with no batch size yields zero batches.
    num_batches = -(-m // batch_size) if batch_size else 0

    # Allocate the zero-filled batch grid once, then copy the rows into its
    # head. Plain assignment respects the input's real strides, so sliced or
    # transposed inputs are copied correctly.
    batches = np.zeros((num_batches, batch_size, n), dtype=test.dtype)
    batches.reshape(num_batches * batch_size, n)[:m] = test
    return batches

def batch_data_3d(test, batch_size):
    """
    Split a 3-D array into equally sized batches along its first axis.

    Parameters:
    - test: 3-D NumPy array; it does not need to be contiguous
    - batch_size: samples per batch; a falsy value (None/0) means one batch
      holding every sample

    Returns:
    - New array of shape (num_batches, batch_size, test.shape[1],
      test.shape[2]) with test's dtype. The last batch is zero-padded when the
      sample count is not a multiple of batch_size.
    """
    m, n, p = test.shape
    if not batch_size:
        batch_size = m
    # Ceiling division; an empty input with no batch size yields zero batches.
    num_batches = -(-m // batch_size) if batch_size else 0

    # Allocate the zero-filled batch grid once, then copy the samples into its
    # head. Plain assignment respects the input's real strides, so sliced or
    # transposed inputs are copied correctly.
    batches = np.zeros((num_batches, batch_size, n, p), dtype=test.dtype)
    batches.reshape(num_batches * batch_size, n, p)[:m] = test
    return batches

In [None]:

def fn_data_partition_train(NODES, evaluation=False):
    """
    Build, for every node, its list of training or evaluation batches.

    For node FC (0-based) the file "FC<FC+1>/FC<FC+1>_dev.h5" is read, X_s, W
    and Y are min-max scaled, windowed per unit/cycle (the label is the last
    row of each window), reduced to the windows listed in column FC of
    SYNCHRONIZED_LABELS_TRAIN (or SYNCHRONIZED_LABELS_EVAL) and split into
    zero-padded batches of BATCH_SIZE.

    Parameters:
    - NODES: number of nodes / FC files to load
    - evaluation: False -> use SYNCHRONIZED_LABELS_TRAIN,
                  True  -> use SYNCHRONIZED_LABELS_EVAL

    Returns:
    - list with one entry per node; each entry is a list of
      ((X_batch, W_batch), y_batch) tuples of float32 tensors.

    NOTE(review): the index matrices were derived from label windows that
    labelDistribution() shuffled, while they are applied here to freshly
    generated, unshuffled windows — confirm the two orderings are meant to
    line up.
    """
    # Column FC of the chosen matrix lists the windows node FC uses.
    sync_indices = SYNCHRONIZED_LABELS_EVAL if evaluation else SYNCHRONIZED_LABELS_TRAIN

    distributedData = []
    for FC in range(0, NODES):
        # Load the development set of this node
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_dev'+".h5", 'r') as hdf:
            W_train = np.array(hdf.get('W_dev'), dtype='float32')             # W
            X_s_train = np.array(hdf.get('X_s_dev'), dtype='float32')         # X_s
            Y_train = np.array(hdf.get('Y_dev'), dtype='float32')             # RUL
            A_train = np.array(hdf.get('A_dev'), dtype='float32')

        # Columns 0 and 1 of the auxiliary matrix drive the window grouping.
        units_train = A_train[:,0].reshape(-1,1)
        cycles_train = A_train[:,1].reshape(-1,1)

        # The shared scalers are refit on each node's own data.
        X_s_train = SCALER_X.fit_transform(X_s_train)
        W_train = SCALER_W.fit_transform(W_train)
        Y_train = SCALER_Y.fit_transform(Y_train)

        X_windows, _, _ = sequence_generator(X_s_train, units_train, cycles_train, sequence_length=WINDOW_LEN, stride=STRIDE)
        W_windows, _, _ = sequence_generator(W_train, units_train, cycles_train, sequence_length=WINDOW_LEN, stride=STRIDE)
        Y_windows, _, _ = sequence_generator(Y_train, units_train, cycles_train, sequence_length=WINDOW_LEN, option='last', stride=STRIDE)

        rows = sync_indices[:, FC]
        X_ = batch_data_3d(X_windows[rows], BATCH_SIZE)
        W_ = batch_data_3d(W_windows[rows], BATCH_SIZE)
        y_ = batch_data_2d(Y_windows[rows], BATCH_SIZE)

        DATA_REPLICA_ID = [
            ((tf.convert_to_tensor(x_batch, dtype=tf.float32),
              tf.convert_to_tensor(w_batch, dtype=tf.float32)),
             tf.convert_to_tensor(y_batch, dtype=tf.float32))
            for x_batch, w_batch, y_batch in zip(X_, W_, y_)
        ]
        distributedData.append(DATA_REPLICA_ID)
    return distributedData

def fn_data_partition_test_data_decentralized(NODES):
    """
    Build one un-batched test set per node.

    For node FC (0-based) the file "FC<FC+1>/FC<FC+1>_test.h5" is read, X_s, W
    and Y are min-max scaled and windowed per unit/cycle (the label is the
    last row of each window).

    Parameters:
    - NODES: number of nodes / FC files to load

    Returns:
    - list with one entry per node; each entry is a one-element list holding
      ((X, W), y) as float32 tensors covering all windows of that node.

    NOTE(review): the shared scalers are refit on the test data here
    (fit_transform) instead of reusing the fit obtained on the training data —
    confirm this is intended.
    """
    distributedData = []
    for FC in range(0, NODES):
        # Load the test set of this node
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_test'+".h5", 'r') as hdf:
            W_test = np.array(hdf.get('W_test'), dtype='float32')             # W
            X_s_test = np.array(hdf.get('X_s_test'), dtype='float32')         # X_s
            Y_test = np.array(hdf.get('Y_test'), dtype='float32')             # RUL
            A_test = np.array(hdf.get('A_test'), dtype='float32')

        # Columns 0 and 1 of the auxiliary matrix drive the window grouping.
        units_test = A_test[:,0].reshape(-1,1)
        cycles_test = A_test[:,1].reshape(-1,1)

        X_s_test = SCALER_X.fit_transform(X_s_test)
        W_test = SCALER_W.fit_transform(W_test)
        Y_test = SCALER_Y.fit_transform(Y_test)

        X_windows, _, _ = sequence_generator(X_s_test, units_test, cycles_test, sequence_length=WINDOW_LEN, stride=STRIDE)
        W_windows, _, _ = sequence_generator(W_test, units_test, cycles_test, sequence_length=WINDOW_LEN, stride=STRIDE)
        Y_windows, _, _ = sequence_generator(Y_test, units_test, cycles_test, sequence_length=WINDOW_LEN, option='last', stride=STRIDE)

        # A single entry per node: the whole windowed test set, not batched.
        DATA_REPLICA_ID = [
            ((tf.convert_to_tensor(X_windows, dtype=tf.float32),
              tf.convert_to_tensor(W_windows, dtype=tf.float32)),
             tf.convert_to_tensor(Y_windows, dtype=tf.float32))
        ]
        distributedData.append(DATA_REPLICA_ID)
    return distributedData

def fn_data_partition_test_data_centralized(NODES):
    """
    Build a single, pooled and batched test set from all nodes.

    The files "FC<k>/FC<k>_test.h5" for k = 1..NODES are read and concatenated
    row-wise; X_s, W and Y are then min-max scaled, windowed per unit/cycle
    (the label is the last row of each window) and split into zero-padded
    batches of BATCH_SIZE.

    Parameters:
    - NODES: number of nodes / FC files to pool

    Returns:
    - one-element list; its entry is the list of ((X_batch, W_batch), y_batch)
      tuples of float32 tensors.

    NOTE(review): windows are grouped by the unit and cycle columns of the
    pooled auxiliary matrix; if the same unit id occurs in more than one FC
    file their rows end up in the same group — verify against the data.
    NOTE(review): the shared scalers are refit on the test data here —
    confirm this is intended.
    """
    W_parts, X_s_parts, Y_parts, A_parts = [], [], [], []
    for FC in range(0, NODES):
        # Load the test set of this node
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_test'+".h5", 'r') as hdf:
            W_parts.append(np.array(hdf.get('W_test'), dtype='float32'))        # W
            X_s_parts.append(np.array(hdf.get('X_s_test'), dtype='float32'))    # X_s
            Y_parts.append(np.array(hdf.get('Y_test'), dtype='float32'))        # RUL
            A_parts.append(np.array(hdf.get('A_test'), dtype='float32'))

    W_test_aux = np.concatenate(W_parts, axis=0)
    X_s_test_aux = np.concatenate(X_s_parts, axis=0)
    Y_test_aux = np.concatenate(Y_parts, axis=0)
    A_test_aux = np.concatenate(A_parts, axis=0)

    units_test = A_test_aux[:,0].reshape(-1,1)
    cycles_test = A_test_aux[:,1].reshape(-1,1)

    X_s_test = SCALER_X.fit_transform(X_s_test_aux)
    W_test = SCALER_W.fit_transform(W_test_aux)
    # The label scaler must see the RUL labels, not the auxiliary matrix.
    Y_test = SCALER_Y.fit_transform(Y_test_aux)

    X_windows, _, _ = sequence_generator(X_s_test, units_test, cycles_test, sequence_length=WINDOW_LEN, stride=STRIDE)
    W_windows, _, _ = sequence_generator(W_test, units_test, cycles_test, sequence_length=WINDOW_LEN, stride=STRIDE)
    Y_windows, _, _ = sequence_generator(Y_test, units_test, cycles_test, sequence_length=WINDOW_LEN, option='last', stride=STRIDE)

    X_ = batch_data_3d(X_windows, BATCH_SIZE)
    W_ = batch_data_3d(W_windows, BATCH_SIZE)
    y_ = batch_data_2d(Y_windows, BATCH_SIZE)

    DATA_REPLICA_ID = [
        ((tf.convert_to_tensor(x_batch, dtype=tf.float32),
          tf.convert_to_tensor(w_batch, dtype=tf.float32)),
         tf.convert_to_tensor(y_batch, dtype=tf.float32))
        for x_batch, w_batch, y_batch in zip(X_, W_, y_)
    ]
    return [DATA_REPLICA_ID]

In [None]:
# Per-node batch lists: first the training selection, then the evaluation one.
distributedDataTrain = fn_data_partition_train(NODES,False)
distributedDataEval = fn_data_partition_train(NODES,True)

In [None]:
def value_fn_train(ctx):
    """Return the training batch list that belongs to the replica described by ctx."""
    replica_index = ctx.replica_id_in_sync_group
    return distributedDataTrain[replica_index]
def value_fn_eval(ctx):
    """Return the evaluation batch list that belongs to the replica described by ctx."""
    replica_index = ctx.replica_id_in_sync_group
    return distributedDataEval[replica_index]

In [None]:
# Hand every replica its own batch list via the value functions.
distributed_values_train = strategy.experimental_distribute_values_from_function(value_fn_train)
distributed_values_eval = strategy.experimental_distribute_values_from_function(value_fn_eval)

# Create Model

In [None]:
#RUL MODEL

def predictor(t=64,
      feature_X_in=14,
      feature_W_in=4,
      feature_H_in=1,
      feature_out_size=1,
      activation='relu',
      filter = (10,10,1),
      filter_size = 10,
      useH=True):
    '''
    Build the 1-D convolutional regression model.

    The inputs are concatenated along the feature axis, passed through one
    Conv1D layer per entry of `filter` (kernel size `filter_size`, stride 1,
    'same' padding), flattened, and fed to a Dense(50) layer followed by a
    linear Dense(feature_out_size) output.

    Parameters:
    - t: number of time steps per input window
    - feature_X_in / feature_W_in / feature_H_in: feature counts of the
      X, W and H inputs
    - feature_out_size: width of the output layer
    - activation: activation of the Conv1D layers and the Dense(50) layer
    - filter: iterable with the number of filters of each Conv1D layer
    - filter_size: kernel size shared by all Conv1D layers
    - useH: if True, use H as input
          [X,W,H] -> Y
      else:
          [X,W] -> Y

    Returns:
    - an uncompiled Keras Model
    '''
    x_in = layers.Input(shape=(t, feature_X_in), name="X_in")
    w_in = layers.Input(shape=(t, feature_W_in), name="W_in")
    model_inputs = [x_in, w_in]
    if useH:
        model_inputs.append(layers.Input(shape=(t, feature_H_in), name="H_in"))

    # A Keras layer (rather than a raw TF op) joins the symbolic inputs.
    x = layers.Concatenate(axis=-1)(model_inputs)

    for n_filters in filter:
        x = layers.Conv1D(n_filters, filter_size, 1, padding='same', activation=activation)(x)

    x = layers.Flatten()(x)
    y = layers.Dense(50, activation=activation)(x)
    y = layers.Dense(feature_out_size, activation='linear')(y)

    return models.Model(model_inputs, y)

In [None]:
# Create a checkpoint directory to store the checkpoints.
checkpoint_dir = './training_checkpoints'
# Path prefix handed to checkpoint.save() in the training loop.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

## Define the loss function

Recall that the loss function consists of one or two parts:

  * The **prediction loss** measures how far off the model's predictions are from the training labels for a batch of training examples. It is computed for each labeled example and then reduced across the batch by computing the average value.
  * Optionally, **regularization loss** terms can be added to the prediction loss, to steer the model away from overfitting the training data. A common choice is L2 regularization, which adds a small fixed multiple of the sum of squares of all model weights, independent of the number of examples. Note that the model in this notebook does not define any regularization losses, so only the prediction loss is used in the training loop below.

For training on a single machine with a single GPU/CPU, this works as follows:

  * The prediction loss is computed for each example in the batch, summed across the batch, and then divided by the batch size.
  * The regularization loss is added to the prediction loss.
  * The gradient of the total loss is computed w.r.t. each model weight, and the optimizer updates each model weight from the corresponding gradient.

With `tf.distribute.Strategy`, the input batch is split between replicas.
For example, let's say you have 4 GPUs, each with one replica of the model. One batch of 256 input examples is distributed evenly across the 4 replicas, so each replica gets a batch of size 64: We have `256 = 4*64`, or generally `GLOBAL_BATCH_SIZE = num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`.

Each replica computes the loss from the training examples it gets and computes the gradients of the loss w.r.t. each model weight. The optimizer takes care that these **gradients are summed up across replicas** before using them to update the copies of the model weights on each replica.

*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*

  * Each replica computes the prediction loss for all examples distributed to it, sums up the results and divides them by `num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`, or equivalently, `GLOBAL_BATCH_SIZE`.
  * Each replica computes the regularization loss(es) and divides them by
  `num_replicas_in_sync`.

Compared to non-distributed training, all per-replica loss terms are scaled down by a factor of `1/num_replicas_in_sync`. On the other hand, all loss terms -- or rather, their gradients -- are summed across that number of replicas before the optimizer applies them. In effect, the optimizer on each replica uses the same gradients as if a non-distributed computation with `GLOBAL_BATCH_SIZE` had happened. This is consistent with the distributed and undistributed behavior of Keras `Model.fit`. See the [Distributed training with Keras](./keras.ipynb) tutorial on how a larger global batch size allows scaling up the learning rate.

In [None]:
with strategy.scope():
    def compute_loss_batch(labels, predictions, model_losses):
        """
        Root of the averaged squared error of one batch.

        The squared error per sample is averaged with
        tf.nn.compute_average_loss and the square root of that average is
        returned. `model_losses` is accepted but not used.
        """
        squared_error = tf.math.pow(labels - predictions, 2)  # Sample error
        mean_squared_error = tf.nn.compute_average_loss(squared_error)
        return tf.math.sqrt(mean_squared_error)  # Batch Error

In [None]:
# Metric objects created under the strategy scope: one RMSE for training,
# MAE and RMSE for evaluation.
with strategy.scope():
    eval_mae = tf.keras.metrics.MeanAbsoluteError()
    train_rmse = tf.keras.metrics.RootMeanSquaredError()
    eval_rmse = tf.keras.metrics.RootMeanSquaredError()

In [None]:
# A model, an optimizer, and a checkpoint must be created under `strategy.scope`.
with strategy.scope():
    # The input length follows WINDOW_LEN so the model always matches the
    # windows produced by the data-partition functions.
    model = predictor(t=WINDOW_LEN,useH=False,filter = LAYERS)
    optimizer = SGD(learning_rate=LEARNING_RATE)
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

## Auxiliary Functions for Training

In [None]:
def train_step_batch(inputs):
    """
    Run one local training step on a batch.

    `inputs` is an (input_signals, labels) pair. The model's weights are
    updated from the batch loss, train_rmse is updated with the predictions,
    and the batch loss is returned.
    """
    features, targets = inputs
    with tf.GradientTape() as tape:
        outputs = model(features, training=True)
        batch_loss = compute_loss_batch(targets, outputs, model.losses) # Batch Error
    weights = model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(batch_loss, weights), weights))
    train_rmse.update_state(targets, outputs)
    return batch_loss
def train_step_sample(inputs):
    """
    Return the model's predictions for one batch without updating any weight.

    `inputs` is an (input_signals, labels) pair; the labels are ignored.
    """
    input_signals, _ = inputs
    # No GradientTape here: nothing in this step is differentiated.
    return model(input_signals, training=True)

def compute_loss_fedLabSync(inputs, collaborativePredictions):
    """
    Run one collaborative training step on a batch.

    The loss is computed on the average of the given collaborative
    predictions and the model's own predictions; the model's weights are
    updated from it, train_rmse is updated with the same averaged
    predictions, and the batch loss is returned.
    """
    features, targets = inputs
    with tf.GradientTape() as tape:
        local_predictions = model(features, training=True)
        blended = (collaborativePredictions+local_predictions)/2
        batch_loss = compute_loss_batch(targets, blended, model.losses) # Batch Error
    weights = model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(batch_loss, weights), weights))
    train_rmse.update_state(targets, blended)
    return batch_loss
    
def test_step(inputs, collaborativePredictions):
    """Update eval_mae and eval_rmse with the collaborative predictions for one batch."""
    _, targets = inputs
    for metric in (eval_mae, eval_rmse):
        metric.update_state(targets, collaborativePredictions)

In [None]:
# Run one plain training step on every replica with the distributed input.
@tf.function
def distributed_train_step_batch(dataset_inputs):
    """Run train_step_batch on every replica and return the sum of the per-replica losses."""
    replica_losses = strategy.run(train_step_batch, args=(dataset_inputs,))
    summed = strategy.reduce(tf.distribute.ReduceOp.SUM, replica_losses, axis=None)
    return summed
@tf.function
def collaborative_predictions(dataset_inputs):
    """Run train_step_sample on every replica and return the mean of the per-replica predictions."""
    replica_predictions = strategy.run(train_step_sample, args=(dataset_inputs,))
    averaged = strategy.reduce(tf.distribute.ReduceOp.MEAN, replica_predictions, axis=None)
    return averaged
@tf.function
def local_collaborative_loss(collaborative_predictions, dataset_inputs):
    """Run compute_loss_fedLabSync on every replica and return the per-replica result."""
    step_args = (dataset_inputs, collaborative_predictions,)
    return strategy.run(compute_loss_fedLabSync, args=step_args)

@tf.function
def distributed_test_step_batch(dataset_inputs,collaborative_predictions):
    """Run test_step on every replica with the given collaborative predictions."""
    step_args = (dataset_inputs, collaborative_predictions,)
    return strategy.run(test_step, args=step_args)

# Training Procedure

In [None]:
# One pass per epoch: collaborative training, then evaluation, then reporting.
for epoch in range(EPOCHS):
    # TRAIN LOOP
    num_batches = 0
    for x in distributed_values_train:
        #distributed_train_step_batch(x)
        # Predictions averaged over all replicas ...
        colaboratePrediction = collaborative_predictions(x)
        # ... are blended with each replica's own predictions for its update.
        total_loss = local_collaborative_loss(colaboratePrediction, x)
        num_batches += 1
    
    # EVAL LOOP: the metrics are fed the replica-averaged predictions.
    for x in distributed_values_eval:
        colaboratePrediction = collaborative_predictions(x)
        distributed_test_step_batch(x,colaboratePrediction)

    # Save a checkpoint on every even epoch index (0, 2, ...).
    if epoch % 2 == 0:
        checkpoint.save(checkpoint_prefix)

    template = ("Epoch {}, Train_RMSE: {}, Eval MAE: {}, "
              "Eval_RMSE: {}")
    print(template.format(epoch + 1, train_rmse.result(), eval_mae.result(),
                         eval_rmse.result()))
    # Start the next epoch with clean metric state.
    eval_mae.reset_states()
    train_rmse.reset_states()
    eval_rmse.reset_states()

# Save Model

In [None]:
def save_model_per_replica(model,FC):
    """
    Write the model's weights and architecture under "RUL_MODEL/FC<FC>/".

    Parameters:
    - model: object providing save_weights(path) and to_json()
    - FC: identifier used in the directory name

    The weights go to "model_federated.h5" and the JSON architecture to
    "model_federated.json". The directory is created when it is missing.
    """
    target_dir = "RUL_MODEL/FC"+str(FC)
    # Neither writer below creates parent directories.
    os.makedirs(target_dir, exist_ok=True)

    model.save_weights(target_dir+"/"+"model_federated.h5")
    # serialize model to JSON
    with open(target_dir+"/"+"model_federated.json", "w") as json_file:
        json_file.write(model.to_json())
    
def current_replica(ctx):
    """Return the 1-based id of the replica described by ctx."""
    replica_index = ctx.replica_id_in_sync_group
    return replica_index + 1

# Give every replica its 1-based id and let it save the model under that id.
with strategy.scope():
  replica = strategy.experimental_distribute_values_from_function(current_replica)
  strategy.run(save_model_per_replica, args=(model,replica,))

# Load Model

In [None]:
def load_model_per_replica(FC):
    """
    Rebuild the model stored under "RUL_MODEL/FC<FC>/".

    The architecture is read from "model_federated.json" and the weights from
    "model_federated.h5" in the same directory.

    Returns:
    - the restored Keras model
    """
    model_dir = "RUL_MODEL/FC"+str(FC)+"/"
    with open(model_dir+"model_federated.json", 'r') as json_file:
        loaded_model = model_from_json(json_file.read())
    # The JSON file carries the architecture only; the trained weights live
    # in the companion .h5 file.
    loaded_model.load_weights(model_dir+"model_federated.h5")
    return loaded_model

# Give every replica its 1-based id and let it load the model saved under it.
# NOTE(review): `model` is rebound to whatever strategy.run returns —
# confirm later uses of `model` expect that value.
with strategy.scope():
  replica = strategy.experimental_distribute_values_from_function(current_replica)
  model= strategy.run(load_model_per_replica, args=(replica,))

# Testing

In [None]:
# Pooled (centralized) test batches built from all nodes.
distributedDataTest = fn_data_partition_test_data_centralized(NODES)
def value_fn_test(ctx):
    # Every replica receives the same batch list; ctx is not consulted.
    return distributedDataTest[0]

# NOTE(review): the variable name says "decentralized" but the data come from
# the centralized builder above — confirm which one is intended.
distributed_values_test_data_decentralized = strategy.experimental_distribute_values_from_function(value_fn_test)

# Print the length of the replica-averaged predictions for each item.
for x in distributed_values_test_data_decentralized:
        colaboratePrediction = collaborative_predictions(x)
        print(len(colaboratePrediction))