# Collaborative Remaining Useful Life ($\textit{RUL}$) estimation of turbofan engines

This notebook simulates a collaborative prognostics problem: estimating the Remaining Useful Life of turbofan engines located at different parties (airlines). The collaborative problem simulates the participation of three airlines that differ in flight length (Flight Class): short-length flights (i.e., flight class 1), medium-length flights (i.e., flight class 2), or long-length flights (i.e., flight class 3).

| Flight Class   | Flight Length [h]
| :-----------:  | :-----------:    
| 1              |    1 to 3        
| 2              |    3 to 5        
| 3              |    5 to 7  

Those flight classes were defined in the **new** C-MAPSS (Commercial Modular Aero-Propulsion System Simulation) dataset from NASA for aircraft engines. More details about the generation process can be found at https://www.mdpi.com/2306-5729/6/1/5.  

This notebook reuses the inception-based CNN network from (https://doi.org/10.36001/phmconf.2021.v13i1.3109) and its pre-processing procedures to estimate the $\textit{RUL}$ of engines experiencing a given flight class (FC). The datasets (FC*_dev.h5 and FC*_test.h5) for each FC are generated by executing:

- 1) Spliting a given dataset by Flight Class.ipynb
- 2) Concatenating datasets by Flight Class.ipynb

The collaborative problem, simulated using the Federated Label Synchronization (FedLabSync) algorithm, uses "TensorFlow Parallel Computing" to distribute the work across multiple GPUs. To run this code, a cluster of GPUs must be configured. Hyperparameters for the distributed Machine Learning experiments are configured in the "Global Variables" section, e.g., the number of nodes (parties participating in the federated network) and the current node for which the federated model is trained.

The success of the FedLabSync algorithm lies in using a label synchronization strategy to create the SYNCHRONIZED_LABELS_* matrices. Multiple options are defined in the "Label Synchronization" section.





In [None]:
# Datasets
import h5py
import math
import pandas as pd
import operator
from pandas import DataFrame
# Helper libraries
import numpy as np
from numpy.lib.stride_tricks import as_strided
from itertools import chain
import os
import random
import time
import itertools
import pyarrow as pa
import pyarrow.parquet as pq
# Import TensorFlow
import tensorflow as tf
#Keras
from keras.models import Sequential,load_model
from keras.models import model_from_json
from keras import optimizers
import keras.backend as K
from tensorflow import keras
from keras import layers
from keras import models
from keras import regularizers
from keras import layers
import keras.backend as K
#Matplot
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import gridspec
#sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy import spatial
# EVALUATION modules
from scipy.stats import spearmanr
from pickle import dump
from pickle import load
# Matplotlib style overrides (font sizes, default figure size, axis line width).
params = {'legend.fontsize': 8,
          'figure.figsize': (9,6),
         'axes.labelsize': 20,
         'axes.titlesize':20,
         'xtick.labelsize':'xx-large',
         'axes.linewidth' : 2,
         'ytick.labelsize':'xx-large'}

# Merge the overrides into matplotlib's runtime configuration.
plt.rcParams.update(params)

In [None]:
# List the GPUs that TensorFlow can see and print them for inspection.
gpus = tf.config.list_physical_devices(device_type = 'GPU')
print(gpus)
# Turn on memory growth for every listed GPU.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

## Create a strategy to distribute the variables and the graph

In [None]:
# Mirrored strategy over three explicitly named GPUs.
# NOTE(review): the device list is hard-coded (GPU:1, GPU:0, GPU:3) — confirm these exist on the target cluster.
strategy = tf.distribute.MirroredStrategy(["GPU:1", "GPU:0","GPU:3"])

In [None]:
# Report how many replicas the strategy keeps in sync.
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
# NOTE(review): eager execution is requested after the strategy has been created;
# presumably it is already the default under TF 2.x — confirm this call is still needed.
tf.compat.v1.enable_eager_execution()

# Global Variables

In [None]:
# Hyperparameters
NODES = 3                    # number of parties (flight classes) in the federation
NODE = 2                     # zero-based index of the node the model is trained for
WINDOW_LEN = 30              # length of each sliding window
STRIDE = 1                   # step between consecutive windows
BATCH_SIZE = 128
LEARNING_RATE = 0.01
EPOCHS = 900
DOWNSAMPLING_STEP = [2,2,2]  # per-flight-class row subsampling step
# EARLY STOPPING
PATIENCE=5
COLLAB_LOSS_WEIGHT=0.01
PIECE_WISE=True              # apply the piece-wise RUL correction to the labels
CYCLE_CORRECTION=False       # append a corrected cycle column to the labels
PCA_CORRECTION = True        # load a pickled PCA and project W or X with it
PCA_W=True                   # PCA on W (True) or on X (False)
# Seeds noted per flight class, kept for reference.
#FC1
#seed = 5
#seed=257
#seed=922

#FC2
#seed=257
#seed=571
#seed=111
#seed=798
#seed=345

#FC3
#seed=571
#seed=111
#seed=798
#seed=345
#seed=945

In [None]:
seed = 257
# Seed both generators: NumPy draws the label noise, while the standard-library
# generator drives random.shuffle(), which builds the train/validation split.
# Seeding only NumPy leaves that split different on every run.
np.random.seed(seed)
random.seed(seed)

In [None]:
# Global Sync Variables
# Per-node placeholders; labelDistribution() replaces each entry with that
# node's label windows.
M_LABELS_TRAIN = [*range(0, NODES)]
M_LABELS_EVAL = [*range(0, NODES)]
# Per-node index lists (shuffled order, training part, validation part).
SHUFFLE_IDX = {}
TRAINING_IDX = {}
VAL_IDX = {}
# Largest per-node sample counts, used to size the padded label matrices.
MAX_DATASET_SIZE_TRAIN = 0
MAX_DATASET_SIZE_EVAL = 0
if PCA_CORRECTION:
    # PCA fitted on W or on X, selected by PCA_W.
    _pca_path = 'data/federation/PCAW.pkl' if PCA_W else 'data/federation/PCAX.pkl'
    # NOTE: pickle can execute arbitrary code on load; only use trusted files.
    # The context manager closes the file handle, which the bare open() did not.
    with open(_pca_path, 'rb') as _pca_file:
        PCA = load(_pca_file)

## Auxiliary Classes

In [None]:
import numpy as np

class EarlyStopping:
    """Stop criterion that trips after `patience` consecutive non-improving losses."""

    def __init__(self, patience=5):
        """
        Args:
            patience (int): How many calls in a row without a new best loss
                are tolerated before the stop flag is raised.
        """
        self.patience = patience
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False

    def __call__(self, current_loss):
        """
        Record one loss value and report the stop flag.

        Args:
            current_loss (float): Loss observed for the current epoch.
        Returns:
            early_stop (bool): True once the patience has been exhausted; the
                flag is never reset afterwards.
        """
        improved = current_loss < self.best_loss
        if improved:
            # New best: remember it and restart the patience counter.
            self.best_loss, self.counter = current_loss, 0
            return self.early_stop

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
        return self.early_stop

## Auxiliary Functions

In [None]:
def correctRUL(hi, RUL):
    """Return -1 when the health indicator `hi` equals 1, otherwise `RUL` unchanged."""
    return -1 if hi == 1 else RUL
def correctMaxRUL(hi, RUL,MAX_RUL):
    """Return `MAX_RUL` when the health indicator `hi` equals 1, otherwise `RUL` unchanged."""
    return MAX_RUL if hi == 1 else RUL

In [None]:
def plot_predicted_true_rul(log_y_hat_test, unit_sel, Unit_test, C_test, rul_test):
    """
    Plot predicted versus true remaining useful life (RUL), one figure per prediction set.

    Parameters:
    -----------
    log_y_hat_test : sequence of numpy.ndarray
        Collection of prediction arrays; one figure is drawn for each entry and
        each entry is indexed with the boolean mask ``Unit_test == j``.
        NOTE(review): the name suggests a log (history) of predictions rather
        than a logarithm — confirm with the caller.
    unit_sel : list
        Units to include in each figure.
    Unit_test : numpy.ndarray
        Unit number of every test sample.
    C_test : numpy.ndarray
        Cycle number of every test sample.
    rul_test : numpy.ndarray
        True RUL of every test sample.

    Returns:
    --------
    None
        Figures are created through pyplot; nothing is returned.
    """
    for i in range(len(log_y_hat_test)):
        fig = plt.figure(figsize=(9, 7))
        leg = []
        
        # Plot predicted RUL
        for j in unit_sel:
            y_hat_mean, y_hat_max, y_hat_min = [], [], []
            unit = Unit_test == j
            # Sorted, zero-based cycle numbers of this unit ...
            c_test = np.sort(C_test[unit])-1
            # ... and the permutation that puts the predictions in the same order.
            idx = np.argsort(C_test[unit])
            y_hat_test = log_y_hat_test[i][unit]
            y_hat_test_sorted = y_hat_test[idx]
            # Aggregate all predictions that fall in the same cycle.
            for k in np.unique(c_test):
                y_hat_mean.append(np.mean(y_hat_test_sorted[c_test == k]))
                y_hat_max.append(np.max(y_hat_test_sorted[c_test == k]))
                y_hat_min.append(np.min(y_hat_test_sorted[c_test == k]))
            y_hat_mean = np.array(y_hat_mean, dtype=np.float64)
            y_hat_max = np.array(y_hat_max, dtype=np.float64)
            y_hat_min = np.array(y_hat_min, dtype=np.float64)
            # Mean as markers, min-max range as a shaded band.
            plt.plot(np.unique(c_test), y_hat_mean, 'o', alpha=0.7, markersize=5)
            plt.fill_between(np.unique(c_test), y_hat_min, y_hat_max, alpha=0.3)
        # Plot true RUL
        # Restart the colour cycle so each true-RUL line reuses its unit's prediction colour.
        plt.gca().set_prop_cycle(None)
        for j in unit_sel:        
            unit = Unit_test == j  
            c_test_unique = np.unique(np.sort(C_test[unit])-1)
            # Distinct RUL values (ascending); repeated values collapse here, so the
            # array is padded with the unit's maximum RUL up to the number of cycles.
            rul_test_unique = np.unique(rul_test[unit])
            rul_test_unique=np.append(rul_test_unique,np.zeros(len(c_test_unique)-len(rul_test_unique))+np.max(rul_test[unit]))
            # Reversed so the largest RUL is drawn at the first cycle.
            plt.plot(c_test_unique, rul_test_unique[::-1], alpha=0.7)
            leg.append('Prediction-Traj.' + str(j))           
            leg.append('$RUL$-Traj.' + str(j))
        # NOTE(review): labels are interleaved per unit (prediction, RUL), while the
        # artists are drawn as all predictions first and then all RUL lines; with more
        # than one unit the labels may not line up with the artists — verify.
        plt.legend(leg, loc='upper right')
        plt.ylabel(r'$Prediction$ & $RUL$ [cycles]')
        plt.xlabel('Time [cycles]')
        plt.ylim(top=90)
        plt.ylim(bottom=-5)

In [None]:
def split_sequences(input_data, sequence_length, stride = 1, option = None):
    """
    Slide a window of `sequence_length` rows over a 2D array.

    Parameters:
    - input_data: 2D NumPy array (rows are time steps).
    - sequence_length: number of rows per window.
    - stride: step between the starts of consecutive windows.
    - option: what to emit for each window position:
        * None (or any other value): the full window ``input_data[i:i+sequence_length]``;
        * 'last': only the last row of the window;
        * 'next': only the row that follows the window.

    Returns:
    - NumPy array stacking one entry per window position (empty when no
      window fits).
    """
    windows = []
    total_rows = len(input_data)
    # 'next' reads row `end_ix` itself, so the window must end one row earlier
    # than for the other options; otherwise the final position indexes past
    # the end of the data.
    last_valid_end = total_rows - 1 if option == 'next' else total_rows

    for start in range(0, total_rows, stride):
        end_ix = start + sequence_length

        # Stop as soon as the window (or its follow-up row) leaves the dataset.
        if end_ix > last_valid_end:
            break

        if option == 'last':
            seq_x = input_data[end_ix - 1, :]
        elif option == 'next':
            seq_x = input_data[end_ix, :]
        else:
            seq_x = input_data[start:end_ix, :]
        windows.append(seq_x)

    return np.array(windows)
def sequence_generator(input_data, units, cycles, sequence_length=10,stride = 1, option=None):
    """
    Build windows separately for every (unit, cycle) pair.

    Rows are first filtered by unit, then by cycle within that unit, and
    `split_sequences` is applied to each group.

    Returns a tuple of:
    - the vertically stacked windows of all groups,
    - a column vector with the unit of every window,
    - a column vector with the cycle of every window.
    """
    windows = []
    unit_ids = []
    cycle_ids = []
    for unit_id in np.unique(units):
        unit_rows = np.ravel(units == unit_id)
        unit_cycles = cycles[unit_rows]
        unit_data = input_data[unit_rows]
        for cycle_id in np.unique(unit_cycles):
            cycle_rows = np.ravel(unit_cycles == cycle_id)
            chunk = split_sequences(unit_data[cycle_rows], sequence_length, stride, option)
            windows.append(chunk)
            # One unit/cycle tag per generated window.
            window_count = len(chunk)
            unit_ids.extend(np.ones(window_count, dtype=int) * unit_id)
            cycle_ids.extend(np.ones(window_count, dtype=int) * cycle_id)

    stacked = np.vstack(windows)
    unit_column = np.array(unit_ids).reshape(-1, 1)
    cycle_column = np.array(cycle_ids).reshape(-1, 1)
    return stacked, unit_column, cycle_column


def sequence_generator_per_unit(input_data, units, cycles, sequence_length=10, stride =1,option=None):
    """
    Build windows separately for every unit (windows may span several cycles).

    Returns a tuple of:
    - the vertically stacked windows of all units,
    - a column vector with the unit of every window,
    - the stacked result of windowing `cycles` with the same settings.
    """
    windows, unit_ids, cycle_windows = [], [], []
    for unit_id in np.unique(units):
        unit_rows = np.ravel(units == unit_id)
        chunk = split_sequences(input_data[unit_rows], sequence_length, stride, option)
        windows.append(chunk)
        # One unit tag per generated window.
        unit_ids.extend(np.ones(len(chunk), dtype=int) * unit_id)
        cycle_windows.append(split_sequences(cycles[unit_rows], sequence_length, stride, option))

    unit_column = np.array(unit_ids).reshape(-1, 1)
    return np.vstack(windows), unit_column, np.vstack(cycle_windows)

In [None]:
def labelDistribution(NODES):
    """
    Build the label windows of every node and split them into training and
    validation parts.

    For each flight class FC in ``range(NODES)`` the development file
    ``FC<k>/FC<k>_dev.h5`` is read, downsampled with ``DOWNSAMPLING_STEP[FC]``,
    optionally corrected (piece-wise RUL, cycle column, PCA column) and
    windowed per unit, keeping the last row of each window.

    Results are written to module-level state:
    - SHUFFLE_IDX[FC]: shuffled window indexes,
    - TRAINING_IDX[FC] / VAL_IDX[FC]: first 85 % / remaining 15 % of them,
    - M_LABELS_TRAIN[FC] / M_LABELS_EVAL[FC]: the matching label windows,
    - MAX_DATASET_SIZE_TRAIN / MAX_DATASET_SIZE_EVAL: largest split sizes seen.

    Returns None.
    """
    global SHUFFLE_IDX
    global TRAINING_IDX
    global VAL_IDX
    global M_LABELS_TRAIN
    global M_LABELS_EVAL
    global MAX_DATASET_SIZE_TRAIN
    global MAX_DATASET_SIZE_EVAL
    global WINDOW_LEN
    global STRIDE
    global PIECE_WISE
    global CYCLE_CORRECTION
    global PCA
    global PCA_CORRECTION
    
    for FC in range(0, NODES):
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_dev'+".h5", 'r') as hdf:
            Y_train = np.array(hdf.get('Y_dev'), dtype='float32')
            A_train = np.array(hdf.get('A_dev'), dtype='float32')
            if PCA_CORRECTION==True:
                if PCA_W==True:
                    W_train = np.array(hdf.get('W_dev'), dtype='float32')
                    W_train = W_train[::DOWNSAMPLING_STEP[FC],:]
                else:
                    X_train = np.array(hdf.get('X_dev'), dtype='float32')
                    X_train = X_train[::DOWNSAMPLING_STEP[FC],:]
            
            Y_train = Y_train[::DOWNSAMPLING_STEP[FC],:]
            A_train = A_train[::DOWNSAMPLING_STEP[FC],:]
            
            # Auxiliary columns: first = unit, second = cycle, last = health indicator.
            units_train=A_train[:,0].reshape(-1,1)
            cycles_train=A_train[:,1].reshape(-1,1)
            hi_train = A_train[:,-1]
            
            if PCA_CORRECTION==True:
                if PCA_W==True:
                    W_train=PCA.transform(W_train)
                    PCA_windows,_,_=sequence_generator_per_unit(W_train,units_train,cycles_train,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)
                else:
                    X_train=PCA.transform(X_train)
                    PCA_windows,_,_=sequence_generator_per_unit(X_train,units_train,cycles_train,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)
                
            if CYCLE_CORRECTION==True:
                df = pd.DataFrame(data={'unit': list(itertools.chain.from_iterable(units_train)), 'cycle': list(itertools.chain.from_iterable(cycles_train))})
                gb1 = df.groupby(['unit'])['cycle'].transform(lambda x: np.max(x))
                df['max_cycle'] = gb1
                # Shift cycles so that every unit ends at cycle 100.
                df['cycle'] = 100-df['max_cycle']+df['cycle']
                # Small noise added to the shifted cycle values.
                noise = np.random.normal(-0.0001, 0.0001, len(df['cycle']))
                df['cycle'] = df['cycle']+noise
                cycles_train_aux=df['cycle'].to_numpy().reshape(-1, 1)
            
            if PIECE_WISE==True:
                # Piece-wise RUL: rows whose health indicator equals 1 receive the
                # largest RUL found among the other rows of the same unit (or -1
                # when the unit has no other rows). This is the vectorised
                # equivalent of applying correctRUL and then correctMaxRUL row by
                # row, without the per-row lookup and the float(Series) call.
                rul = pd.Series(Y_train.reshape(-1), dtype='float64')
                healthy = pd.Series(hi_train.reshape(-1)) == 1
                unit_ids = pd.Series(units_train.reshape(-1).astype(int))
                degraded_rul = rul.mask(healthy, -1.0)
                unit_max_rul = degraded_rul.groupby(unit_ids).transform('max')
                Y_train = degraded_rul.mask(healthy, unit_max_rul).to_numpy().reshape(-1, 1)
    
            # Create Windows for Labels
            Y_train=Y_train.reshape(-1)
            Y_train=Y_train.reshape(len(Y_train),1)
            Y_windows,_,_=sequence_generator_per_unit(Y_train,units_train,cycles_train,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)
            
            if CYCLE_CORRECTION==True:
                C_windows,_,_=sequence_generator_per_unit(cycles_train_aux,units_train,cycles_train,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)
                if PCA_CORRECTION==True:
                    Y_windows=np.concatenate((Y_windows, C_windows, PCA_windows), axis=1)
                else:
                    Y_windows=np.concatenate((Y_windows, C_windows), axis=1)
                    
            ids = [*range(0,Y_windows.shape[0])]
            random.shuffle(ids)
            SHUFFLE_IDX[FC] = ids
            # 85 % / 15 % split. Validation starts exactly where training ends;
            # starting at split + 1 left one window out of both sets.
            split = int(len(ids)*0.85)
            TRAINING_IDX[FC] = ids[:split]
            VAL_IDX[FC] = ids[split:]
            
            M_LABELS_TRAIN[FC] = Y_windows[TRAINING_IDX[FC]]
            M_LABELS_EVAL[FC] = Y_windows[VAL_IDX[FC]]
            
            # Track the largest split sizes across nodes.
            MAX_DATASET_SIZE_TRAIN = max(MAX_DATASET_SIZE_TRAIN, len(M_LABELS_TRAIN[FC]))
            MAX_DATASET_SIZE_EVAL = max(MAX_DATASET_SIZE_EVAL, len(M_LABELS_EVAL[FC]))

In [None]:
# Populate the per-node label windows and index splits (module-level state).
labelDistribution(NODES)

## Splitting data by Flight Class

In [None]:
# Global Scaler Variables
# Scaler instances are only created here; nothing is fitted in this cell.
SCALER_X = StandardScaler()
SCALER_W = StandardScaler()
# Labels are scaled to the [0, 1] range.
SCALER_Y = MinMaxScaler(feature_range=(0,1))

### Distribute Indexes (Training and Eval) and Labels per Replica

In [None]:
# NaN-padded training label matrix with shape (NODES, largest split, label width).
# The width is read from the labels themselves: it is 1 without cycle
# correction, 2 with it, and larger when PCA columns are concatenated as well,
# so a hard-coded 1-or-2 fails for the cycle + PCA configuration.
label_width = np.shape(M_LABELS_TRAIN[0])[1]
M_TRAIN = np.full((NODES, MAX_DATASET_SIZE_TRAIN, label_width), np.nan)
for x in range(0, NODES):
    # Nodes with fewer samples keep NaN in their trailing rows.
    M_TRAIN[x, :len(M_LABELS_TRAIN[x])] = M_LABELS_TRAIN[x]

In [None]:

# NaN-padded validation label matrix with shape (NODES, largest split, label width).
# The width is read from the labels themselves: it is 1 without cycle
# correction, 2 with it, and larger when PCA columns are concatenated as well,
# so a hard-coded 1-or-2 fails for the cycle + PCA configuration.
label_width = np.shape(M_LABELS_EVAL[0])[1]
M_EVAL = np.full((NODES, MAX_DATASET_SIZE_EVAL, label_width), np.nan)
for x in range(0, NODES):
    # Nodes with fewer samples keep NaN in their trailing rows.
    M_EVAL[x, :len(M_LABELS_EVAL[x])] = M_LABELS_EVAL[x]

# Label Synchronization

In [None]:
def find_first_nan(arr):
    """
    Return the index of the first row that contains a NaN, or None if there is none.

    The input is treated as a sequence of rows along its first axis: a 1D
    array is searched element by element, while for a 2D (or higher) array a
    row counts as NaN when any of its entries is NaN. Using the flat argmax
    position as a row index only worked for arrays with a single column.

    Note that a result of 0 is a valid index but is falsy; compare against
    None when testing for "not found".
    """
    values = np.atleast_1d(np.asarray(arr, dtype=float))
    if values.shape[0] == 0:
        return None
    nan_rows = np.isnan(values).reshape(values.shape[0], -1).any(axis=1)
    if not nan_rows.any():
        return None
    return int(np.argmax(nan_rows))

def labelSynchronization(M, localLab):
    """
    Map every local sample to a sample with the same label on each other worker.

    Parameters:
    - M: 2D NumPy array of labels, one column per worker.
    - localLab: column index of the local worker.

    Returns:
    - Integer array with the shape of `M`. Column `localLab` holds the row
      indexes 0..len(M)-1. Every other column holds, for each row, the index
      of a row in that worker's column carrying the same label as the local
      row; matching rows are handed out in order and reused cyclically when
      the local worker has more samples of a label than the other worker.

    Raises:
    - IndexError: when a local label does not occur at all on another worker.

    The previous implementation also built a labels-by-workers count matrix
    that was never read; it has been dropped.
    """
    numWorkers = M.shape[1] if len(M) else 0
    synchronizedLabels = np.zeros(M.shape, dtype=int)
    synchronizedLabels[:, localLab] = np.arange(len(M))
    if numWorkers <= 1:
        return synchronizedLabels

    local_column = M[:, localLab]
    unique_labels = np.unique(local_column)
    for worker in range(numWorkers):
        if worker == localLab:
            continue
        for label in unique_labels:
            local_rows = np.flatnonzero(local_column == label)
            if local_rows.size == 0:
                # e.g. a NaN label, which never compares equal to itself
                continue
            remote_rows = np.flatnonzero(M[:, worker] == label)
            if remote_rows.size == 0:
                raise IndexError(
                    "label {!r} not present on worker {}".format(label, worker)
                )
            # Cycle through the remote matches in order.
            picks = np.arange(local_rows.size) % remote_rows.size
            synchronizedLabels[local_rows, worker] = remote_rows[picks]
    return synchronizedLabels

In [None]:
def euclidean_distance(point1, point2):
    """
    Distance between two label points.

    With CYCLE_CORRECTION enabled the points are iterated component-wise and
    the Euclidean norm of their difference is returned; otherwise the
    absolute difference ``|point1 - point2|`` is returned as is.
    A later cell of this notebook redefines this function without the
    CYCLE_CORRECTION branch.
    """
    if not (CYCLE_CORRECTION==True):
        return np.abs(point1-point2)

    squared_gaps = ((p1 - p2) ** 2 for p1, p2 in zip(point1, point2))
    return sum(squared_gaps) ** 0.5

def alternative_closest_index_in_list2(closest_index, min_distance, value1, list2):
    """
    Scan `list2` for an entry closer to `value1` than `min_distance`.

    Starts from the given (closest_index, min_distance) pair and replaces it
    whenever a strictly smaller distance is found. Pairs in which either
    value contains NaN are ignored.

    Returns the tuple (closest_index, min_distance) after the scan.
    """
    best_index, best_distance = closest_index, min_distance
    for position, candidate in enumerate(list2):
        if np.isnan(value1).any() or np.isnan(candidate).any():
            continue
        gap = euclidean_distance(value1, candidate)
        if gap < best_distance:
            best_index, best_distance = position, gap
    return best_index, best_distance
    
def closest_index_in_list2(count, closest_index, min_distance, closest_indices, j,value1,list2):
    """
    Find the index in `list2` of the entry closest to `value1`.

    Parameters:
    - count: recursion depth; only passed on (incremented) to the recursive call.
    - closest_index, min_distance: starting best index and distance.
    - closest_indices: indexes already assigned to earlier values.
    - j: position of `value1` in the caller's list; only passed on.
    - value1: the value to match.
    - list2: candidate values.

    Returns the tuple (closest_index, min_distance).

    When CYCLE_CORRECTION is False and the best index was already assigned,
    that candidate is removed and the search is repeated on the rest.
    A later cell of this notebook defines this function again with the same body.
    """
    for i, value2 in enumerate(list2):
            # Pairs in which either value contains NaN are skipped entirely.
            if ~np.isnan(value1).any() and ~np.isnan(value2).any():
                distance = euclidean_distance(value1, value2)
                if distance < min_distance:
                    min_distance = distance
                    closest_index = i
                if CYCLE_CORRECTION==False:
                    # Duplicate handling runs once, when the last element is reached
                    # (and only if that last pair passed the NaN check above).
                    if i==len(list2)-1:
                        try:
                            if len(closest_indices)> 0:
                                # count = 0
                                # Raises ValueError when the best index is still unused;
                                # the except clause below then leaves the result untouched.
                                index = closest_indices.index(closest_index)
                                # NOTE(review): the helper starts from the current min_distance,
                                # so its result cannot exceed it; this test looks always true — confirm.
                                if min_distance >= alternative_closest_index_in_list2(closest_index, min_distance, value1, np.delete(list2, closest_index, axis=0))[1]:
                                    list2 = np.delete(list2, closest_index, axis=0)
                                    # NOTE(review): "+1" presumably compensates for the removed row,
                                    # which is only exact for indexes at or after it — verify.
                                    closest_index=closest_index_in_list2(count+1, closest_index, float('inf'), closest_indices, j,value1,list2)[0]+1
                        except ValueError:
                            1
    return closest_index, min_distance

def closest_index_for_eachND(list1, list2):
    """
    For every value of `list1`, look up the index of its closest entry in `list2`.

    Each search starts from no index and an infinite distance and receives
    the list of indexes chosen so far.

    Returns the list of chosen indexes, one per value of `list1`.
    """
    matches = []
    for position, value in enumerate(list1):
        found, _ = closest_index_in_list2(0, None, float('inf'), matches, position, value, list2)
        matches.append(found)
    return matches

def closest_index_matrix_ND(M_labels,NODE):
    """
    Build the index matrix that aligns node `NODE` with every other node.

    For the local node the column is simply 0..n-1; for any other node it is
    the result of `closest_index_for_eachND` between the local labels and
    that node's labels.

    Returns the transposed stack of these per-node index vectors.
    """
    for labels_node in range(0, M_labels.shape[0]):
        if labels_node == NODE:
            column = np.array(range(0, len(M_labels[:][NODE])))
        else:
            column = np.array(closest_index_for_eachND(M_labels[:][NODE], M_labels[:][labels_node]))
        # The first node starts the matrix; later nodes are stacked below it.
        M = column if labels_node == 0 else np.vstack((M, column))
    return np.transpose(M)
    
# Example 2D
# Toy input: 3 nodes x 4 samples x 1 label; the second node has one NaN entry.
M_labels = np.array([[[0.2], 
                      [0.6], 
                      [0.2],
                      [0.2]],
                     [[0.2],
                      [1],
                      [0.2], 
                      [np.nan],],
                     [[0.4],
                      [0.2],
                      [0.2],
                      [0.2]]])

# Uses the global NODE as the local node.
M = closest_index_matrix_ND(M_labels,NODE)
# Prints the shape of the input, not the resulting index matrix M.
print(M_labels.shape)


In [None]:
def cosineDistance(a, b):
    """Return the cosine distance between vectors `a` and `b` (scipy.spatial.distance.cosine)."""
    distance = spatial.distance.cosine(a, b)
    return distance
def replaceCycle(diff, original):
    """Return a two-element list: the first entry of `original` followed by `diff`."""
    first = original[0]
    return [first, diff]
def find_closest_value(value, column2):
    """
    Return the entry of the pandas Series `column2` that is closest to `value`.

    Closeness is the absolute value of the difference, so complex entries are
    compared by magnitude. NaN distances are ignored and the first minimum
    wins on ties.

    The position is taken from the underlying array: `idxmin()` yields an
    index *label*, which is only a valid `.iloc` position when the Series has
    a default 0..n-1 index.

    Raises ValueError when every distance is NaN (or the Series is empty).
    """
    distances = (column2 - value).abs().to_numpy()
    position = int(np.nanargmin(distances))
    closest_value = column2.iloc[position]
    return closest_value
def find_closest_idx(value, column2):
    """
    Return the entry of the pandas Series `column2` with the smallest
    Euclidean distance to `value`.

    Despite its name, the function returns the matching entry, not its index.
    The position is taken from the underlying array: `idxmin()` yields an
    index *label*, which is only a valid `.iloc` position when the Series has
    a default 0..n-1 index.

    Raises ValueError when every distance is NaN (or the Series is empty).
    """
    target = np.array(value)
    distances = column2.apply(lambda row: np.linalg.norm(np.array(row) - target))
    position = int(np.nanargmin(distances.to_numpy()))
    return column2.iloc[position]
def closestIndexWithCosineDifference(M_labels, NODE):
    """
    Align the samples of node `NODE` with those of every node using the label
    plus a cosine-distance term.

    Parameters:
    - M_labels: list with one entry per node; each entry is a list of
      per-sample vectors (the first component is used as the label).
    - NODE: index of the local node.

    Returns a NumPy array with one column per node ("IndexIn<k>"): the local
    column is the row index, every other column holds the index of the
    closest sample on that node.
    """
    # One DataFrame column per node; each cell holds one sample vector.
    data={}
    for column in range(0,len(M_labels)):
        data[str(column)]= M_labels[column]
    df=pd.DataFrame(data)
    NODES = len(df.columns)
    for column in range(0,len(M_labels)):
        # Row-wise cosine distance between the local vector and this node's vector.
        df["Cossine_diff"+str(NODE)+"_"+str(column)]= df.apply(lambda row: cosineDistance(row[str(NODE)], row[str(column)]), axis=1)
        if column!=NODE:
            # Self-assignment; has no effect.
            df["Cossine_diff"+str(NODE)+"_"+str(column)]= df["Cossine_diff"+str(NODE)+"_"+str(column)]    
        # Complex helper value: real part = first vector component,
        # imaginary part = |cosine distance + small random noise|.
        df["aux"+str(column)] = df[str(column)].str[0]+1j*abs(df["Cossine_diff"+str(NODE)+"_"+str(column)]+ np.random.normal(-0.0001, 0.0001, len(df["Cossine_diff"+str(NODE)+"_"+str(column)])))
        #df[str(column)]= df.apply(lambda row: replaceCycle(row["Cossine_diff"+str(NODE)+"_"+str(column)], row[str(column)]), axis=1)
        df.drop("Cossine_diff"+str(NODE)+"_"+str(column), axis=1, inplace=True)
    
    for column in range(0,NODES):
            if column==NODE:
                df['IndexIn'+str(column)] = df.index
            else:
                # Closest helper value on the other node, then the first row holding that value.
                df['IndexIn'+str(column)] = df["aux"+str(NODE)].apply(find_closest_value, column2=df["aux"+str(column)]).apply(lambda x: df["aux"+str(column)][df["aux"+str(column)] == x].index[0])
    # Keep only the IndexIn* columns.
    for column in range(0,NODES):
        df.drop(str(column), axis=1, inplace=True)
        df.drop("aux"+str(column), axis=1, inplace=True)  
    return df.to_numpy()

def closestIndexWithCosineDifferenceAndPCA(M, NODE):
    """
    Align the samples of node `NODE` with those of every node using the label,
    a cosine-distance term and a PCA term.

    Parameters:
    - M: NumPy array indexed as (node, sample, feature); the first two
      features are used as the label vector, the remaining ones as PCA values.
    - NODE: index of the local node.

    Returns a NumPy array with one column per node ("IndexIn<k>"): the local
    column is the row index, every other column holds the index of the
    closest sample on that node.
    """
    M_labels=M[:,:,:2].tolist()
    # PCA part, scaled by 0.01.
    M_PCA=M[:,:,2:]*0.01
    # One DataFrame column per node; each cell holds one two-component vector.
    data={}
    for column in range(0,len(M_labels)):
        data[str(column)]= M_labels[column]
    df=pd.DataFrame(data)
    NODES = len(df.columns)
    for column in range(0,len(M_labels)):
        # Row-wise cosine distance between the local vector and this node's vector.
        df["Cossine_diff"+str(NODE)+"_"+str(column)]= df.apply(lambda row: cosineDistance(row[str(NODE)], row[str(column)]), axis=1)
        if column!=NODE:
            # Self-assignment; has no effect.
            df["Cossine_diff"+str(NODE)+"_"+str(column)]= df["Cossine_diff"+str(NODE)+"_"+str(column)]    
        # Complex helper value: real part = first vector component,
        # imaginary part = |cosine distance + scaled PCA value|.
        # NOTE(review): the flattened PCA slice must have one value per row,
        # i.e. presumably a single PCA component — confirm.
        df["aux"+str(column)] = df[str(column)].str[0]+1j*abs(df["Cossine_diff"+str(NODE)+"_"+str(column)]+ M_PCA[column].reshape(-1))
        #df[str(column)]= df.apply(lambda row: replaceCycle(row["Cossine_diff"+str(NODE)+"_"+str(column)], row[str(column)]), axis=1)
        df.drop("Cossine_diff"+str(NODE)+"_"+str(column), axis=1, inplace=True)
    
    for column in range(0,NODES):
            if column==NODE:
                df['IndexIn'+str(column)] = df.index
            else:
                # Closest helper value on the other node, then the first row holding that value.
                df['IndexIn'+str(column)] = df["aux"+str(NODE)].apply(find_closest_value, column2=df["aux"+str(column)]).apply(lambda x: df["aux"+str(column)][df["aux"+str(column)] == x].index[0])
    # Keep only the IndexIn* columns.
    for column in range(0,NODES):
        df.drop(str(column), axis=1, inplace=True)
        df.drop("aux"+str(column), axis=1, inplace=True)  
    return df.to_numpy()

# Toy inputs: 3 nodes x 3 samples; two components per sample ...
M_labels=np.array([[[1.0, 2.0], [1, 2], [7, 8]],
                  [[10, 10], [1, 2], [16, 17]],
                  [[1,2], [1,2], [2,3]]]).tolist()
# ... and the same vectors with a third (PCA) component appended.
M_labels_PCA=np.array([[[1.0, 2.0, 0.2], [1, 2, 0.3], [7, 8, 0.4]],
                  [[10, 10, 0.1], [1, 2, 0.3], [16, 17, 0.5]],
                  [[1,2,0.6], [1,2,0.2], [2,3,0.34]]])
# Run whichever variant matches the configured correction, with node 0 as local.
if PCA_CORRECTION==True:
    print(closestIndexWithCosineDifferenceAndPCA(M_labels_PCA, 0))
else:
    print(closestIndexWithCosineDifference(M_labels, 0))


In [None]:
#firstNaN_Train=find_first_nan(M_TRAIN[NODE])
#firstNaN_Eval=find_first_nan(M_EVAL[NODE])

In [None]:
# Replace the NaN padding of every node by repeating that node's leading valid
# rows cyclically, so all nodes end up with fully populated rows.
# Rows are tested as a whole (any NaN component), so this also works for
# labels with more than one column, and the first NaN row is located once per
# node instead of several times per pass.
for padded in (M_EVAL, M_TRAIN):
    for J in range(0, NODES):
        nan_rows = np.isnan(padded[J]).reshape(padded.shape[1], -1).any(axis=1)
        if not nan_rows.any():
            continue
        first_nan = int(np.argmax(nan_rows))
        if first_nan == 0:
            # No valid leading row to repeat; leave the node untouched.
            continue
        # Everything from the first NaN row onwards is overwritten.
        padded[J] = padded[J, np.arange(padded.shape[1]) % first_nan]

In [None]:
#M_TRAIN=M_TRAIN[:,:firstNaN_Train,:]
#M_EVAL=M_EVAL[:,:firstNaN_Eval,:]

In [None]:
import numpy as np
import pandas as pd
def find_closest_value(value, column2):
    """
    Return the entry of the pandas Series `column2` that is closest to `value`.

    Closeness is the absolute value of the difference. NaN distances are
    ignored and the first minimum wins on ties.

    The position is taken from the underlying array: `idxmin()` yields an
    index *label*, which is only a valid `.iloc` position when the Series has
    a default 0..n-1 index.

    Raises ValueError when every distance is NaN (or the Series is empty).
    """
    distances = (column2 - value).abs().to_numpy()
    position = int(np.nanargmin(distances))
    closest_value = column2.iloc[position]
    return closest_value

def closestIndex(M_labels, NODE):
    """
    Align the samples of node `NODE` with those of every node by label value.

    Parameters:
    - M_labels: 2D NumPy array, one row of labels per node.
    - NODE: index of the local node.

    Returns a NumPy array with one column per node: the local column is the
    row index; every other column holds, for each local label, the first
    index on that node whose label equals the closest label found there.
    """
    df = pd.DataFrame()
    node_count = M_labels.shape[0]
    for column in range(0, node_count):
        df[str(column)] = M_labels[column]

    local_name = str(NODE)
    for column in range(0, node_count):
        name = str(column)
        if column == NODE:
            df['IndexIn' + name] = df.index
            continue
        # Closest label on the other node, then the first row carrying it.
        nearest = df[local_name].apply(find_closest_value, column2=df[name])
        df['IndexIn' + name] = nearest.apply(lambda x: df[name][df[name] == x].index[0])

    # Only the IndexIn* columns are returned.
    for column in range(0, node_count):
        df.drop(str(column), axis=1, inplace=True)
    return df.to_numpy()

# Example usage:
def euclidean_distance(point1, point2):
    """
    Euclidean distance between two points given as iterables of components.

    Components are paired with zip, so extra components of the longer point
    are ignored. Redefines the function of the same name from an earlier cell.
    """
    total = 0
    for p1, p2 in zip(point1, point2):
        total = total + (p1 - p2) ** 2
    return total ** 0.5


def alternative_closest_index_in_list2(closest_index, min_distance, value1, list2):
    """
    Scan `list2` for an entry closer to `value1` than `min_distance`.

    Starts from the given (closest_index, min_distance) pair and replaces it
    whenever a strictly smaller distance is found. Pairs in which either
    value contains NaN are ignored.

    Returns the tuple (closest_index, min_distance) after the scan.
    """
    best_index, best_distance = closest_index, min_distance
    for position, candidate in enumerate(list2):
        if np.isnan(value1).any() or np.isnan(candidate).any():
            continue
        gap = euclidean_distance(value1, candidate)
        if gap < best_distance:
            best_index, best_distance = position, gap
    return best_index, best_distance
    
def closest_index_in_list2(count, closest_index, min_distance, closest_indices, j,value1,list2):
    """
    Find the index in `list2` of the entry closest to `value1`.

    Parameters:
    - count: recursion depth; only passed on (incremented) to the recursive call.
    - closest_index, min_distance: starting best index and distance.
    - closest_indices: indexes already assigned to earlier values.
    - j: position of `value1` in the caller's list; only passed on.
    - value1: the value to match.
    - list2: candidate values.

    Returns the tuple (closest_index, min_distance).

    When CYCLE_CORRECTION is False and the best index was already assigned,
    that candidate is removed and the search is repeated on the rest.
    Redefines the function of the same name from an earlier cell.
    """
    for i, value2 in enumerate(list2):
            # Pairs in which either value contains NaN are skipped entirely.
            if ~np.isnan(value1).any() and ~np.isnan(value2).any():
                distance = euclidean_distance(value1, value2)
                if distance < min_distance:
                    min_distance = distance
                    closest_index = i
                if CYCLE_CORRECTION==False:
                    # Duplicate handling runs once, when the last element is reached
                    # (and only if that last pair passed the NaN check above).
                    if i==len(list2)-1:
                        try:
                            if len(closest_indices)> 0:
                                # count = 0
                                # Raises ValueError when the best index is still unused;
                                # the except clause below then leaves the result untouched.
                                index = closest_indices.index(closest_index)
                                # NOTE(review): the helper starts from the current min_distance,
                                # so its result cannot exceed it; this test looks always true — confirm.
                                if min_distance >= alternative_closest_index_in_list2(closest_index, min_distance, value1, np.delete(list2, closest_index, axis=0))[1]:
                                    list2 = np.delete(list2, closest_index, axis=0)
                                    # NOTE(review): "+1" presumably compensates for the removed row,
                                    # which is only exact for indexes at or after it — verify.
                                    closest_index=closest_index_in_list2(count+1, closest_index, float('inf'), closest_indices, j,value1,list2)[0]+1
                        except ValueError:
                            1
    return closest_index, min_distance

def closest_index_for_eachND(list1, list2):
    """
    For every value of `list1`, look up the index of its closest entry in `list2`.

    Each search starts from no index and an infinite distance and receives
    the list of indexes chosen so far.

    Returns the list of chosen indexes, one per value of `list1`.
    """
    matches = []
    for position, value in enumerate(list1):
        found, _ = closest_index_in_list2(0, None, float('inf'), matches, position, value, list2)
        matches.append(found)
    return matches

def closest_index_matrix_ND(M_labels,NODE):
    """
    Build the index matrix that aligns node `NODE` with every other node.

    For the local node the column is simply 0..n-1; for any other node it is
    the result of `closest_index_for_eachND` between the local labels and
    that node's labels. A short marker ("0" for the first node, "!0" for the
    others) is printed before each non-local search.

    Returns the transposed stack of these per-node index vectors.
    """
    for labels_node in range(0, M_labels.shape[0]):
        is_first = labels_node == 0
        if labels_node == NODE:
            column = np.array(range(0, len(M_labels[:][NODE])))
        else:
            print("0" if is_first else "!0")
            column = np.array(closest_index_for_eachND(M_labels[:][NODE], M_labels[:][labels_node]))
        # The first node starts the matrix; later nodes are stacked below it.
        M = column if is_first else np.vstack((M, column))
    return np.transpose(M)
    
# Example 2D
# Toy input: 3 nodes x 4 scalar labels, aligned against node 0.
M_labels = np.array([[2,6,2,2],[2,1,2,2],[4,2,3,6]])
M = closestIndex(M_labels,0)

In [None]:
# Build the synchronised index matrix for the validation labels.
if CYCLE_CORRECTION==False:
    # Drop the last axis so closestIndex gets a 2D (node, sample) array.
    M_EVAL=M_EVAL.reshape(M_EVAL.shape[0],M_EVAL.shape[1])
    SYNCHRONIZED_LABELS_EVAL=closestIndex(M_EVAL,NODE)
else:
    if PCA_CORRECTION==True:
        SYNCHRONIZED_LABELS_EVAL=closestIndexWithCosineDifferenceAndPCA(M_EVAL,NODE)
    else:
        # This variant takes nested lists rather than an array.
        SYNCHRONIZED_LABELS_EVAL=closestIndexWithCosineDifference(M_EVAL.tolist(),NODE)

In [None]:
# Build the synchronised index matrix for the training labels.
if CYCLE_CORRECTION==False:
    # Drop the last axis so closestIndex gets a 2D (node, sample) array.
    M_TRAIN=M_TRAIN.reshape(M_TRAIN.shape[0],M_TRAIN.shape[1])
    SYNCHRONIZED_LABELS_TRAIN=closestIndex(M_TRAIN,NODE)
else:
    if PCA_CORRECTION==True:
        SYNCHRONIZED_LABELS_TRAIN=closestIndexWithCosineDifferenceAndPCA(M_TRAIN,NODE)
    else:
        # This variant takes nested lists rather than an array.
        SYNCHRONIZED_LABELS_TRAIN=closestIndexWithCosineDifference(M_TRAIN.tolist(),NODE)

In [None]:
def batch_3d_array(arr, batch_size):
    """
    Batch a 3D NumPy array into chunks of `batch_size` along the first axis.

    Parameters:
    - arr: 3D NumPy array
    - batch_size: Size of each batch along the first dimension

    Returns:
    - 4D NumPy array of shape (num_batches, batch_size, arr.shape[1],
      arr.shape[2]) with the dtype of `arr`. When the first dimension is not
      a multiple of `batch_size`, the unused tail of the last batch is filled
      with zeros (it used to be left uninitialised).

    Raises:
    - ValueError: if `arr` is not 3D.
    """
    shape = arr.shape
    if len(shape) != 3:
        raise ValueError("Input array must be 3D.")

    num_batches = (shape[0] + batch_size - 1) // batch_size
    batches = np.zeros((num_batches, batch_size, shape[1], shape[2]), dtype=arr.dtype)

    # Write the data through a flat (samples, ...) view of the batch buffer;
    # rows past the data keep their zero padding.
    flat_view = batches.reshape(num_batches * batch_size, shape[1], shape[2])
    flat_view[:shape[0]] = arr

    return batches

def batch_2d_array(arr, batch_size):
    """
    Batch a 2D NumPy array into chunks of `batch_size` rows.

    Parameters:
    - arr: 2D NumPy array
    - batch_size: Size of each batch

    Returns:
    - 3D NumPy array of shape (num_batches, batch_size, arr.shape[1]) with the
      dtype of `arr`. When the row count is not a multiple of `batch_size`,
      the unused tail of the last batch is filled with zeros (it used to be
      left uninitialised).

    Raises:
    - ValueError: if `arr` is not 2D.
    """
    shape = arr.shape
    if len(shape) != 2:
        raise ValueError("Input array must be 2D.")

    num_batches = (shape[0] + batch_size - 1) // batch_size
    batches = np.zeros((num_batches, batch_size, shape[1]), dtype=arr.dtype)

    # Write the data through a flat (rows, columns) view of the batch buffer;
    # rows past the data keep their zero padding.
    flat_view = batches.reshape(num_batches * batch_size, shape[1])
    flat_view[:shape[0]] = arr

    return batches
def batch_data_2d(test, batch_size):
    """Split a 2D array into zero-padded batches along its first axis.

    Parameters:
    - test: 2D NumPy array of shape (m, n)
    - batch_size: rows per batch; a falsy value (None or 0) means a single
      batch holding all m rows

    Returns:
    - New array of shape (ceil(m / batch_size), batch_size, n) with the same
      dtype as ``test``; the tail of the last batch is filled with zeros when
      m is not a multiple of ``batch_size``.

    Unlike a hand-strided view, this works for any memory layout of ``test``
    (e.g. a sliced or transposed array) and never upcasts the dtype when
    padding is required.
    """
    m, n = test.shape
    if not batch_size:
        batch_size = m
    # Ceiling division: a trailing partial batch still gets its own slot.
    total_batches = -(-m // batch_size)
    padded = np.zeros((total_batches * batch_size, n), dtype=test.dtype)
    padded[:m] = test
    return padded.reshape(total_batches, batch_size, n)

def batch_data_3d(test, batch_size):
    """Split a 3D array into zero-padded batches along its first axis.

    Parameters:
    - test: 3D NumPy array of shape (m, n, p)
    - batch_size: samples per batch; a falsy value (None or 0) means a single
      batch holding all m samples

    Returns:
    - New array of shape (ceil(m / batch_size), batch_size, n, p) with the
      same dtype as ``test``; the tail of the last batch is filled with zeros
      when m is not a multiple of ``batch_size``.

    Unlike a hand-strided view, this works for any memory layout of ``test``
    (e.g. a sliced or transposed array) and never upcasts the dtype when
    padding is required.
    """
    m, n, p = test.shape
    if not batch_size:
        batch_size = m
    # Ceiling division: a trailing partial batch still gets its own slot.
    total_batches = -(-m // batch_size)
    padded = np.zeros((total_batches * batch_size, n, p), dtype=test.dtype)
    padded[:m] = test
    return padded.reshape(total_batches, batch_size, n, p)

In [None]:
def extract_and_remove_first_batch(my_array, axis=0):
    """Split off the first slice of ``my_array`` along ``axis``.

    Parameters:
    - my_array: array-like to split
    - axis: axis along which the first slice is taken and removed (default 0)

    Returns:
    - (extracted_batch, remaining): the slice at index 0 along ``axis`` and a
      new array with that slice removed. The input is not modified.
    """
    # Take and delete along the SAME axis so the two results are consistent
    # for any axis, not only for axis=0.
    extracted_batch = np.take(my_array, 0, axis=axis)
    remaining = np.delete(my_array, 0, axis=axis)

    return extracted_batch, remaining

def fn_data_partition_train(NODES, evaluation):
    """Build, for every node, the list of batched ((X, W), y) tensors from its dev set.

    For each flight class FC in ``range(NODES)`` the file
    ``FC<FC+1>/FC<FC+1>_dev.h5`` is read, downsampled with
    ``DOWNSAMPLING_STEP[FC]``, scaled, windowed with
    ``sequence_generator_per_unit``, reordered with ``SHUFFLE_IDX[FC]`` and
    finally indexed with column FC of the synchronized-label matrix.

    Parameters:
    - NODES: number of flight classes / parties to load.
    - evaluation: when ``== True`` windows are selected with
      ``SYNCHRONIZED_LABELS_EVAL``; otherwise with ``SYNCHRONIZED_LABELS_TRAIN``.

    Returns:
    - list with one entry per node; each entry is a list of
      ``((X_batch, W_batch), y_batch)`` tuples of ``tf.float32`` tensors.

    Besides the names declared ``global`` below, the function also reads the
    module-level ``DOWNSAMPLING_STEP``, ``PIECE_WISE`` and
    ``SYNCHRONIZED_LABELS_*`` objects.
    """
    
    # Global Variables
    # (only read here; TRAINING_IDX and VAL_IDX are declared but never used below)
    global SCALER_X
    global SCALER_W
    global SCALER_Y
    global WINDOW_LEN
    global STRIDE
    global BATCH_SIZE
    global TRAINING_IDX
    global VAL_IDX
    global SHUFFLE_IDX
    
    distributedData=[]
    for FC in range(0, NODES):
        DATA_REPLICA_ID = []
        # Load data DEV
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_dev'+".h5", 'r') as hdf:
            # Development set
            W_train = np.array(hdf.get('W_dev'), dtype='float32')             # W
            X_s_train = np.array(hdf.get('X_s_dev'), dtype='float32')         # X_s
            Y_train = np.array(hdf.get('Y_dev'), dtype='float32')             # RUL                  
            A_train = np.array(hdf.get('A_dev'), dtype='float32')
            
            # Keep every DOWNSAMPLING_STEP[FC]-th row of each table.
            W_train = W_train[::DOWNSAMPLING_STEP[FC],:]
            X_s_train = X_s_train[::DOWNSAMPLING_STEP[FC],:] 
            Y_train = Y_train[::DOWNSAMPLING_STEP[FC],:]
            A_train = A_train[::DOWNSAMPLING_STEP[FC],:]

            # Variable names (loaded but not used further in this function)
            W_var = np.array(hdf.get('W_var'))
            X_s_var = np.array(hdf.get('X_s_var'))  
            X_v_var = np.array(hdf.get('X_v_var')) 
            T_var = np.array(hdf.get('T_var'))
            A_var = np.array(hdf.get('A_var'))

            # from np.array to list of fixed-width unicode strings (U20)
            W_var = list(np.array(W_var, dtype='U20'))
            X_s_var = list(np.array(X_s_var, dtype='U20'))  
            X_v_var = list(np.array(X_v_var, dtype='U20')) 
            T_var = list(np.array(T_var, dtype='U20'))
            A_var = list(np.array(A_var, dtype='U20'))
            

            # Columns of A: 0 -> unit id, 1 -> cycle, last -> 'hi'
            # (presumably the health-state flag; confirm against the dataset docs).
            units_train=A_train[:,0].reshape(-1,1)
            cycles_train=A_train[:,1].reshape(-1,1)
            hi_train = A_train[:,-1]
                
            # The shared scalers are re-fitted on every node's data, so after the
            # loop they hold the fit of the last flight class processed.
            X_s_train = SCALER_X.fit_transform(X_s_train)
            W_train = SCALER_W.fit_transform(W_train)
            Y_train = SCALER_Y.fit_transform(Y_train)
            
            # Sensor windows. Intermediates are rebound to [] as soon as they are
            # no longer needed so the large arrays can be garbage-collected.
            X_windows, _, _=sequence_generator_per_unit(X_s_train,units_train,cycles_train,sequence_length=WINDOW_LEN,stride = STRIDE)
            X_s_train=[]
            X_windows=X_windows[SHUFFLE_IDX[FC]]
            if evaluation==True:
                #X_=X_windows[int(len(X_windows)*0.85)+1:int(len(X_windows))-1]
                X_=X_windows[SYNCHRONIZED_LABELS_EVAL[:,FC]]
                X_windows=[]
                X_=X_[:,:,:]
            else:
                #X_=X_windows[0:int(len(X_windows)*0.85)]
                X_=X_windows[SYNCHRONIZED_LABELS_TRAIN[:,FC]]
                X_windows=[]
                X_=X_[:,:,:]
                
            # Operating-condition windows, selected with the same indices.
            W_windows,_,_=sequence_generator_per_unit(W_train,units_train,cycles_train,sequence_length=WINDOW_LEN,stride = STRIDE)
            W_train=[]
            W_windows=W_windows[SHUFFLE_IDX[FC]]
            if evaluation==True:
                #W_=W_windows[int(len(W_windows)*0.85)+1:int(len(W_windows))-1]
                W_=W_windows[SYNCHRONIZED_LABELS_EVAL[:,FC]]
                W_windows=[]
                W_=W_[:,:,:]
            else:
                #W_=W_windows[0:int(len(W_windows)*0.85)]
                W_=W_windows[SYNCHRONIZED_LABELS_TRAIN[:,FC]]
                W_windows=[]
                W_=W_[:,:,:]
            
            # NOTE(review): this correction runs on the already scaled RUL
            # (SCALER_Y.fit_transform above), whereas the test-side code in this
            # notebook corrects first and scales afterwards -- confirm which
            # order correctRUL / correctMaxRUL expect.
            if PIECE_WISE==True:
                df_hs_unit_train = DataFrame({'unit': units_train.reshape(-1).astype(int),'RUL': Y_train.reshape(-1), 'hi': hi_train.reshape(-1)})
                df_hs_unit_train['RUL']=df_hs_unit_train.apply(lambda row: correctRUL(row['hi'],row['RUL']), axis=1)

                # Per-unit maximum RUL, looked up again for every row below.
                pd_aux=DataFrame(df_hs_unit_train.groupby('unit')['RUL'].max()).reset_index()
                df_hs_unit_train['RUL']=df_hs_unit_train.apply(lambda row: correctMaxRUL(row['hi'],row['RUL'], float(pd_aux.iloc[pd_aux.index[pd_aux['unit'] == row['unit']]]['RUL'])), axis=1)
                Y_train=df_hs_unit_train['RUL'].to_numpy().reshape(len(df_hs_unit_train),1)
                
            # Label windows; option='last' presumably keeps the label of the last
            # step of each window -- verify in sequence_generator_per_unit.
            Y_windows,_,_=sequence_generator_per_unit(Y_train,units_train,cycles_train,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)
            Y_train=[]
            units_train=[]
            cycles_train=[]
            Y_windows=Y_windows[SHUFFLE_IDX[FC]]
            
            if evaluation==True:
                #y_=Y_windows[int(len(Y_windows)*0.85)+1:int(len(Y_windows))-1]
                y_=Y_windows[SYNCHRONIZED_LABELS_EVAL[:,FC]]
                Y_windows=[]
                y_=y_[:]
            else:
                #y_=Y_windows[0:int(len(Y_windows)*0.85)]
                y_=Y_windows[SYNCHRONIZED_LABELS_TRAIN[:,FC]]
                Y_windows=[]
                y_=y_[:]
                
            # Group the selected windows into batches of BATCH_SIZE.
            X_ = batch_data_3d(X_,BATCH_SIZE)
            W_ = batch_data_3d(W_,BATCH_SIZE)
            y_= batch_data_2d(y_,BATCH_SIZE)
        # One ((X, W), y) tensor tuple per batch for this node.
        for i in range(0,len(y_)):
            DATA_REPLICA_ID.append(((tf.convert_to_tensor(X_[i], dtype=tf.float32),tf.convert_to_tensor(W_[i], dtype=tf.float32)),tf.convert_to_tensor(y_[i], dtype=tf.float32)))
        distributedData.append(DATA_REPLICA_ID)
    return distributedData

In [None]:
# Build the per-node training batches, then rebind the large helper objects to
# empty containers so their memory can be reclaimed.
distributedDataTrain = fn_data_partition_train(NODES,False)
SYNCHRONIZED_LABELS_TRAIN=[]
M_TRAIN=[]
TRAINING_IDX = {}
def value_fn_train(ctx):
    """Return the training batch list whose index equals the replica id."""
    return distributedDataTrain[ctx.replica_id_in_sync_group]
distributed_values_train = strategy.experimental_distribute_values_from_function(value_fn_train)
# NOTE(review): the list is cleared right after distribution, so this relies on
# value_fn_train having been evaluated by the call above.
distributedDataTrain=[]

In [None]:
# Same pipeline as for training, but selecting windows with the evaluation
# labels (evaluation=True); helper objects are released afterwards.
distributedDataEval = fn_data_partition_train(NODES,True)
SYNCHRONIZED_LABELS_EVAL=[]
M_EVAL=[]
VAL_IDX = {}
def value_fn_eval(ctx):
    """Return the evaluation batch list whose index equals the replica id."""
    return distributedDataEval[ctx.replica_id_in_sync_group]
distributed_values_eval = strategy.experimental_distribute_values_from_function(value_fn_eval)
# NOTE(review): the list is cleared right after distribution, so this relies on
# value_fn_eval having been evaluated by the call above.
distributedDataEval=[]

# Create Model

In [None]:
#RUL MODEL

def predictor(t=64,
      feature_X_in=14,
      feature_W_in=4,
      feature_H_in=1,
      feature_out_size=1,
      activation='relu',
      filter = [10,10,1],
      filter_size = 10,
      useH=True):
    '''
    Assemble a stacked-Conv1D regression model over windowed inputs.

    Inputs "X_in" (t, feature_X_in) and "W_in" (t, feature_W_in) -- plus
    "H_in" (t, feature_H_in) when useH is true -- are concatenated on the last
    axis and passed through one 'same'-padded, stride-1 Conv1D per entry of
    `filter` (kernel width `filter_size`). The result is flattened and fed to
    Dense(50) followed by a linear Dense(feature_out_size) named "predictions".

    useH: if True, the model maps [X, W, H] -> Y, otherwise [X, W] -> Y.
    The `filter` default list is only iterated, never mutated.
    '''
    model_inputs = [
        layers.Input(shape=(t, feature_X_in), name="X_in"),
        layers.Input(shape=(t, feature_W_in), name="W_in"),
    ]
    if useH:
        model_inputs.append(layers.Input(shape=(t, feature_H_in), name="H_in"))

    hidden = tf.concat(list(model_inputs), -1)

    # One convolution per requested filter count, applied in sequence.
    for n_filters in filter:
        conv = layers.Conv1D(n_filters, filter_size, 1, padding='same', activation=activation)
        hidden = conv(hidden)

    flattened = layers.Flatten()(hidden)
    dense_out = layers.Dense(50, activation=activation)(flattened)
    outputs = layers.Dense(feature_out_size, activation='linear', name="predictions")(dense_out)

    return models.Model(model_inputs, outputs)

In [None]:

def inception2D(t=64,
      feature_X_in=14,
      feature_W_in=4,
      feature_out_size=1):
    '''
    Build a two-module, inception-style Conv2D regressor: [X, W] -> Y.

    Inputs "X_in" (t, feature_X_in, 1) and "W_in" (t, feature_W_in, 1) are
    concatenated along axis -2. Each module runs three parallel branches of
    10-filter, 'same'-padded ReLU convolutions; the concatenated result is
    flattened, passed through Dropout(0.5), a sigmoid Dense(256) and a ReLU
    Dense(feature_out_size).
    '''
    keras_layers = tf.keras.layers

    def relu_conv(tensor, kernel):
        # 10-filter 'same' convolution with ReLU, shared by all branches.
        return keras_layers.Conv2D(10, kernel, padding='same', activation='relu')(tensor)

    x_in = layers.Input(shape=(t, feature_X_in, 1), name="X_in")
    w_in = layers.Input(shape=(t, feature_W_in, 1), name="W_in")
    stacked = tf.concat([x_in, w_in], -2)

    ### 1st Module: 3x3 branch, 5x5 branch, pooled 1x1 branch
    branch_3x3 = relu_conv(stacked, (3, 3))
    branch_5x5 = relu_conv(stacked, (5, 5))
    pooled = keras_layers.MaxPooling2D(3, strides=(1, 1), padding='same')(stacked)
    branch_pool = relu_conv(pooled, (1, 1))
    module_1 = keras_layers.concatenate([branch_3x3, branch_5x5, branch_pool], axis=3)

    ### 2nd Module: each branch starts with a 1x1 convolution
    reduced_3x3 = relu_conv(module_1, (1, 1))
    branch_3x3 = relu_conv(reduced_3x3, (3, 3))

    reduced_5x5 = relu_conv(module_1, (1, 1))
    branch_5x5 = relu_conv(reduced_5x5, (5, 5))

    # NOTE(review): pool size 1 with stride 1 leaves the tensor unchanged;
    # the first module pools with size 3 -- confirm this is intended.
    pooled = keras_layers.MaxPooling2D(1, strides=(1, 1), padding='same')(module_1)
    branch_pool = relu_conv(pooled, (1, 1))

    # NOTE(review): this concatenation uses axis=2 while the first module uses
    # axis=3 -- confirm which axis is meant here.
    module_2 = keras_layers.concatenate([branch_3x3, branch_5x5, branch_pool], axis=2)

    # Regression head
    features = keras_layers.Flatten()(module_2)
    features = keras_layers.Dropout(.5)(features)
    features = keras_layers.Dense(256, activation='sigmoid')(features)
    outputs = keras_layers.Dense(feature_out_size, activation='relu')(features)

    return models.Model([x_in, w_in], outputs)

def inception1D(t=64,
      feature_X_in=14,
      feature_W_in=4,
      feature_out_size=1):
    '''
    Build a two-module, inception-style Conv1D regressor: [X, W] -> Y.

    Inputs "X_in" (t, feature_X_in) and "W_in" (t, feature_W_in) are
    concatenated on the last axis. Each module runs three parallel branches of
    10-filter, 'same'-padded ReLU convolutions whose outputs are concatenated
    on axis 2; the result is flattened, passed through Dropout(0.5), a sigmoid
    Dense(256) and a ReLU Dense(feature_out_size).
    '''
    keras_layers = tf.keras.layers

    def relu_conv(tensor, width):
        # 10-filter 'same' convolution with ReLU, shared by all branches.
        return keras_layers.Conv1D(10, width, padding='same', activation='relu')(tensor)

    x_in = layers.Input(shape=(t, feature_X_in), name="X_in")
    w_in = layers.Input(shape=(t, feature_W_in), name="W_in")
    stacked = tf.concat([x_in, w_in], -1)

    ### 1st Module: width-3 branch, width-5 branch, pooled width-1 branch
    branch_3 = relu_conv(stacked, 3)
    branch_5 = relu_conv(stacked, 5)
    pooled = keras_layers.MaxPooling1D(3, strides=1, padding='same')(stacked)
    branch_pool = relu_conv(pooled, 1)
    module_1 = keras_layers.concatenate([branch_3, branch_5, branch_pool], axis=2)

    ### 2nd Module: each branch starts with a width-1 convolution
    reduced_3 = relu_conv(module_1, 1)
    branch_3 = relu_conv(reduced_3, 3)

    reduced_5 = relu_conv(module_1, 1)
    branch_5 = relu_conv(reduced_5, 5)

    # NOTE(review): pool size 1 with stride 1 leaves the tensor unchanged;
    # the first module pools with size 3 -- confirm this is intended.
    pooled = keras_layers.MaxPooling1D(1, strides=1, padding='same')(module_1)
    branch_pool = relu_conv(pooled, 1)

    module_2 = keras_layers.concatenate([branch_3, branch_5, branch_pool], axis=2)

    # Regression head
    features = keras_layers.Flatten()(module_2)
    features = keras_layers.Dropout(.5)(features)
    features = keras_layers.Dense(256, activation='sigmoid')(features)
    outputs = keras_layers.Dense(feature_out_size, activation='relu')(features)

    return models.Model([x_in, w_in], outputs)

In [None]:
# Location of the training checkpoints: `checkpoint_prefix` is the path prefix
# ("<checkpoint_dir>/ckpt") handed to `checkpoint.save` in the training loop.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

## Define the loss function

Recall that the loss function consists of one or two parts:

  * The **prediction loss** measures how far off the model's predictions are from the training labels for a batch of training examples. It is computed for each labeled example and then reduced across the batch by computing the average value.
  * Optionally, **regularization loss** terms can be added to the prediction loss, to steer the model away from overfitting the training data. A common choice is L2 regularization, which adds a small fixed multiple of the sum of squares of all model weights, independent of the number of examples. The model above uses L2 regularization to demonstrate its handling in the training loop below.

For training on a single machine with a single GPU/CPU, this works as follows:

  * The prediction loss is computed for each example in the batch, summed across the batch, and then divided by the batch size.
  * The regularization loss is added to the prediction loss.
  * The gradient of the total loss is computed w.r.t. each model weight, and the optimizer updates each model weight from the corresponding gradient.

With `tf.distribute.Strategy`, the input batch is split between replicas.
For example, let's say you have 4 GPUs, each with one replica of the model. One batch of 256 input examples is distributed evenly across the 4 replicas, so each replica gets a batch of size 64: We have `256 = 4*64`, or generally `GLOBAL_BATCH_SIZE = num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`.

Each replica computes the loss from the training examples it gets and computes the gradients of the loss w.r.t. each model weight. The optimizer takes care that these **gradients are summed up across replicas** before using them to update the copies of the model weights on each replica.

*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*

  * Each replica computes the prediction loss for all examples distributed to it, sums up the results and divides them by `num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`, or equivalently, `GLOBAL_BATCH_SIZE`.
  * Each replica computes the regularization loss(es) and divides them by
  `num_replicas_in_sync`.

Compared to non-distributed training, all per-replica loss terms are scaled down by a factor of `1/num_replicas_in_sync`. On the other hand, all loss terms -- or rather, their gradients -- are summed across that number of replicas before the optimizer applies them. In effect, the optimizer on each replica uses the same gradients as if a non-distributed computation with `GLOBAL_BATCH_SIZE` had happened. This is consistent with the distributed and undistributed behavior of Keras `Model.fit`. See the [Distributed training with Keras](./keras.ipynb) tutorial on how a larger global batch size enables scaling up the learning rate.

In [None]:
with strategy.scope():
    def compute_loss_batch(labels, predictions, model_losses):
        """Return the root of the averaged squared error of one batch.

        The squared residuals are averaged with `tf.nn.compute_average_loss`
        and the square root of that average is returned. `model_losses` is
        accepted for interface compatibility but is not used.
        """
        residual = labels - predictions
        squared_error = residual**2  # per-sample error
        averaged = tf.nn.compute_average_loss(squared_error)
        return tf.math.sqrt(averaged)  # batch error

In [None]:
# Metrics are created under the strategy scope. `train_rmse` is updated by the
# training steps; `eval_mae` and `eval_rmse` by the test steps.
with strategy.scope():
    eval_mae = tf.keras.metrics.MeanAbsoluteError()
    train_rmse = tf.keras.metrics.RootMeanSquaredError()
    eval_rmse = tf.keras.metrics.RootMeanSquaredError()

In [None]:
# A model, an optimizer, and a checkpoint must be created under `strategy.scope`.
with strategy.scope():
    # Alternative architecture, kept for reference:
    #model = predictor(t=50,useH=False)
    # The inception-style 1D network, sized to the configured window length.
    model = inception1D(t=WINDOW_LEN)
    optimizer = tf.keras.optimizers.experimental.SGD(learning_rate=LEARNING_RATE)
    # The checkpoint tracks both the optimizer state and the model weights.
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

## Auxiliary Functions for Training

In [None]:
def train_step_batch(inputs):
    """Run one local optimisation step on a batch and return its loss.

    `inputs` is an (input_signals, labels) pair. Gradients are applied with
    `skip_gradients_aggregation=True`, and `train_rmse` is updated with the
    model's own predictions.
    """
    features, targets = inputs
    with tf.GradientTape() as tape:
        outputs = model(features, training=True)
        batch_loss = compute_loss_batch(targets, outputs, model.losses)  # Batch Error
    grads = tape.gradient(batch_loss, model.trainable_variables)
    grads_and_vars = zip(grads, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars, skip_gradients_aggregation=True)
    train_rmse.update_state(targets, outputs)
    return batch_loss
def train_step_sample(inputs):
    """Return the model's predictions for one batch without updating weights.

    `inputs` is an (input_signals, labels) pair; the labels are ignored. The
    forward pass keeps `training=True`, exactly as before, so
    training-only layers such as Dropout stay active.

    No `tf.GradientTape` is opened: nothing is differentiated here, so
    recording the forward pass on a tape was wasted work.
    """
    input_signals, _ = inputs
    return model(input_signals, training=True)

def compute_loss_fedLabSync(inputs, collaborativePredictions):
    """Optimisation step on a blend of collaborative and local predictions.

    The blend is `collaborativePredictions * COLLAB_LOSS_WEIGHT +
    predictions * (1 - COLLAB_LOSS_WEIGHT)`. Its loss is differentiated with
    respect to the model's trainable variables, the gradients are applied with
    the optimizer's default aggregation, and `train_rmse` is updated with the
    blended predictions. Returns the loss.
    """
    features, targets = inputs
    with tf.GradientTape() as tape:
        local_predictions = model(features, training=True)
        blended = (collaborativePredictions*(COLLAB_LOSS_WEIGHT)+local_predictions*(1-COLLAB_LOSS_WEIGHT))
        blended_loss = compute_loss_batch(targets, blended, model.losses)  # Batch Error
    grads = tape.gradient(blended_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_rmse.update_state(targets, blended)
    return blended_loss
    
def test_step_batch(inputs, collaborativePredictions):
    """Update the evaluation metrics with already computed predictions.

    Only the labels of the (input_signals, labels) pair are used; the model is
    not called here.
    """
    _, labels = inputs
    for metric in (eval_mae, eval_rmse):
        metric.update_state(labels, collaborativePredictions)
def test_step(inputs):
    """Predict one batch in inference mode and update the evaluation metrics."""
    features, targets = inputs
    outputs = model(features, training=False)
    for metric in (eval_mae, eval_rmse):
        metric.update_state(targets, outputs)

In [None]:
# with the distributed input.
@tf.function
def distributed_train_step_batch(dataset_inputs):
    """Run `train_step_batch` through the strategy and sum the per-replica losses."""
    replica_losses = strategy.run(train_step_batch, args=(dataset_inputs,))
    reduce_op = tf.distribute.ReduceOp.SUM
    return strategy.reduce(reduce_op, replica_losses, axis=None)
@tf.function
def collaborative_predictions(dataset_inputs):
    """Run `train_step_sample` through the strategy and average the predictions across replicas."""
    replica_predictions = strategy.run(train_step_sample, args=(dataset_inputs,))
    reduce_op = tf.distribute.ReduceOp.MEAN
    return strategy.reduce(reduce_op, replica_predictions, axis=None)
@tf.function
def local_collaborative_loss(collaborative_predictions, dataset_inputs):
    """Run `compute_loss_fedLabSync` through the strategy; returns the per-replica result unreduced."""
    step_args = (dataset_inputs, collaborative_predictions,)
    replica_losses = strategy.run(compute_loss_fedLabSync, args=step_args)
    return replica_losses

@tf.function
def distributed_test_step_batch(dataset_inputs,collaborative_predictions):
    """Run `test_step_batch` through the strategy with precomputed predictions."""
    step_args = (dataset_inputs, collaborative_predictions,)
    result = strategy.run(test_step_batch, args=step_args)
    return result
@tf.function
def distributed_test_step(dataset_inputs):
    """Run `test_step` through the strategy on one distributed batch."""
    step_args = (dataset_inputs,)
    result = strategy.run(test_step, args=step_args)
    return result

# Training Procedure

In [None]:
# Training driver: each epoch runs the local training step over the training
# batches, evaluates on the evaluation batches, prints the metrics and asks
# the early-stopping helper whether to stop.
from datetime import datetime
past = datetime.now()
early_stopping = EarlyStopping(patience=PATIENCE)

for epoch in range(EPOCHS):
    # TRAIN LOOP
    # NOTE: num_batches is reset here but never incremented (the increment is
    # commented out below).
    num_batches = 0
    for x in distributed_values_train:
        distributed_train_step_batch(x)
        # Collaborative (FedLabSync) variant, currently disabled:
        #colaboratePrediction = collaborative_predictions(x)
        #total_loss = local_collaborative_loss(colaboratePrediction, x)
        #num_batches += 1
    
    # EVAL LOOP
    for x in distributed_values_eval:
        distributed_test_step(x)
        # Collaborative evaluation variant, currently disabled:
        #colaboratePrediction = collaborative_predictions(x)
        #distributed_test_step_batch(x,colaboratePrediction)

    # Save a checkpoint on every even epoch index (0, 2, 4, ...).
    if epoch % 2 == 0:
        checkpoint.save(checkpoint_prefix)

    template = ("Epoch {}, Train_RMSE: {}, Eval MAE: {}, "
              "Eval_RMSE: {}")
    print(template.format(epoch + 1, train_rmse.result(), eval_mae.result(),
                         eval_rmse.result()))
    # The stop decision is taken before the metrics are reset, so on an early
    # stop the metric objects keep the values of the final epoch.
    if early_stopping(eval_rmse.result()):
        break
    eval_mae.reset_states()
    train_rmse.reset_states()
    eval_rmse.reset_states()
    
last = datetime.now()
# NOTE(review): this rebinds the module-level name `time`; if the `time`
# module is imported elsewhere in the notebook it is shadowed from here on.
time=last-past
print("Training Time =", time)

# Save Model

In [None]:
""" with strategy.scope():
    saved_model_path = "RUL_MODEL/FC"+str(NODE+1)+"/federated"
    tf.saved_model.save(model, saved_model_path) """

# Load Model

In [None]:
""" with strategy.scope():
    saved_model_path = "RUL_MODEL/FC"+str(NODE+1)+"/federated"
    loaded = tf.saved_model.load(saved_model_path)
    infer = loaded.signatures["serving_default"]
    print(infer) """

In [None]:
# Prepare the test windows of a single node: load its test file, downsample,
# optionally apply the piece-wise RUL correction, scale, and window the data.
# NOTE: NODE is hard-coded here and overrides any earlier value.
NODE=1
for FC in range(NODE,NODE+1):
        # Load data TEST
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_test'+".h5", 'r') as hdf:
            # Test set
            W_test = np.array(hdf.get('W_test'), dtype='float32')             # W
            X_s_test = np.array(hdf.get('X_s_test'), dtype='float32')         # X_s
            Y_test = np.array(hdf.get('Y_test'), dtype='float32')             # RUL                  
            A_test = np.array(hdf.get('A_test'), dtype='float32')
            
            # Keep every DOWNSAMPLING_STEP[FC]-th row of each table.
            W_test = W_test[::DOWNSAMPLING_STEP[FC],:]
            X_s_test = X_s_test[::DOWNSAMPLING_STEP[FC],:] 
            Y_test = Y_test[::DOWNSAMPLING_STEP[FC]]
            A_test = A_test[::DOWNSAMPLING_STEP[FC],:]

            # Variable names
            W_var = np.array(hdf.get('W_var'))
            X_s_var = np.array(hdf.get('X_s_var'))  
            X_v_var = np.array(hdf.get('X_v_var')) 
            T_var = np.array(hdf.get('T_var'))
            A_var = np.array(hdf.get('A_var'))

            # from np.array to list of fixed-width unicode strings (U20)
            W_var = list(np.array(W_var, dtype='U20'))
            X_s_var = list(np.array(X_s_var, dtype='U20'))  
            X_v_var = list(np.array(X_v_var, dtype='U20')) 
            T_var = list(np.array(T_var, dtype='U20'))
            A_var = list(np.array(A_var, dtype='U20'))
        
        # The multi-node concatenation is commented out; with a single node the
        # *_aux names simply alias the loaded arrays.
        #if FC==0:
            W_test_aux = W_test
            X_s_test_aux = X_s_test
            Y_test_aux = Y_test
            A_test_aux = A_test
        #if FC!=0:
        #    W_test_aux = np.concatenate((W_test_aux, W_test), axis=0)  
        #    X_s_test_aux = np.concatenate((X_s_test_aux, X_s_test), axis=0)
        #    Y_test_aux = np.concatenate((Y_test_aux, Y_test), axis=0) 
        #    A_test_aux = np.concatenate((A_test_aux, A_test), axis=0)
            
# Columns of A: 0 -> unit id, 1 -> cycle, last -> 'hi'.
units_test=A_test_aux[:,0].reshape(-1,1)
cycles_test=A_test_aux[:,1].reshape(-1,1)
hi_test = A_test_aux[:,-1]

# Piece-wise RUL correction, applied here to the unscaled RUL.
# (The DataFrame is named *_train although it holds test data.)
if PIECE_WISE==True:
    df_hs_unit_train = DataFrame({'unit': units_test.reshape(-1).astype(int),'RUL': Y_test_aux.reshape(-1), 'hi': hi_test.reshape(-1)})
    df_hs_unit_train['RUL']=df_hs_unit_train.apply(lambda row: correctRUL(row['hi'],row['RUL']), axis=1)

    pd_aux=DataFrame(df_hs_unit_train.groupby('unit')['RUL'].max()).reset_index()
    df_hs_unit_train['RUL']=df_hs_unit_train.apply(lambda row: correctMaxRUL(row['hi'],row['RUL'], float(pd_aux.iloc[pd_aux.index[pd_aux['unit'] == row['unit']]]['RUL'])), axis=1)
    Y_test_aux=df_hs_unit_train['RUL'].to_numpy().reshape(len(df_hs_unit_train),1)
    
    
                
# NOTE(review): fit_transform re-fits the shared scalers on the TEST data, so
# the test inputs are not scaled with the statistics used during training;
# confirm whether `.transform` was intended.
X_s_test = SCALER_X.fit_transform(X_s_test_aux)
W_test = SCALER_W.fit_transform(W_test_aux)
Y_test = SCALER_Y.fit_transform(Y_test_aux)
            
# Window sensors, operating conditions and labels; the unit and cycle windows
# are kept for the plots further down.
X_windows_test, U_windows_test,C_windows_test=sequence_generator_per_unit(X_s_test,units_test,cycles_test,sequence_length=WINDOW_LEN,stride = STRIDE)
W_windows_test,_,_=sequence_generator_per_unit(W_test,units_test,cycles_test,sequence_length=WINDOW_LEN,stride = STRIDE)
Y_windows_test,_,_=sequence_generator_per_unit(Y_test,units_test,cycles_test,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)
                
X_=X_windows_test[:,:,:]
W_=W_windows_test[:,:,:]
y_=Y_windows_test[:]

# Inference through a loaded SavedModel signature, currently disabled:
#predictions=np.array(loaded.signatures['serving_default'](W_in=tf.convert_to_tensor(X_, dtype=tf.float32),X_in=tf.convert_to_tensor(W_, dtype=tf.float32))['predictions'])

In [None]:
# Predict the (scaled) RUL for all test windows; inputs are passed as (X, W).
predictions = model.predict((tf.convert_to_tensor(X_, dtype=tf.float32),tf.convert_to_tensor(W_, dtype=tf.float32)))


In [None]:
# Ensemble averaging over several prediction arrays, currently disabled:
#mean_prediction= np.mean( np.array([ predictions1, predictions2, predictions3 ]), axis=0 )
# Map predictions and labels back to the original RUL scale with SCALER_Y.
rul_predicted_ = SCALER_Y.inverse_transform(predictions)
groud_truth = SCALER_Y.inverse_transform(Y_windows_test)

In [None]:
# Error metrics on the de-scaled predictions.
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# The arguments are passed as (prediction, truth); both metrics are symmetric
# in their two arguments, so the values are unaffected.
mae=mean_absolute_error(rul_predicted_,groud_truth)
mse=mean_squared_error(rul_predicted_,groud_truth)
rmse=np.sqrt(mse)
# NOTE(review): this header label does not match anything computed in this
# cell -- confirm it is not a leftover from another experiment.
print("GAUSSIAN KALMAN")
print("MAE:",mae)
print("MSE:",mse)
print("RMSE:",rmse)

In [None]:
# Plot predicted vs. true RUL for the units of the selected node.
# NOTE: `units` is only defined when NODE is 0, 1 or 2.
# FC1

if NODE==0:
    units = [107,109,214,312,314,508,510,608,610,708,710,810,815] 
# FC2
if NODE==1:
    units = [108,215,315,507,607,707,811,907,908,909,910] 
# FC3
if NODE==2:
    units = [110,211,310,311,313,509,609,709,812,813,814]
# NOTE: the value 4 is overwritten two lines below by len(units), so the loop
# runs exactly once (x == start) and always takes the `else` branch.
numberUnits=4
start=0
numberUnits = int(len(units))
for x in range(start,len(units),numberUnits):
    if (x+2*numberUnits) < len(units):
    #if (x+numberUnits) < len(units):
        unit_sel = np.array(units[x:x+numberUnits], dtype=int)
        plot_predicted_true_rul([rul_predicted_], unit_sel, U_windows_test, C_windows_test[:,0,:], groud_truth)
    else:
        # The slice end is the number of distinct units present in the windows.
        unit_sel = np.array(units[x:len(np.unique(U_windows_test))], dtype=int)
        plot_predicted_true_rul([rul_predicted_], unit_sel, U_windows_test, C_windows_test[:,0,:], groud_truth)

# Test All trajectories

In [None]:
def fn_data_partition_test(NODES):
    """Build the batched test tensors of all flight classes for every replica.

    The files ``FC<n>/FC<n>_test.h5`` for n = 1..NODES are read, downsampled
    with ``DOWNSAMPLING_STEP``, concatenated, optionally corrected
    (``PIECE_WISE``), scaled with the already fitted ``SCALER_*`` objects,
    windowed and batched with ``BATCH_SIZE``.

    Parameters:
    - NODES: number of flight classes / replicas.

    Returns:
    - list with NODES entries, indexable by replica id. Every entry is the same
      list of ``((X_batch, W_batch), y_batch)`` tuples of ``tf.float32``
      tensors, i.e. the layout produced by ``fn_data_partition_train``.
      NOTE(review): handing every replica the full concatenated test set is the
      assumed intent (all replicas predict the same windows before their
      outputs are averaged) -- confirm.

    Data are loaded as float32, like the training data, so that sensor values
    are not quantised before scaling.
    """
    W_parts, X_s_parts, Y_parts, A_parts = [], [], [], []
    for FC in range(0, NODES):
        step = DOWNSAMPLING_STEP[FC]
        # Load data TEST (only the tables that are actually used below)
        with h5py.File("FC"+str(FC+1)+"/FC"+str(FC+1)+'_test'+".h5", 'r') as hdf:
            W_parts.append(np.array(hdf.get('W_test'), dtype='float32')[::step, :])        # W
            X_s_parts.append(np.array(hdf.get('X_s_test'), dtype='float32')[::step, :])    # X_s
            Y_parts.append(np.array(hdf.get('Y_test'), dtype='float32')[::step, :])        # RUL
            A_parts.append(np.array(hdf.get('A_test'), dtype='float32')[::step, :])

    W_test_aux = np.concatenate(W_parts, axis=0)
    X_s_test_aux = np.concatenate(X_s_parts, axis=0)
    Y_test_aux = np.concatenate(Y_parts, axis=0)
    A_test_aux = np.concatenate(A_parts, axis=0)

    # Columns of A: 0 -> unit id, 1 -> cycle, last -> 'hi'. The unit ids come
    # from A (as everywhere else in this notebook), not from W.
    units_test = A_test_aux[:, 0].reshape(-1, 1)
    cycles_test = A_test_aux[:, 1].reshape(-1, 1)
    hi_test = A_test_aux[:, -1]

    if PIECE_WISE==True:
        df_hs_unit_test = DataFrame({'unit': units_test.reshape(-1).astype(int),'RUL': Y_test_aux.reshape(-1), 'hi': hi_test.reshape(-1)})
        df_hs_unit_test['RUL']=df_hs_unit_test.apply(lambda row: correctRUL(row['hi'],row['RUL']), axis=1)

        # Per-unit maximum RUL, looked up for every row.
        pd_aux=DataFrame(df_hs_unit_test.groupby('unit')['RUL'].max()).reset_index()
        df_hs_unit_test['RUL']=df_hs_unit_test.apply(lambda row: correctMaxRUL(row['hi'],row['RUL'], float(pd_aux.iloc[pd_aux.index[pd_aux['unit'] == row['unit']]]['RUL'])), axis=1)
        Y_test_aux=df_hs_unit_test['RUL'].to_numpy().reshape(len(df_hs_unit_test),1)

    # SCALE with the scalers as currently fitted (no re-fit on test data).
    X_s_test = SCALER_X.transform(X_s_test_aux)
    W_test = SCALER_W.transform(W_test_aux)
    Y_test = SCALER_Y.transform(Y_test_aux)

    X_windows_test, _, _ = sequence_generator_per_unit(X_s_test,units_test,cycles_test,sequence_length=WINDOW_LEN,stride = STRIDE)
    W_windows_test, _, _ = sequence_generator_per_unit(W_test,units_test,cycles_test,sequence_length=WINDOW_LEN,stride = STRIDE)
    Y_windows_test, _, _ = sequence_generator_per_unit(Y_test,units_test,cycles_test,sequence_length=WINDOW_LEN,option='last',stride = STRIDE)

    X_ = batch_data_3d(X_windows_test,BATCH_SIZE)
    W_ = batch_data_3d(W_windows_test,BATCH_SIZE)
    y_ = batch_data_2d(Y_windows_test,BATCH_SIZE)

    # One ((X, W), y) tensor tuple per batch.
    DATA_REPLICA_ID = [
        (
            (tf.convert_to_tensor(x_batch, dtype=tf.float32),
             tf.convert_to_tensor(w_batch, dtype=tf.float32)),
            tf.convert_to_tensor(y_batch, dtype=tf.float32),
        )
        for x_batch, w_batch, y_batch in zip(X_, W_, y_)
    ]

    # The batch list is shared (read-only) between the per-replica entries.
    distributedData = [DATA_REPLICA_ID for _ in range(NODES)]
    return distributedData
    
    

In [None]:
# Build the test data for all NODES flight classes.
distributedDataTest = fn_data_partition_test(NODES)

In [None]:
# NOTE(review): distributedDataTest must be indexable by replica id; verify
# that fn_data_partition_test returns such a collection.
def value_fn_test(ctx):
    """Return the test batch list whose index equals the replica id."""
    return distributedDataTest[ctx.replica_id_in_sync_group]
distributed_values_test = strategy.experimental_distribute_values_from_function(value_fn_test)
# colaboratePrediction is overwritten on every iteration, so only the
# prediction for the last batch is kept after the loop.
for x in distributed_values_test:
    colaboratePrediction = collaborative_predictions(x)

In [None]:
# Display the collaborative prediction left over from the loop above.
colaboratePrediction