# Dev and Test data for each Flight Class (FC)

This notebook concatenates the per-dataset `DS*_dev.h5` and `DS*_test.h5` files produced by the upstream preprocessing step. After setting the flight class `FC`, the `MODE` (`'DEV'` or `'TEST'`) and the filenames of interest in the "Data of interest" section, this notebook saves either a `FC*_dev.h5` or a `FC*_test.h5` dataset file.

The resulting dataset files are used to run: 
- Deep Learning N-CMAPSS by Flight Class.ipynb
- Deep Learning N-CMAPSS Data-centralized.ipynb
- Collaborative prediction using NCMAPSS.ipynb


# Libraries

In [None]:
# Optional: uncomment the next line to install seaborn if it is missing from the kernel's environment.
#!pip install seaborn

In [56]:
import os
import h5py
import time
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

# Data of interest

In [77]:
### Set-up - Define file location
# FC   : flight class; selects the input folder "FC<FC>/<MODE>/" and the output file name.
# MODE : 'DEV' or 'TEST'; selects the '_dev' / '_test' suffix of files and datasets.
FC = 3
MODE = 'TEST'

# One entry per source dataset. The position in this list matters: the loader
# adds (position + 1) * 100 to the `unit` column of each file, so a dataset that
# should be skipped is replaced by an `np.nan` placeholder instead of being
# removed. The last slot is currently a placeholder ('N-CMAPSS_DS08c-008' is
# not included).
filenames = [
    'N-CMAPSS_DS01-005',
    'N-CMAPSS_DS02-006',
    'N-CMAPSS_DS03-012',
    'N-CMAPSS_DS04',
    'N-CMAPSS_DS05',
    'N-CMAPSS_DS06',
    'N-CMAPSS_DS07',
    'N-CMAPSS_DS08a-009',
    np.nan,
]


In [78]:
# Translate the user-facing MODE switch into the suffix used in the input
# file names and in the HDF5 dataset names.
MODE_SUFFIXES = {'DEV': '_dev', 'TEST': '_test'}

# Fail loudly on an unsupported MODE: otherwise `mode` would stay undefined
# (or keep a stale value from a previous run of this cell), and the notebook
# would silently read and write the wrong split.
if MODE not in MODE_SUFFIXES:
    raise ValueError(
        "MODE must be one of " + str(sorted(MODE_SUFFIXES)) + ", got " + repr(MODE)
    )

mode = MODE_SUFFIXES[MODE]

## Read Data from multiple filenames

In [79]:
# Time tracking, Operation time (min):  0.003
t = time.process_time()

# Per-sample datasets (stored as "<key><mode>") and variable-name datasets
# (stored without suffix) expected in every source file.
_DATA_KEYS = ('W', 'X_s', 'X_v', 'T', 'Y', 'A')
_VAR_KEYS = ('W_var', 'X_s_var', 'X_v_var', 'T_var', 'A_var')


def _load_fc_file(path, suffix, unit_offset):
    """Read the arrays of one per-dataset HDF5 file.

    Parameters
    ----------
    path : str
        Location of the ``.h5`` file to read.
    suffix : str
        Suffix appended to the per-sample dataset names (``'_dev'`` or ``'_test'``).
    unit_offset : int
        Value added to the ``unit`` column of the auxiliary array ``A``
        (presumably to keep unit ids distinct across source files).

    Returns
    -------
    arrays : dict
        Maps each of ``W, X_s, X_v, T, Y, A`` to its array. ``A`` is returned
        as float32 with the offset already applied.
    var_names : dict
        Maps each of ``W_var, X_s_var, X_v_var, T_var, A_var`` to the array
        exactly as stored in the file.
    """
    with h5py.File(path, 'r') as hdf:
        arrays = {key: np.array(hdf.get(key + suffix)) for key in _DATA_KEYS}
        var_names = {key: np.array(hdf.get(key)) for key in _VAR_KEYS}

    # Locate the `unit` column by name rather than by position.
    aux_columns = list(np.array(var_names['A_var'], dtype='U20'))
    df_A = DataFrame(data=arrays['A'], columns=aux_columns)
    df_A['unit'] = df_A['unit'] + unit_offset
    arrays['A'] = df_A.to_numpy(dtype='float32')
    return arrays, var_names


# Collect every file exactly once, then concatenate in a single step. This
# avoids appending the first file twice and avoids re-copying the accumulated
# arrays on every iteration.
_chunks = {key: [] for key in _DATA_KEYS}
_first_var_names = None

for filename_poss, filename in enumerate(filenames):
    if pd.isnull(filename):
        # Placeholder entry: skipped, but it still occupies a position so the
        # unit offsets of the following files stay unchanged.
        continue

    path = os.path.join("FC" + str(FC), MODE, filename + mode + ".h5")
    arrays, var_names = _load_fc_file(path, mode, (filename_poss + 1) * 100)

    for key in _DATA_KEYS:
        _chunks[key].append(arrays[key])
    if _first_var_names is None:
        # Variable names are taken from the first file that is actually read.
        _first_var_names = var_names

if _first_var_names is None:
    raise ValueError("`filenames` contains no valid entry; there is nothing to load.")

W = np.concatenate(_chunks['W'], axis=0)
X_s = np.concatenate(_chunks['X_s'], axis=0)
X_v = np.concatenate(_chunks['X_v'], axis=0)
T = np.concatenate(_chunks['T'], axis=0)
Y = np.concatenate(_chunks['Y'], axis=0)      # RUL
A = np.concatenate(_chunks['A'], axis=0)      # Auxiliary
del _chunks                                    # release the per-file copies

# Raw variable-name arrays, written back unchanged by the save cell.
W_var_array = _first_var_names['W_var']
X_s_var_array = _first_var_names['X_s_var']
X_v_var_array = _first_var_names['X_v_var']
T_var_array = _first_var_names['T_var']
A_var_array = _first_var_names['A_var']

# Same names as plain Python lists of str.
W_var = list(np.array(W_var_array, dtype='U20'))
X_s_var = list(np.array(X_s_var_array, dtype='U20'))
X_v_var = list(np.array(X_v_var_array, dtype='U20'))
T_var = list(np.array(T_var_array, dtype='U20'))
A_var = list(np.array(A_var_array, dtype='U20'))

# Summary, printed once after all files are merged so the shapes are final.
print('')
print("Operation time (min): ", (time.process_time() - t) / 60)
print('')
print("W shape: " + str(W.shape))
print("X_s shape: " + str(X_s.shape))
print("X_v shape: " + str(X_v.shape))
print("T shape: " + str(T.shape))
print("Y shape: " + str(Y.shape))
print("A shape: " + str(A.shape))

print("W_var shape: " + str(len(W_var)))
print("X_s_var shape: " + str(len(X_s_var)))
print("X_v_var shape: " + str(len(X_v_var)))
print("T_var shape: " + str(len(T_var)))
print("A_var shape: " + str(len(A_var)))

## Save data per mode

In [80]:
# Write the merged arrays to "FC<FC><mode>.h5" in the working directory.
# Per-sample datasets carry the mode suffix; variable-name datasets do not.
merged_path = "FC" + str(FC) + mode + '.h5'

suffixed_datasets = [
    ('T', T),
    ('Y', Y),
    ('A', A),
    ('W', W),
    ('X_s', X_s),
    ('X_v', X_v),
]
plain_datasets = [
    ('W_var', W_var_array),
    ('X_s_var', X_s_var_array),
    ('X_v_var', X_v_var_array),
    ('T_var', T_var_array),
    ('A_var', A_var_array),
]

with h5py.File(merged_path, 'w') as f:
    for dataset_name, values in suffixed_datasets:
        f.create_dataset(dataset_name + mode, data=values)
    for dataset_name, values in plain_datasets:
        f.create_dataset(dataset_name, data=values)