# Initialization

Test notebook for the C-MAPSS benchmark. It searches for the best data-related parameters for each dataset.

First we import the necessary packages and create the global variables.

In [1]:
import math
import time
import numpy as np
import csv
import sys

from keras.models import Sequential, Model
from keras.layers import Dense, Input, Dropout, Reshape, Conv2D, Flatten, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
from keras import backend as K
from keras import regularizers

from sklearn.preprocessing import StandardScaler, MinMaxScaler

sys.path.append('/Users/davidlaredorazo/Documents/University_of_California/Research/Projects')

from ann_framework.data_handlers.data_handler_CMAPSS import CMAPSSDataHandler
from ann_framework.tunable_model.tunable_model import SequenceTunableModelRegression
from ann_framework import aux_functions

from scipy.optimize import differential_evolution

Using TensorFlow backend.


# Define the model

In [2]:
#K.clear_session()  #Clear the previous tensorflow graph

# Regularization strengths read by RULmodel_SN: they are passed as
# regularizers.L1L2(l1, l2) to the kernel of every Dense layer of the network.
l2_lambda_regularization = 0.20
l1_lambda_regularization = 0.10

def RULmodel_SN(input_shape):
    """Build the (uncompiled) shallow fully connected network for RUL regression.

    Architecture: two ReLU hidden layers of 20 units each ('fc1' and 'fc2'),
    initialized with Glorot-normal weights, followed by a single linear output
    unit ('out'). The kernel of every layer is penalized with an L1L2
    regularizer built from the module-level l1_lambda_regularization and
    l2_lambda_regularization values.

    Args:
        input_shape: Size of the input vector; passed to the first layer as
            its input_dim.

    Returns:
        The uncompiled keras Sequential model.
    """
    def kernel_penalty():
        #A fresh regularizer per layer, built from the current global lambdas
        return regularizers.L1L2(l1_lambda_regularization, l2_lambda_regularization)

    model = Sequential()

    #Only the first layer of a Sequential model declares the input size
    model.add(Dense(20, input_dim=input_shape, activation='relu', kernel_initializer='glorot_normal',
                    kernel_regularizer=kernel_penalty(),
                    name='fc1'))
    #fc2 receives the 20 outputs of fc1, so it must not declare input_dim=input_shape
    model.add(Dense(20, activation='relu', kernel_initializer='glorot_normal',
                    kernel_regularizer=kernel_penalty(),
                    name='fc2'))
    model.add(Dense(1, activation='linear',
                    kernel_regularizer=kernel_penalty(), name='out'))

    return model

# Create the Data Handler

In [3]:
#Sensor names in column order; the subset used for training follows the CNN paper
features = [
    'T2', 'T24', 'T30', 'T50', 'P2', 'P15', 'P30', 'Nf', 'Nc', 'epr', 'Ps30',
    'phi', 'NRf', 'NRc', 'BPR', 'farB', 'htBleed', 'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32',
]

#1-based positions of the selected sensors inside `features`
selected_indices = np.array([2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21])
selected_features = [features[position - 1] for position in selected_indices]

data_folder = '../CMAPSSData'

#Initial windowing / target parameters handed to the data handler
window_size = 30
window_stride = 1
max_rul = 125

#Scaler that maps every feature into the [-1, 1] range
min_max_scaler = MinMaxScaler(feature_range=(-1, 1))

#Data handler created for dataset 1; the data itself is not loaded in this cell
dHandler_cmaps = CMAPSSDataHandler(data_folder, 1, selected_features, max_rul, window_size, window_stride)
#dHandler_cmaps.load_data(verbose=1, cross_validation_ratio=0.2)

# Create the Tunable Model

In [5]:
#Scaler mapping every feature into [-1, 1]; it is later assigned to tModel.data_scaler.
#The same scaler is already created in the data-handler cell, so this line re-creates it.
min_max_scaler = MinMaxScaler(feature_range=(-1, 1))
#Alternative: no scaler (presumably disables scaling in the tunable model - TODO confirm)
#min_max_scaler = None

In [6]:
def get_compiled_model(shape, model_type='ann'):
    """Create and compile the keras model for the requested model type.

    Args:
        shape: Input size forwarded to RULmodel_SN.
        model_type: 'ann' builds the fully connected network. Any other value
            (including the unimplemented 'lstm' placeholder) builds nothing.

    Returns:
        The compiled keras model for 'ann', otherwise None.
    """
    #Settings shared by every model type. The optimizer starts with lr=0;
    #presumably the learning rate is driven by the LearningRateScheduler
    #callback supplied at training time - TODO confirm.
    optimizer = Adam(lr=0, beta_1=0.5)
    loss_name = "mean_squared_error"
    metric_names = ["mse"]

    if model_type != 'ann':
        #The 'lstm' branch was never implemented: it was meant to build
        #RULmodel_LSTM with shape (window_size, len(selected_features)).
        return None

    model = RULmodel_SN(shape)
    model.compile(optimizer=optimizer, loss=loss_name, metrics=metric_names)
    return model

In [None]:
#Input size of the network: one value per selected feature and per time step of the window
nFeatures = len(selected_features)
shapeSN = nFeatures*window_size
#Build and compile the fully connected model for that input size
modelRULSN = get_compiled_model(shapeSN, model_type='ann')
#Wrap the keras model together with the C-MAPSS data handler in the tunable model
tModel = SequenceTunableModelRegression('ModelRUL_SN_1', modelRULSN, lib_type='keras', data_handler=dHandler_cmaps)
#tModel = SequenceTunableModelRegression('ModelRUL_LSTM_1', modelRULLSTM, lib_type='keras', data_handler=dHandler_cmaps)
#Scaler the tunable model should use for the data (see min_max_scaler above)
tModel.data_scaler = min_max_scaler

# Define objective function

Define the objective function: it evaluates one set of data-related parameters (window size, window stride and maximum RUL) and returns the resulting RMSE.

In [None]:
def nn_optmize_fun(x, tModel, verbose=0, epochs=250, saveToFile = None, iterations = 0):
    """Objective function for the data-parameter search.

    Reloads the data with the candidate parameters, builds a fresh network
    sized for the resulting input, trains it and evaluates it.

    Args:
        x: Candidate vector [window size, window stride, max RUL]. The
            optimizer supplies real values; they are rounded to the nearest
            integer before use.
        tModel: Tunable model whose data handler, keras model and epoch count
            are overwritten by this call.
        verbose: Verbosity forwarded to tModel.train_model.
        epochs: Number of training epochs for this evaluation.
        saveToFile: Optional open, writable file object. When given, one CSV
            row [window size, window stride, max RUL, rmse, rhs] is appended.
            When None, the row is printed instead.
        iterations: Progress messages are printed only when this is 0.

    Returns:
        The RMSE reported by tModel.evaluate_model, used to steer the search.
    """

    #Clear the previous tensorflow graph
    K.clear_session()

    #Extract the tuning variables from the input vector.
    #The optimizer works on real numbers: round to the nearest integer
    #(a plain astype(int) would truncate and never reach the upper bounds).
    x = np.rint(x).astype(int)

    #Load the data using the selected parameters
    tModel.data_handler.sequence_length = x[0]
    tModel.data_handler.sequence_stride = x[1]
    tModel.data_handler.max_rul = x[2]

    tModel.load_data(unroll=True, verbose=0, cross_validation_ratio=0, reload_data=True)

    if iterations == 0:
        print("Training model")

    #Create a new model whose input size matches the new window length
    lrate = LearningRateScheduler(aux_functions.step_decay)
    nFeatures = len(selected_features)
    model_shape = nFeatures*tModel.data_handler.sequence_length
    modelRULSN = get_compiled_model(model_shape, model_type='ann')
    tModel.change_model('ModelRUL_SN_1', modelRULSN, 'keras')
    tModel.epochs = epochs

    #Train model
    tModel.train_model(learningRate_scheduler=lrate, verbose=verbose)
    #Named train_time so the imported `time` module is not shadowed
    train_time = tModel.train_time

    if iterations == 0:
        print("Training time {}".format(train_time))
        print("Assesing model performance")

    #Assess the model performance
    tModel.evaluate_model(["rmse", "rhs"], round=2)
    cScores = tModel.scores
    rmse = cScores['rmse']
    rhs = cScores['rhs']

    msgStr = "The model variables are " + str(x) + "\tThe scores are: [RMSE:{:.4f}, RHS:{:.4f}]\n".format(rmse, rhs)
    print(msgStr)
    row = x.tolist() + [rmse, rhs]

    if saveToFile is not None:
        writer = csv.writer(saveToFile)
        writer.writerow(row)
    else:
        print(row)

    #Return RMSE as the performance metric to steer the search
    return rmse
    

# Optimize the parameters for the NN using DE

In [None]:
#Optimize the parameters for the NN using DE

#Largest usable window size per dataset
#maxWindowSize = {'1':30, '2':20, '3':30, '4':18}
maxWindowSize = {'1':30, '2':20} #Do it only for datasets 1 and 2
#Elapsed time and optimizer result per dataset; entries not searched stay 0
totalTime = {'1':0, '2':0, '3':0, '4':0}
results = {'1':0, '2':0, '3':0, '4':0}

selected_features = ['T24', 'T30', 'T50', 'P30', 'Nf', 'Nc', 'Ps30', 'phi', 'NRf', 'NRc',
                     'BPR', 'htBleed', 'W31', 'W32']

for datasetNumber in maxWindowSize:

    print("Tunning for dataset "+datasetNumber)

    #Search ranges, in the order nn_optmize_fun reads them from its input vector
    windowSizeBounds = [1,maxWindowSize[datasetNumber]]
    windowStrideBounds = [1,10]
    constantRULBounds = [90,140]

    bounds = [windowSizeBounds, windowStrideBounds, constantRULBounds]
    #bounds = [windowStrideBounds, constantRULBounds] #Optimize only 2 variabes

    tModel.data_handler.change_dataset(datasetNumber)

    #The context manager closes the results file even if the search raises
    with open("results/MLP/intermediateResults_refactorized_"+datasetNumber+".csv", "w") as resultsFile:

        #time.clock() was removed in Python 3.8; perf_counter() is its replacement
        startTime = time.perf_counter()
        #args maps to nn_optmize_fun(x, tModel, verbose, epochs, saveToFile, iterations)
        tempResults = differential_evolution(nn_optmize_fun, bounds,
                                             args=(tModel, 0, 20, resultsFile, 1),
                                             strategy='best1bin', maxiter=30, popsize=4, disp=True, polish=False)
        endTime = time.perf_counter()

    results[datasetNumber] = tempResults
    totalTime[datasetNumber] = endTime - startTime

In [None]:
#Report the elapsed search time per dataset and the raw optimizer results
#(entries for datasets that were not searched remain 0)
print("Total time {}".format(totalTime))
print(results)