##### Imports

In [16]:
import os
# Silence TensorFlow's native (C++) logging. This cell runs before the cell that
# imports tensorflow -- presumably because the variable is read when the library
# is loaded (NOTE(review): confirm against the TensorFlow docs).
# TENSORFLOW LOGS:
# 0 = all messages are logged (default behavior)
# 1 = INFO messages are not printed
# 2 = INFO and WARNING messages are not printed
# 3 = INFO, WARNING, and ERROR messages are not printed
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

In [17]:
import os
import math
import pickle
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.python.keras.api import keras

from IPython.display import clear_output

from hpcscripts.sharedutils.trainingutils import LoadModel, SetLowTFVerbose, MakeSinglePrediction, CreateWindowGenerator, GetFileList, TrainModel
from hpcscripts.sharedutils.nomalization import DF_Nomalize, denorm
from hpcscripts.sharedutils.modelutils import SelectModelPrompt
from hpcscripts.trainers.anntrainer import CreateANNModel, ImportCombinedTrainingData
from hpcscripts.trainers.modeldefinitions import  MODEL_DEFINITIONS
from hpcscripts.trainers import modeldefinitions as mdef
from hpcscripts.trainers import anntrainer
from hpcscripts.option import pathhandler as ph
from hpcscripts.option import globalparams as G_PARAMS

# Codes

#### Functions Definition

In [18]:
def create_binary(digits_num: int, init_val: int=0):
    """Return a new list holding `digits_num` copies of `init_val`.

    The list is used as a fixed-width binary counter, most significant
    digit first.
    """
    return [init_val] * digits_num

def increment_binary(binary: list):
    """Add one to a fixed-width binary counter, in place.

    The counter is a list of 0/1 digits with the most significant digit first.

    Args:
        binary: list of digits (each expected to be 0 or 1). Modified in place
            when the increment succeeds.

    Returns:
        The same list object after the increment, or ``-1`` when the counter
        cannot be incremented without overflowing its width (every digit is
        already 1, or the list is empty). On overflow the list is left
        untouched.
    """
    width = len(binary)

    # Walk from the least significant digit towards the most significant one
    # looking for the first 0: that digit absorbs the carry.
    for index in range(width - 1, -1, -1):
        if binary[index] == 0:
            break
    else:
        # No digit can absorb the carry -> overflow. This also covers the
        # single-digit [1] and empty [] cases, which previously wrapped
        # around silently instead of reporting overflow.
        return -1

    binary[index] = 1
    # Every digit to the right of the absorbing one was a 1 and rolls over.
    for lower in range(index + 1, width):
        binary[lower] = 0

    return binary

def create_possibility_mask(digits_num: int):
    """Enumerate every non-zero 0/1 mask of width `digits_num`.

    Masks are returned in increasing binary order, most significant digit
    first: for ``digits_num == 2`` the result is ``[[0, 1], [1, 0], [1, 1]]``.
    The all-zero mask is deliberately excluded.

    Args:
        digits_num: width of each mask.

    Returns:
        A list of ``2**digits_num - 1`` independent lists of ints. The list is
        empty when ``digits_num`` is smaller than 1.
    """
    # A zero-width (or negative-width) mask has no non-zero combination.
    # Enumerating a bounded range instead of looping until an overflow
    # sentinel guarantees termination for every width, including 0 and 1.
    if digits_num < 1:
        return []

    highest = digits_num - 1
    return [
        [(value >> (highest - position)) & 1 for position in range(digits_num)]
        for value in range(1, 1 << digits_num)
    ]

def create_possible_features(feature_list: list):
    """Build the feature subset selected by each possibility mask.

    One mask is generated per combination by `create_possibility_mask`, using
    one digit per entry of `feature_list`. A feature is kept in a subset when
    the digit at its position equals 1.

    Returns:
        A tuple ``(subsets, masks)``: the list of feature subsets and the
        list of masks they were derived from, in matching order.
    """
    masks = create_possibility_mask(len(feature_list))

    subsets = [
        [name for flag, name in zip(mask, feature_list) if flag == 1]
        for mask in masks
    ]

    return subsets, masks

In [19]:
def print_list(the_list: list):
    """Print each item of `the_list` on a line of its own."""
    for item in the_list:
        print(item)
        
def bin_to_index(binary: str = "1010"):
    """Convert a binary digit string to its numeric value minus one.

    Only "1" characters contribute; any other character counts as zero.
    The right-most character is the least significant digit.
    """
    weights = (
        2**position
        for position, digit in enumerate(reversed(binary))
        if digit == "1"
    )
    return sum(weights) - 1

#### Create Possible Feature Combinations

In [20]:
# Candidate input features. 'use_flap' is a placeholder that is expanded into
# the three flap_*_bool columns further down.
feature_list = [
                'hralt_m', 'theta_rad', 'aoac_rad', 'cas_mps', 'hdot_1_mps',
                'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad',
                'tailwind_mps', 'crosswind_mps',
                'use_flap'
                ]
                # 'flap_4_bool', 'flap_5_bool', 'flap_6_bool'
# label_list = ["hralt_m", "theta_rad", "aoac_rad", "cas_mps"]
# feature_list = ["hralt_m", "theta_rad"]

# Exhaustive search space: every non-empty subset of feature_list, together
# with the 0/1 mask each subset was generated from.
feature_possibs, possi_mask = create_possible_features(feature_list)

# Replace the 'use_flap' placeholder with the three flap columns, which are
# therefore always added or dropped together.
for i, feature_poss in enumerate (feature_possibs):
    if 'use_flap' in feature_poss:
        feature_poss.remove('use_flap')
        feature_poss = feature_poss + ['flap_4_bool', 'flap_5_bool', 'flap_6_bool']

        feature_possibs[i] = feature_poss


# NOTE: the assignments below overwrite the exhaustive list built above with
# hand-picked candidates. Only the LAST assignment ("For Aileron") is in
# effect when this cell finishes.

# For Elevator
feature_possibs = [ 
                        ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', ], 
                        ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad'], 
                        ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad'], 
                        ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad'], 
                        ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad', 'tailwind_mps'], 
                        ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'tailwind_mps'], 
                        #['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'tailwind_mps', 'flap_4_bool', 'flap_5_bool', 'flap_6_bool']
                    ]

# For Aileron (overrides the elevator list above)
feature_possibs = [ 
                        ['phi_rad',], 
                        ['phi_rad', 'loc_dev_ddm',], 
                        ['phi_rad', 'loc_dev_ddm', 'crosswind_mps',], 
                        ['phi_rad', 'loc_dev_ddm', 'crosswind_mps', 'hralt_m'], 
                    ]

# Show the candidate feature sets that the search will iterate over.
print ("Num. of possibilities: {}".format(len(feature_possibs)))
print_list(feature_possibs)

Num. of possibilities: 6
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad', 'tailwind_mps']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'tailwind_mps']


In [21]:
# Find index
# Locate `check_feature` inside `feature_possibs` (ignoring element order) and,
# from the position reached so far, extrapolate the total and remaining
# duration of the search.
check_feature = [
            'hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 
            'gamma_error_rad', 'tailwind_mps', 'g_err_d_rad', # 'crosswind_mps',
            'flap_4_bool', 'flap_5_bool', 'flap_6_bool',
    ]
current_index = 0

# Currently disabled (`if False`); flip the condition to run the estimate.
if False:
    for i, feature_poss in enumerate (feature_possibs):

        # Counter comparison matches the same features in any order.
        if collections.Counter(feature_poss) == collections.Counter(check_feature):
            current_index = i
            print ("index -> {}".format(current_index))
            print ("features -> {}".format(feature_poss))
            break

    elapsed_time = 137 # Hours since 1.00 AM 20th July 2022

    # The estimate divides by the progress rate (current_index / elapsed_time).
    # That rate is zero when the match is the first entry or when nothing
    # matched, so guard against a ZeroDivisionError.
    if current_index > 0:
        total_days = len(feature_possibs)/(current_index/elapsed_time)/24
        remaining_days = total_days - elapsed_time/24

        print ()
        print ("total_days: \t\t{:.2f}".format(total_days))
        print ("remaining_days: \t{:.2f}".format(remaining_days))
    else:
        print ()
        print ("current_index is 0: no progress rate available, cannot estimate remaining time")

In [22]:
# feature_possibs[bin_to_index("1011")]

### Search for the best features

#### Functions Definition

In [23]:
def search_features(model_id: str, itter_times:int, collected_list:list, filename: str = "features_search.pkl", feature_sets: list = None):
    """Train a model repeatedly for each feature set and record the averaged validation metrics.

    For every feature set the model is trained `itter_times` times through
    `anntrainer.run`. For each history key starting with 'val', the minimum
    value of every run and the epoch index at which it occurred are collected
    and then averaged over the runs. Only validation metrics are aggregated;
    nothing is evaluated on a separate test set here.

    Args:
        model_id: model identifier forwarded to `anntrainer.run`.
        itter_times: number of training runs per feature set.
        collected_list: list that receives one summary dict per feature set
            (``{'features': [...], 'performance': {...}}``). It is mutated in
            place, so the caller keeps the partial results if the search is
            interrupted.
        filename: path of the pickle file that `collected_list` is dumped to.
        feature_sets: feature-name lists to evaluate. Defaults to the
            notebook-level `feature_possibs`, which is what this function
            always used before the parameter existed.

    Side effects:
        Overwrites `G_PARAMS.FEATURE_COLUMNS` (it is left set to the last
        feature set), clears the cell output before each run, and rewrites
        `filename` after every feature set and once more at the end.
    """
    if feature_sets is None:
        # Backward-compatible default: fall back to the notebook global.
        feature_sets = feature_possibs

    for features in feature_sets:
        val_data = {}

        # Presumably read by anntrainer.run to select the model inputs --
        # NOTE(review): confirm in hpcscripts.trainers.anntrainer.
        G_PARAMS.FEATURE_COLUMNS = features
        for i in range (itter_times):
            clear_output(wait=True)
            print ("i = {}, feature -> {}".format(i, features))
            _, history = anntrainer.run(
                model_id,
                save_model = False,
                return_model = True
            )

            print ()

            for key, series in history.history.items():
                if not key.startswith('val'):
                    continue
                min_value = min (series)
                min_epoch = series.index(min_value)

                # Best value of this run, and the epoch where it was reached.
                val_data.setdefault(key, []).append(min_value)
                val_data.setdefault(key + '_epoch', []).append(min_epoch)

        # Average each collected series over the `itter_times` runs.
        val_data_avg = {key: np.average (values) for key, values in val_data.items()}

        collected_list.append({
            'features': features,
            'performance': val_data_avg,
        })

        # Checkpoint after every feature set so an interrupted search keeps
        # the results gathered so far.
        with open(filename, 'wb') as f:
            pickle.dump(collected_list, f)


    # Final save; also covers the case where no feature set was processed.
    with open(filename, 'wb') as f:
        pickle.dump(collected_list, f)
    print ("picke saved")


#### Codes

In [24]:
# Search configuration: which model definition to train and how many training
# runs to average per feature set.
model_id = 'simp_dense_ail'
itter_times = 5
collected_list = []

# Disabled: continue from previously collected results instead of an empty list.
# NOTE(review): this reads "features_search.pkl" while the search below writes
# 'features_performance.pkl' -- confirm the two file names are meant to differ.
if False:
    with open("features_search.pkl", 'rb') as f:
        collected_list = pickle.load(f)    

# Disabled: the search itself. The cells below read the results file it writes.
if False: 
    search_features(model_id, itter_times, collected_list, filename='features_performance.pkl')

In [25]:
# Load the search results written by search_features: a list with one
# {'features': [...], 'performance': {...}} dict per feature set.
# pickle.load can execute arbitrary code -- only open files produced by this notebook.
with open("features_performance.pkl", 'rb') as f:
    loaded_result = pickle.load(f)

# Display the raw results.
loaded_result

[{'features': ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps'],
  'performance': {'val_loss': 0.5203588008880615,
   'val_loss_epoch': 1.4,
   'val_mean_squared_error': 0.5203588008880615,
   'val_mean_squared_error_epoch': 1.4,
   'val_mean_absolute_error': 0.46365341544151306,
   'val_mean_absolute_error_epoch': 2.6}},
 {'features': ['hralt_m',
   'hdot_1_mps',
   'theta_rad',
   'cas_mps',
   'gamma_error_rad'],
  'performance': {'val_loss': 0.5085821628570557,
   'val_loss_epoch': 2.0,
   'val_mean_squared_error': 0.5085821628570557,
   'val_mean_squared_error_epoch': 2.0,
   'val_mean_absolute_error': 0.45907732248306277,
   'val_mean_absolute_error_epoch': 1.8}},
 {'features': ['hralt_m',
   'hdot_1_mps',
   'theta_rad',
   'cas_mps',
   'gamma_error_rad',
   'g_err_d_rad'],
  'performance': {'val_loss': 0.5053481698036194,
   'val_loss_epoch': 2.8,
   'val_mean_squared_error': 0.5053481698036194,
   'val_mean_squared_error_epoch': 2.8,
   'val_mean_absolute_error': 0.4580515265

In [26]:
# Re-shape the per-feature-set summaries into column-oriented form
# (one list per column) so they can be turned into a DataFrame.
params = ['val_mean_squared_error', 'val_mean_absolute_error']

# Create every column up front so the table keeps all of its columns even
# when `loaded_result` is empty.
new_dict = {'features': []}
new_dict.update({param: [] for param in params})

for result in loaded_result:
    new_dict['features'].append(result['features'])

    for param in params:
        new_dict[param].append(result['performance'][param])

new_dict

{'features': [['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps'],
  ['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad'],
  ['hralt_m',
   'hdot_1_mps',
   'theta_rad',
   'cas_mps',
   'gamma_error_rad',
   'g_err_d_rad'],
  ['hralt_m',
   'hdot_1_mps',
   'theta_rad',
   'cas_mps',
   'gamma_error_rad',
   'g_err_d_rad',
   'g_err_i_rad'],
  ['hralt_m',
   'hdot_1_mps',
   'theta_rad',
   'cas_mps',
   'gamma_error_rad',
   'g_err_d_rad',
   'g_err_i_rad',
   'tailwind_mps'],
  ['hralt_m',
   'hdot_1_mps',
   'theta_rad',
   'cas_mps',
   'gamma_error_rad',
   'g_err_d_rad',
   'tailwind_mps']],
 'val_mean_squared_error': [0.5203588008880615,
  0.5085821628570557,
  0.5053481698036194,
  0.5106477379798889,
  0.49020100235939024,
  0.48692888021469116],
 'val_mean_absolute_error': [0.46365341544151306,
  0.45907732248306277,
  0.4580515265464783,
  0.461227947473526,
  0.4360851228237152,
  0.43384069204330444]}

In [27]:
# One row per feature set; columns are the feature list and its averaged metrics.
new_df = pd.DataFrame(new_dict)


# Print the feature lists in full, one per line, before displaying the table.
for feat in new_dict['features']:
    print (feat)

new_df

['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'g_err_i_rad', 'tailwind_mps']
['hralt_m', 'hdot_1_mps', 'theta_rad', 'cas_mps', 'gamma_error_rad', 'g_err_d_rad', 'tailwind_mps']


Unnamed: 0,features,val_mean_squared_error,val_mean_absolute_error
0,"[hralt_m, hdot_1_mps, theta_rad, cas_mps]",0.520359,0.463653
1,"[hralt_m, hdot_1_mps, theta_rad, cas_mps, gamm...",0.508582,0.459077
2,"[hralt_m, hdot_1_mps, theta_rad, cas_mps, gamm...",0.505348,0.458052
3,"[hralt_m, hdot_1_mps, theta_rad, cas_mps, gamm...",0.510648,0.461228
4,"[hralt_m, hdot_1_mps, theta_rad, cas_mps, gamm...",0.490201,0.436085
5,"[hralt_m, hdot_1_mps, theta_rad, cas_mps, gamm...",0.486929,0.433841


In [28]:
# Row(s) with the lowest averaged validation MAE (ties are all returned).
new_df.loc[new_df['val_mean_absolute_error'] == new_df['val_mean_absolute_error'].min()] 

Unnamed: 0,features,val_mean_squared_error,val_mean_absolute_error
5,"[hralt_m, hdot_1_mps, theta_rad, cas_mps, gamm...",0.486929,0.433841


In [29]:
# Short column headers for the exported table. This renames `new_df` in place,
# so cells that index it by the original metric names (e.g. the
# 'val_mean_absolute_error' lookup) fail if re-run after this one.
new_df.columns = ['Features', 'MSE', 'MAE']

In [30]:
# Export the results table as a LaTeX table (row index omitted).
# The caption is Indonesian and is written verbatim into the generated file;
# the misspelling "Perfroma" is corrected to "Performa".
new_df.to_latex(
        'ch3_feature_eng_20.tex',
        index=False,
        caption="Pengaruh Features Terhadap Performa Model (MSE dan MAE)", 
        label='tab:ch3_feature_eng'
    )

  new_df.to_latex(


In [None]:
# Disabled: rank all results by averaged validation MAE (best first) and keep
# the top entries.
if False:
    print ("Loaded from pkl: {}".format(len (loaded_result)))

    sorted_features = sorted(loaded_result, key=lambda x: x['performance']['val_mean_absolute_error'])
    # NOTE(review): the name says "10" but the slice keeps the first 100 entries -- confirm which is intended.
    best_10s = sorted_features[:100]

In [None]:
# feat_counts = {}

# for data in best_10s:
#     for feature in data['features']:
#         feat_counts[feature] = feat_counts.get(feature, 0) + 1

# feat_counts

In [None]:
#best_10s[0]

In [None]:
#loaded_result [1780]