# Artificial Neural Networks

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import MeanAbsoluteError, MeanSquaredError, RootMeanSquaredError
from sklearn.model_selection import KFold
from numpy import sqrt
import tensorflow as tf
tf.config.optimizer.set_jit(True)
import pytz
import datetime
import time
import os
import joblib

In [11]:
# Missile types to sweep over. Each entry names a sub-folder of the data
# directory and of the results directory.
missile_list = [
    'sam_1',
    'sam_2',
]

# Angle ranges (in degrees) to sweep over, written as 'start_end'.
# Each entry selects the input file 'wez_exp_out_<start_end>.csv';
# remove entries from this list to train on a subset of the ranges.
angles_list = [
    '0_144',
    '144_153',
    '153_162',
    '162_171',
    '171_180',
    '0_180',
]

In [None]:
def _build_model(n_inputs, hidden_layers, units):
    """Build and compile the fully connected regression network.

    Args:
        n_inputs: Number of units of the first Dense layer (the script passes
            the number of feature columns).
        hidden_layers: Number of hidden Dense layers stacked after the first.
        units: Number of units in each hidden layer.

    Returns:
        A compiled Keras ``Sequential`` model with a single linear output,
        MSE loss, the Adam optimizer and RMSE as an additional metric.
    """
    model = Sequential()

    # First layer: one ReLU unit per input feature
    model.add(Dense(units=n_inputs, activation='relu'))

    # Hidden layers
    for _ in range(hidden_layers):
        model.add(Dense(units=units, activation='relu'))

    # Output layer (single regression target, linear activation)
    model.add(Dense(units=1))

    # Compile the model for a regression problem
    model.compile(loss='mse', optimizer='adam', metrics=[RootMeanSquaredError()])
    return model


# For every (missile, angle range) pair:
#   1. load the data and hold out 20% as a test set,
#   2. grid-search the network depth/width with k-fold cross validation,
#   3. retrain the best configuration on the full training set and score it
#      on the held-out test set.
# Per-configuration CV metrics and the final test metrics are written as CSV.
for missile in missile_list:
    for angles in angles_list:
        # Folder and CSV file holding the data for this missile / angle range
        folder_path = f'../data/{missile}'
        csv_file = f'wez_exp_out_{angles}.csv'

        # Read the CSV file into a pandas DataFrame (first row is the header)
        df = pd.read_csv(f'{folder_path}/{csv_file}', delimiter=',', header=0)

        # Features are every column except the 'max_range' target
        X = df.drop('max_range', axis=1)
        y = df[['max_range']]

        # Split the data into training and testing sets, with 20% of data used for testing
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Fit the min-max scaler on the training data only, then apply it to both sets
        scaler = MinMaxScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # Targets as numpy arrays so they can be indexed with the fold indices
        y_train = y_train.values
        y_test = y_test.values

        patience = 10
        epochs = 1_000_000  # upper bound only; early stopping ends training
        batch_size = 16
        n_splits = 5
        list_of_hidden_layers = [2, 5, 10]
        list_of_units = [32, 64, 128]
        # Start from -inf so that a configuration is selected even when every
        # cross-validated R2 is zero or negative.
        best_model = {'r2': float('-inf'), 'hidden_layers': 0, 'units': 0, 'epochs': 0}

        # Training: grid search with cross validation
        for hidden_layers in list_of_hidden_layers:
            for units in list_of_units:
                print('\n------------------------------------------------------------------------------------------------------------------------------------------------')
                print(f'Starting a new training process with cross validation with {n_splits} folds.')
                print(f'Missile: {missile}, Model: {angles}, Hidden Layers: {hidden_layers}, Units: {units}')

                metrics = pd.DataFrame(columns=['fold', 'r2', 'rmse', 'mpe', 'training_time', 'inference_time_training', 'num_epochs'])

                # Seeded like train_test_split above so the folds (and therefore
                # the model selection) are reproducible between runs.
                kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

                # K-fold Cross Validation model evaluation
                for fold_no, (train, val) in enumerate(kfold.split(X_train, y_train), start=1):
                    # Many models are created in these loops; drop the Keras
                    # global state of the previous ones to keep memory bounded.
                    tf.keras.backend.clear_session()

                    model = _build_model(X.shape[1], hidden_layers, units)

                    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=patience)

                    print('------------------------------------------------------------------------------------------------------------------------------------------------')
                    print(f'Training for fold {fold_no} ...')

                    # Time the training on this fold
                    st_wall = time.perf_counter()
                    history = model.fit(X_train[train], y_train[train],
                                        epochs=epochs,
                                        batch_size=batch_size,
                                        validation_data=(X_train[val], y_train[val]),
                                        callbacks=[early_stop])
                    wall_time = time.perf_counter() - st_wall

                    # Time the inference on the validation fold
                    st_wall_inf = time.perf_counter()
                    y_pred_val = model.predict(X_train[val])
                    wall_time_inf = time.perf_counter() - st_wall_inf

                    # Number of epochs actually run before early stopping
                    num_epochs = len(history.history['loss'])

                    # NOTE: the 'mpe' column holds the mean absolute percentage error
                    scores = [fold_no,
                              r2_score(y_train[val], y_pred_val),
                              sqrt(mean_squared_error(y_train[val], y_pred_val)),
                              mean_absolute_percentage_error(y_train[val], y_pred_val),
                              wall_time,
                              wall_time_inf,
                              num_epochs]

                    metrics.loc[len(metrics)] = scores

                # Compute every statistic from the fold rows only, BEFORE the
                # 'mean'/'std' summary rows are appended, so the summary rows
                # never leak into the model-selection score or the epoch count.
                value_cols = [col for col in metrics.columns if col != 'fold']
                fold_means = [metrics[col].mean() for col in value_cols]
                fold_stds = [metrics[col].std() for col in value_cols]
                mean_r2 = metrics['r2'].mean()
                max_epochs = int(metrics['num_epochs'].max())

                metrics.loc[len(metrics)] = ['mean', *fold_means]
                metrics.loc[len(metrics)] = ['std', *fold_stds]
                metrics = metrics.set_index('fold')

                path_to_save = f'./results/{missile}/training'
                os.makedirs(path_to_save, exist_ok=True)

                metrics.to_csv(f'{path_to_save}/{angles}-{units}_units-{hidden_layers}_hidden_layers.csv')

                if mean_r2 > best_model['r2']:
                    best_model['r2'] = mean_r2
                    best_model['hidden_layers'] = hidden_layers
                    best_model['units'] = units
                    best_model['epochs'] = max_epochs

        # Only possible when every configuration produced a NaN score; a
        # network with 0 units / 0 epochs cannot be trained, so skip this pair.
        if best_model['units'] == 0:
            print(f"\n No valid configuration found for missile {missile} and model {angles}; skipping the final model.")
            continue

        print(f"\n The best model for missile {missile} and model {angles} is:")
        print(f"R2: {best_model['r2']}, Hidden Layers: {best_model['hidden_layers']}, Units: {best_model['units']}\n")

        # Test: retrain the best configuration on the whole training set
        hidden_layers = best_model['hidden_layers']
        units = best_model['units']
        # No validation data here, so train for the largest epoch count that
        # the winning configuration reached during cross validation.
        final_epochs = best_model['epochs']

        print('------------------------------------------------------------------------------------------------------------------------------------------------')
        print('Starting a new training process for the final model.')
        print(f'Missile: {missile}, Model: {angles}, Hidden Layers: {hidden_layers}, Units: {units}')

        metrics = pd.DataFrame(columns=['r2', 'rmse', 'mpe', 'training_time', 'inference_time_training'])

        tf.keras.backend.clear_session()
        model = _build_model(X.shape[1], hidden_layers, units)

        # Time the training of the final model
        st_wall = time.perf_counter()
        model.fit(X_train, y_train, epochs=final_epochs, batch_size=batch_size)
        wall_time = time.perf_counter() - st_wall

        # Time the inference on the held-out test set
        st_wall_inf = time.perf_counter()
        y_pred = model.predict(X_test)
        wall_time_inf = time.perf_counter() - st_wall_inf

        scores = [r2_score(y_test, y_pred),
                  sqrt(mean_squared_error(y_test, y_pred)),
                  mean_absolute_percentage_error(y_test, y_pred),
                  wall_time,
                  wall_time_inf]

        metrics.loc[len(metrics)] = scores

        path_to_save = f'./results/{missile}/test'
        os.makedirs(path_to_save, exist_ok=True)

        metrics.to_csv(f'{path_to_save}/{angles}-{units}_units-{hidden_layers}_hidden_layers.csv')



------------------------------------------------------------------------------------------------------------------------------------------------
Starting a new training process with cross validation with 5 folds.
Missile: 48N6ES, Model: 0_144, Hidden Layers: 2, Units: 32
------------------------------------------------------------------------------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/1000000
Epoch 2/1000000
Epoch 3/1000000
Epoch 4/1000000
Epoch 5/1000000
Epoch 6/1000000
Epoch 7/1000000
Epoch 8/1000000
Epoch 9/1000000
Epoch 10/1000000
Epoch 11/1000000
Epoch 12/1000000
Epoch 13/1000000
Epoch 14/1000000
Epoch 15/1000000
Epoch 16/1000000
Epoch 17/1000000
Epoch 18/1000000
Epoch 19/1000000
Epoch 20/1000000
Epoch 21/1000000
Epoch 22/1000000
Epoch 23/1000000
Epoch 24/1000000
Epoch 25/1000000
Epoch 26/1000000
Epoch 27/1000000
Epoch 28/1000000
Epoch 29/1000000
Epoch 30/1000000
Epoch 31/1000000
Epoch 32/1000000
Epoch 33/1000000
Epoch 