In [1]:
#- main machine learning libraries used

# import tensorflow
import tensorflow as tf
import tensorflow.keras as keras  
import sklearn
from sklearn.metrics import mean_squared_error


#- other libraries used
import itertools
import numpy as np
import os
import math
import pandas as pd
import matplotlib.pyplot as plt 
from platform import python_version
from datetime import datetime as dt

In [2]:
python_version()

'3.9.12'

In [3]:
today = dt.today()
print('latest run on:', today)

latest run on: 2022-06-09 14:54:13.449449


In [4]:
now1 = dt.now()
now1

datetime.datetime(2022, 6, 9, 14, 54, 13, 454875)

## check tensorflow version

In [5]:
print (tf.__version__)

2.9.1


## my functions(.) here

## input directory

In [6]:
dataset_dir = "/Users/ehsanmos/MLP_dataset"

In [7]:
if os.path.isdir(dataset_dir) == False:
    print("dataset directory NOT found!")
else:
    print("dataset directory FOUND!")

dataset directory FOUND!


## Load input/ training dataset

before doing this section, process filter final input dataset with "check_n_filter_final_dataset"

In [8]:
in_ds = "atmmodel_april_2016_k_zero_9cams4bands_preprocessed.csv"

## check if input dataset file exists

In [9]:
in_ds_fullpath = os.path.join(dataset_dir, in_ds)
print(in_ds_fullpath)

if (not os.path.isfile(os.path.join(in_ds_fullpath))):
    raise SystemExit()
else:
    print("input dataset found!")

/Users/ehsanmos/MLP_dataset/atmmodel_april_2016_k_zero_9cams4bands_preprocessed.csv
input dataset found!


## Read in dataset and look at dataset columns

In [10]:
df_orig = pd.read_csv(in_ds_fullpath, engine='python')

In [11]:
df_orig.shape

(40775, 15)

In [12]:
df_orig.columns

Index(['firstLat', 'firstLon', 'anr', 'ang', 'anb', 'annir', 'aa', 'af', 'ba',
       'bf', 'ca', 'cf', 'da', 'df', 'rms'],
      dtype='object')

Note: we will build an input dataset with 9 cameras to train the mlp model

In [13]:
#- build dataset with 9 cameras
excluce_columns = ['firstLat', 'firstLon', 'ang', 'anb', 'annir']
input_ds_for_training = df_orig.drop(excluce_columns, axis=1)

print('we are using %s columns in our training dataset:' %len(input_ds_for_training.columns))
print(input_ds_for_training.columns)  # columns should be only 9 cameras + rms 


we are using 10 columns in our training dataset:
Index(['anr', 'aa', 'af', 'ba', 'bf', 'ca', 'cf', 'da', 'df', 'rms'], dtype='object')


## shuffle rows of input dataset

In [14]:
from sklearn.utils import shuffle

input_ds_for_training = shuffle(input_ds_for_training)

## Split dataset to train-test parts for training algorithms
- we devided to plit our dataset to 2 parts (2-part split)
- Here we use the ‘train_test_split’ to split the data in 80:20 ratio i.e. 80% of the data will be used for training the model while 20% will be used for testing the model that is built out of it.
- note: last column should be label == rms

In [15]:
#- split data set to X and Y

X = input_ds_for_training.iloc[:, :-1] # to select up to last column of dataset OR [:, 0:3]
Y = input_ds_for_training.iloc[:, -1:] # to select last column of DF

print(X.shape)
print(Y.shape)

(40775, 9)
(40775, 1)


In [16]:
#- now split dataset to train-test

from sklearn.model_selection import train_test_split

#- we use this function to split data-- from here because we are usiong SKlearn library, we change all data structures from Pandas DF to numpy
# X_train, X_test, y_train, y_test = train_test_split(X.to_numpy(), Y.to_numpy(), test_size=0.2, random_state=123) # Q- input is DF or numpy array?

test_data_size = 0.3
print("test size= %d percent" %(test_data_size*100))
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=test_data_size, random_state=123) # Q- input is DF or numpy array?


print("train:")
print(x_train.shape)
print(y_train.shape)

print("test:")
print(x_test.shape)
print(y_test.shape)


test size= 30 percent
train:
(28542, 9)
(28542, 1)
test:
(12233, 9)
(12233, 1)


Qn- how about train-val-test (3 sections)? is this for DL?

## Feature scalling

change the scale/range of input features from their original range to a new range. So changed features will have mean=0 and std=1.

source: https://www.enjoyalgorithms.com/blog/need-of-feature-scaling-in-machine-learning

- We rescale data after we split data to train-test
- all features have the same range to reduce bias in data 
- perform this step before splitting data into train-test split
- We normalize data using the training data

Qn- why FS is important? why we do FS?

Qn- which method? 

1) standardization/ Z-score/ StandardScaler() == mean=0 & std=1; Standardize features by removing the mean and scaling to unit variance; good for datasets w/ outliers;

2) MinMaxScalar() == Transform features by scaling each feature to a given range (usually [0,1])

3) normalize() == Scale input vectors individually to unit norm (vector length).

source: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing



### 1) Using MinMaxScaler() method to rescale input features

In [17]:
# #- import necessary libraries for Neural Nets
# from sklearn.preprocessing import MinMaxScaler

# #- fit data

# scaler_x = MinMaxScaler()
# scaler_y = MinMaxScaler()

# scaler_x.fit(X_train) # returns Fitted scaler
# X_train_scaled = scaler_x.transform(X_train)  # transforms data

# scaler_x.fit(X_test)
# X_test_scaled = scaler_x.transform(X_test)

# scaler_y.fit(y_train)
# y_train_scaled = scaler_y.transform(y_train)

# scaler_y.fit(y_test)
# y_test_scaled = scaler_y.transform(y_test)

### 2) Using StandardScaler() method 

to rescale input features to mean of 0 and std of 1

In [18]:
from sklearn.preprocessing import StandardScaler

scaler_x = StandardScaler()
scaler_y = StandardScaler()

x_train_scaled = scaler_x.fit(x_train).transform(x_train) # returns daata w/ mean 0 & std 1
y_train_scaled = scaler_y.fit(y_train).transform(y_train)
x_test_scaled = scaler_x.fit(x_test).transform(x_test)
y_test_scaled = scaler_y.fit(y_test).transform(y_test)

Check types of input dataset data structure; should be 2D arrays, or Pandas DataFrame

In [19]:
print(type(x_train_scaled))
# print(type(y_train_scaled))
# print(type(x_test_scaled))
# print(type(y_test_scaled))

print(x_train_scaled.mean())
print(x_train_scaled.std())
print(y_train_scaled.mean())
print(y_train_scaled.std())
print(x_test_scaled.mean())
print(x_test_scaled.std())
print(y_test_scaled.mean())
print(y_test_scaled.std())

<class 'numpy.ndarray'>
7.778191098332289e-17
0.8819171036881969
-3.310986751317123e-17
1.0
1.8070607374663076e-17
0.8819171036881969
2.24204607212784e-16
0.9999999999999999


In [20]:
x_train_scaled.shape

(28542, 9)

## >>> Neural Network (Regression) >>>

### Building the NN model
Q- how find the best architecture? for mlp? 

let's do a architecture search

source: https://towardsdatascience.com/how-to-find-optimal-neural-network-architecture-with-tensorflow-the-easy-way-50575a03d060

In [21]:
def get_models(num_layers: int,
               min_nodes_per_layer: int,
               max_nodes_per_layer: int,
               node_step_size: int,
               input_shape: tuple,
               hidden_layer_activation: str = 'relu',
               num_nodes_at_output: int = 1,
               output_layer_activation: str = 'linear') -> list:   
        
        # https://stackoverflow.com/questions/14379753/what-does-mean-in-python-function-definitions
        # allowing you to attach metadata to functions, describe their parameters (their expected types) and return values
    
    # creates a list from nodes that we have defined [min, max, step]
    node_options = list(range(min_nodes_per_layer, 
                              max_nodes_per_layer + 1, 
                              node_step_size))
    
    # make a collection of sets of nodes for each hidden layer
    layer_possibilities = [node_options] * num_layers
    layer_node_permutations = list(itertools.product(*layer_possibilities))
#     print('permutations:')
#     print(layer_node_permutations)
    
    models = []
    for permutation in layer_node_permutations:
        
        # setup input layer
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.InputLayer(input_shape=input_shape, name="input-layer"))
        model_name = ''
        
        # setup hidden layers
        for nodes_at_layer in permutation:
            model.add(tf.keras.layers.Dense(nodes_at_layer, activation=hidden_layer_activation))
            model_name += 'dense_'+str(nodes_at_layer)+'_'

        # setup output layer
        model.add(tf.keras.layers.Dense(num_nodes_at_output, activation=output_layer_activation, name="output-layer-SIR"))
        model._name = model_name[:-1]
        models.append(model)
        
    return models

In [22]:
def optimize(models: list,
             X_train: np.array,
             y_train: np.array,
             X_test: np.array,
             y_test: np.array,
             epochs: int,
             batch_size: int, 
             validation_split: float,
             verbose: int = 0) -> pd.DataFrame:
    

    results = [] # We will store the results in this list
    
    ##############################################
    def train(model: tf.keras.Sequential) -> dict:
        
        adam_lrt = 0.001
        opt_alg = tf.keras.optimizers.Adam(
                    learning_rate=adam_lrt,   # then everu step * 10 to get to 10
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-07,
                    amsgrad=False,
                    name='Adam'
        )
        
        # Compile each model
        model.compile(
            loss='mse', 
            optimizer=opt_alg, 
            metrics=['mse','mae']
        )

        # Train the model/ fit alg. to dataset
        model.fit(
            X_train,
            y_train,
            epochs=epochs,
            verbose=verbose,
            batch_size=batch_size,
            validation_split=validation_split
            
        )
        

        # Make predictions on the test set
        y_pred_scaled = model.predict(X_test, verbose=verbose)  # Generates output predictions for the input samples
        # inverse y-pred to original scale
        y_pred = scaler_y.inverse_transform(y_pred_scaled)
        # compare y & y-hat in original scale
        rmse_pred_test = math.sqrt(mean_squared_error(y_test, y_pred))
        
        
#         # Evaluation on test data
#         preds = model.evaluate(
#             X_test, 
#             y_test, 
#             batch_size=20, 
#             verbose=0) # Returns the loss value & metrics values for the model in test mode
#         print('return of evaluate(.)')
#         print(preds)
#         mse_test = preds[1]
#         mae_test = preds[2]
        

        # Return evaluation metrics on the test set
        return {
            'model_name': model.name,
            #'test_rmse': math.sqrt(mse_test),
            #'test_mae': mae_test,
            'test_pred_rmse (cm)': round(rmse_pred_test, 1)
        }
    
    ##############################################
    # Train every model and save results in the list above and tur it to DF
    for model in models:
        try:
            print(model.name, end=' ... \n')
            res = train(model=model)
            results.append(res)
        except Exception as e:
            print(f'{model.name} --> {str(e)}') # how change to nornal print?
        
    return pd.DataFrame(results)

## Building all models

iterate with different hidden layers + iterate over several node options in one single hidden layer

In [23]:
num_layers = 3

all_models = get_models(
    num_layers=num_layers, 
    min_nodes_per_layer=6, 
    max_nodes_per_layer=300, 
    node_step_size=24, 
    input_shape=(9,) # should be tuple
)

2022-06-09 14:54:14.094756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [24]:
# all_models[0].summary()

## Running the optimization

In [25]:
optimization_results = optimize(
    models=all_models,
    X_train=x_train_scaled,
    y_train=y_train_scaled,
    X_test=x_test_scaled,
    y_test=y_test,
    epochs=50,
    batch_size=100,
    validation_split=0.2
)

dense_6_dense_6_dense_6 ... 
dense_6_dense_6_dense_30 ... 
dense_6_dense_6_dense_54 ... 
dense_6_dense_6_dense_78 ... 
dense_6_dense_6_dense_102 ... 
dense_6_dense_6_dense_126 ... 
dense_6_dense_6_dense_150 ... 
dense_6_dense_6_dense_174 ... 
dense_6_dense_6_dense_198 ... 
dense_6_dense_6_dense_222 ... 
dense_6_dense_6_dense_246 ... 
dense_6_dense_6_dense_270 ... 
dense_6_dense_6_dense_294 ... 
dense_6_dense_30_dense_6 ... 
dense_6_dense_30_dense_30 ... 
dense_6_dense_30_dense_54 ... 
dense_6_dense_30_dense_78 ... 
dense_6_dense_30_dense_102 ... 
dense_6_dense_30_dense_126 ... 
dense_6_dense_30_dense_150 ... 
dense_6_dense_30_dense_174 ... 
dense_6_dense_30_dense_198 ... 
dense_6_dense_30_dense_222 ... 
dense_6_dense_30_dense_246 ... 
dense_6_dense_30_dense_270 ... 
dense_6_dense_30_dense_294 ... 
dense_6_dense_54_dense_6 ... 
dense_6_dense_54_dense_30 ... 
dense_6_dense_54_dense_54 ... 
dense_6_dense_54_dense_78 ... 
dense_6_dense_54_dense_102 ... 
dense_6_dense_54_dense_126 ... 
dens

dense_30_dense_150_dense_150 ... 
dense_30_dense_150_dense_174 ... 
dense_30_dense_150_dense_198 ... 
dense_30_dense_150_dense_222 ... 
dense_30_dense_150_dense_246 ... 
dense_30_dense_150_dense_270 ... 
dense_30_dense_150_dense_294 ... 
dense_30_dense_174_dense_6 ... 
dense_30_dense_174_dense_30 ... 
dense_30_dense_174_dense_54 ... 
dense_30_dense_174_dense_78 ... 
dense_30_dense_174_dense_102 ... 
dense_30_dense_174_dense_126 ... 
dense_30_dense_174_dense_150 ... 
dense_30_dense_174_dense_174 ... 
dense_30_dense_174_dense_198 ... 
dense_30_dense_174_dense_222 ... 
dense_30_dense_174_dense_246 ... 
dense_30_dense_174_dense_270 ... 
dense_30_dense_174_dense_294 ... 
dense_30_dense_198_dense_6 ... 
dense_30_dense_198_dense_30 ... 
dense_30_dense_198_dense_54 ... 
dense_30_dense_198_dense_78 ... 
dense_30_dense_198_dense_102 ... 
dense_30_dense_198_dense_126 ... 
dense_30_dense_198_dense_150 ... 
dense_30_dense_198_dense_174 ... 
dense_30_dense_198_dense_198 ... 
dense_30_dense_198_dense

KeyboardInterrupt: 

In [None]:
optimization_results

In [None]:
# write to disk
output_dir = dataset_dir
output_file = 'mlp_search_results_num_layers_'+str(num_layers)+'.csv'
output_fp = os.path.join(output_dir, output_file)
optimization_results.to_csv(output_fp, index=False)
output_fp

In [None]:
now2 = dt.now()
runtime = now2-now1
print(runtime)