In [28]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import Ridge, HuberRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
warnings.filterwarnings("ignore")

## 1.0 Model Function

In [96]:
def buildNN (data, num_hidden_layers = 2, hidden_nodes = 256, act = 'relu', loss_function = 'mean_absolute_error'):
    
    NN_model = Sequential()
    
    # The Input Layer :
    input_layer= Input(shape=(data.shape[1],))
    NN_model.add(input_layer)
    
    # The Hidden Layers :
    for i in range(num_hidden_layers):
        NN_model.add(Dense(hidden_nodes, kernel_initializer='normal',activation=act))

    # The Output Layer :
    NN_model.add(Dense(1, kernel_initializer='normal',activation='linear'))

    # Compile the network :
    NN_model.compile(loss= loss_function, optimizer='adam', metrics=['mean_absolute_error', 'mean_squared_error'])
    
    return NN_model

In [97]:
def buildNN_adv (data, num_hidden_layers = 2, hidden_nodes = 256, act = 'relu', do = 0.2, regularizer = True,
                loss_function = 'mean_absolute_error'):
    
    if regularizer:
        reg = tf.keras.regularizers.l2(l=0.01)
    else:
        reg = None
        
    NN_model = Sequential()
    
    # The Input Layer :
    input_layer= Input(shape=(data.shape[1],))
    NN_model.add(input_layer)

    # The Hidden Layers :
    for i in range(num_hidden_layers):
        NN_model.add(Dense(hidden_nodes, kernel_initializer='normal',activation=act, kernel_regularizer = reg))
        NN_model.add(Dropout(do))

    # The Output Layer :
    NN_model.add(Dense(1, kernel_initializer='normal',activation='linear'))

    # Compile the network :
    NN_model.compile(loss= loss_function, optimizer='adam', metrics=['mean_absolute_error', 'mean_squared_error'])
    
    return NN_model

In [3]:
def evaluateModel (model, X, y):
    y_pred = model.predict(X)
    
    mse = mean_squared_error(y,y_pred)
    r2 = r2_score(y,y_pred)
    mae = mean_absolute_error(y,y_pred)
    
    return mse, r2, mae

## 2.0 Data

In [4]:
data = pd.read_csv('input/processed_data_nyc.csv', index_col = 0)
data.head()

Unnamed: 0,latitude,longitude,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,all_year_avail,low_avail,...,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodrow,neighbourhood_Woodside,room_type_Entire home/apt,room_type_Private room,room_type_Shared room
0,40.64749,-73.97237,5.010635,1,9,2762,0.21,6,True,False,...,0,0,0,0,0,0,0,0,1,0
1,40.75362,-73.98377,5.420535,1,45,2976,0.38,2,True,False,...,0,0,0,0,0,0,0,1,0,0
2,40.80902,-73.9419,5.01728,3,0,0,0.0,1,True,False,...,0,0,0,0,0,0,0,0,1,0
3,40.68514,-73.95976,4.49981,1,270,3021,4.64,1,False,False,...,0,0,0,0,0,0,0,1,0,0
4,40.79851,-73.94399,4.394449,10,9,2793,0.1,1,False,True,...,0,0,0,0,0,0,0,1,0,0


In [5]:
y = data.price
data = data.drop(['price'], axis=1)

X = np.asarray(data).astype(np.float32)
y = np.asarray(y).ravel()

Data Splitting

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Training Dataset: {}".format(X_train.shape))
print("Testing Dataset: {}".format(X_test.shape))

Training Dataset: (39014, 239)
Testing Dataset: (9754, 239)


Scaling Data:

In [7]:
scaler = preprocessing.RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

## 3.0 Testing Neural Network Architectures

### 3.1 Basic Architectures and Activations

In [46]:
EPOCHS = 30
BATCH_SIZE = 256

NUM_LAYERS = [1,2,4,6]
NUM_BASE_NODES = [8,32,128,512]
ACTS = ['tanh','relu']


mse_final = []
mae_final = []
r2_final = []
names = []

for numLayers in NUM_LAYERS:
    for numNodes in NUM_BASE_NODES:
        for act in ACTS:
            LOGNAME = "{}-{}-{}-Epochs={}-TIME={}".format(act,numLayers, numNodes , EPOCHS, int(time.time()) )
            print(LOGNAME)
            tensorboard = TensorBoard(log_dir='logs/NN/basic/{}'.format(LOGNAME))
            nn_model = buildNN(X, num_hidden_layers = numLayers, 
                               hidden_nodes = numNodes,
                               act = act)
            nn_model.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE, 
#                          callbacks = [tensorboard],
                        validation_split = 0.2)

            modelName = "{}-{}-{}".format(act,numLayers, numNodes)
            mse, r2, mae = evaluateModel(nn_model, X_test, y_test)

            print('RESULTS:')
            print('MSE = {}, MAE = {}, R2 = {}'.format(mse,mae,r2))

            names.append(modelName)
            mse_final.append(mse)
            mae_final.append(mae)
            r2_final.append(r2)

tanh-1-8-Epochs=30-TIME=1586773609
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


KeyboardInterrupt: 

In [23]:
result_df = pd.DataFrame({
    'Model': names,
    'MSE': mse_final,
    'MAE': mae_final,
    'R2': r2_final
})

result_df.to_csv('output/results_NN_basic_architectures.csv', index = False, header=True)

In [25]:
result_df.sort_values(by=['MSE'])

Unnamed: 0,Model,MSE,MAE,R2
20,tanh-4-128,0.18148,0.310069,0.601654
12,tanh-2-128,0.182446,0.31046,0.599534
2,tanh-1-32,0.182522,0.309438,0.599366
6,tanh-1-512,0.182922,0.314254,0.598489
9,relu-2-8,0.18303,0.312841,0.598251
14,tanh-2-512,0.184014,0.31505,0.596092
4,tanh-1-128,0.184298,0.310319,0.595468
3,relu-1-32,0.185012,0.31498,0.593901
27,relu-6-32,0.185097,0.317924,0.593715
1,relu-1-8,0.185293,0.313316,0.593284


Top 3 Basic Architectures

In [47]:
NN1 = buildNN(X, num_hidden_layers = 4, 
               hidden_nodes = 128,
               act = 'tanh')
NN2 = buildNN(X, num_hidden_layers = 2, 
               hidden_nodes = 128,
               act = 'tanh')
NN3 = buildNN(X, num_hidden_layers = 1, 
               hidden_nodes = 32,
               act = 'tanh')

### 3.2 Advanced Architectures: Dropout and Regularization

In [48]:
NN1_adv = buildNN_adv(X, num_hidden_layers = 4, 
               hidden_nodes = 128,
               act = 'tanh')
NN2_adv = buildNN_adv(X, num_hidden_layers = 2, 
               hidden_nodes = 128,
               act = 'tanh')
NN3_adv = buildNN_adv(X, num_hidden_layers = 1, 
               hidden_nodes = 32,
               act = 'tanh')

Now we will compare the effects of dropout and l2 regularization on the best 3 neural networks

In [51]:
models = [NN1, NN1_adv, NN2, NN2_adv, NN3, NN3_adv]
modelNames = ['NN1', 'NN1_adv', 'NN2', 'NN2_adv', 'NN3', 'NN3_adv']

i = 0
mse_final = []
mae_final = []
r2_final = []

for model in models:
    LOGNAME = "{}-TIME={}".format( modelNames[i] , int(time.time()) )
    print(LOGNAME)
    tensorboard = TensorBoard(log_dir='logs/NN/l2+dropout/{}'.format(LOGNAME))
    
    model.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE, 
                         callbacks = [tensorboard],
                validation_split = 0.2)

    mse, r2, mae = evaluateModel(model, X_test, y_test)

    print('RESULTS:')
    print('MSE = {}, MAE = {}, R2 = {}'.format(mse,mae,r2))

    mse_final.append(mse)
    mae_final.append(mae)
    r2_final.append(r2)
    i=i+1

NN1-TIME=1586773950
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.19260457558358632, MAE = 0.3162628533534541, R2 = 0.5772355825586251
NN1_adv-TIME=1586774018
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.20837192699587964, MAE = 0.3324996028482298, R2 = 0.5426264611802039
NN2-TIME=1586774117
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.18112915257794046, MAE = 0.31085316332978036, R2 = 0.6024239796004691
NN2_adv-TIME=1586774163
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.20070513411848306, MAE = 0.33248236251305063, R2 = 0.5594549670172816
NN3-TIME=1586774225
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.1844786588446908, MAE = 0.3106689152576532, R2 = 0.5950718590120129
NN3_adv-TIME=1586774258
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.2051648487192946, MAE = 0.3286172350264903, R2 = 0.5496659542721059


In [52]:
result_df2 = pd.DataFrame({
    'Model': modelNames,
    'MSE': mse_final,
    'MAE': mae_final,
    'R2': r2_final
})

result_df2.to_csv('output/results_NN_l2dropout.csv', index = False, header=True)

In [53]:
result_df2

Unnamed: 0,Model,MSE,MAE,R2
0,NN1,0.192605,0.316263,0.577236
1,NN1_adv,0.208372,0.3325,0.542626
2,NN2,0.181129,0.310853,0.602424
3,NN2_adv,0.200705,0.332482,0.559455
4,NN3,0.184479,0.310669,0.595072
5,NN3_adv,0.205165,0.328617,0.549666


It seems like adding both l2 regularization and dropout lowered the performance of the neural networks. So now we will test 1 network with each on their own. It is also worth noting that the performance of the the same neural networks changed slightly, and NN1 is no longer the best.

In [54]:
NN1_do = buildNN_adv(X, num_hidden_layers = 4, 
               hidden_nodes = 128,
                regularizer=False,
               act = 'tanh')
NN1_l2 = buildNN_adv(X, num_hidden_layers = 4, 
               hidden_nodes = 128,
                     do = 0,
               act = 'tanh')

In [57]:
models = [NN1_do, NN1_l2]


mse_final = []
mae_final = []
r2_final = []

for model in models:
    
    model.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE, 
                validation_split = 0.2)

    mse, r2, mae = evaluateModel(model, X_test, y_test)

    print('RESULTS:')
    print('MSE = {}, MAE = {}, R2 = {}'.format(mse,mae,r2))

    mse_final.append(mse)
    mae_final.append(mae)
    r2_final.append(r2)


print("--------- RESULTS -------")
print('Dropout Only: MSE = {}, MAE = {}, R2 = {}'.format(mse_final[0],mae_final[0],r2_final[0]))
print('L2 Regularization Only: MSE = {}, MAE = {}, R2 = {}'.format(mse_final[1],mae_final[1],r2_final[1]))

Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.18845448716784619, MAE = 0.3180876454741099, R2 = 0.5863449700490024
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS:
MSE = 0.20368741874053162, MAE = 0.3292267543637823, R2 = 0.5529088929322576
--------- RESULTS -------
Dropout Only: MSE = 0.18845448716784619, MAE = 0.3180876454741099, R2 = 0.5863449700490024
L2 Regularization Only: MSE = 0.20368741874053162, MAE = 0.3292267543637823, R2 = 0.5529088929322576


Based on the above, dropout alone is better than l2 regularization alone. Also, with l2 regularization, the validation loss seems to fluctuate more with training; this might indicate inability to converge.  

Now we will test different percentage of dropout with the top 3 neural networks.

In [68]:
EPOCHS = 30
BATCH_SIZE = 256

do_values = []
mse_final1 = []
mae_final1 = []
r2_final1 = []
mse_final2 = []
mae_final2 = []
r2_final2 = []
mse_final3 = []
mae_final3 = []
r2_final3 = []

for i in range(1,8):
    do_value = i/10
    LOGNAME = "{}-Epochs={}-TIME={}".format(do_value , EPOCHS, int(time.time()) )
    print(LOGNAME)
    tensorboard1 = TensorBoard(log_dir='logs/NN/dropouts/NN1-{}'.format(LOGNAME))
    tensorboard2 = TensorBoard(log_dir='logs/NN/dropouts/NN2-{}'.format(LOGNAME))
    tensorboard3 = TensorBoard(log_dir='logs/NN/dropouts/NN3-{}'.format(LOGNAME))
    
    NN1_adv = buildNN_adv(X, num_hidden_layers = 4, 
                          hidden_nodes = 128,
                          regularizer=False,
                          do = do_value,
                          act = 'tanh')
    NN2_adv = buildNN_adv(X, num_hidden_layers = 2, 
                          hidden_nodes = 128, 
                          regularizer=False,
                          do = do_value,
                          act = 'tanh')
    NN3_adv = buildNN_adv(X, num_hidden_layers = 1, 
                          hidden_nodes = 32,
                          regularizer=False,
                          do = do_value,
                          act = 'tanh')
    
    ## Training Nets
    print("NN1 dropout={}".format(do_value))
    NN1_adv.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,
            callbacks = [tensorboard1],
            validation_split = 0.2)
    print("NN2 dropout={}".format(do_value))
    NN2_adv.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,
            callbacks = [tensorboard2],
            validation_split = 0.2)
    print("NN3 dropout={}".format(do_value))
    NN3_adv.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,
            callbacks = [tensorboard3],
            validation_split = 0.2)
    
    ## Evaluating Nets
    mse_1, r2_1, mae_1 = evaluateModel(NN1_adv, X_test, y_test)
    mse_2, r2_2, mae_2 = evaluateModel(NN2_adv, X_test, y_test)
    mse_3, r2_3, mae_3 = evaluateModel(NN3_adv, X_test, y_test)
    
    print('RESULTS ----- Dropout = {}-------'.format(do_value))
    print('NN1: MSE = {}, MAE = {}, R2 = {}'.format(mse_1,mae_1,r2_1))
    print('NN2: MSE = {}, MAE = {}, R2 = {}'.format(mse_2,mae_3,r2_2))
    print('NN3: MSE = {}, MAE = {}, R2 = {}'.format(mse_3,mae_3,r2_3))
    
    ## Recording results in lists
    do_values.append(do_value)
    mse_final1.append(mse_1)
    mae_final1.append(mae_1)
    r2_final1.append(r2_1)
    mse_final2.append(mse_2)
    mae_final2.append(mae_2)
    r2_final2.append(r2_2)
    mse_final3.append(mse_3)
    mae_final3.append(mae_3)
    r2_final3.append(r2_3)

0.1-Epochs=30-TIME=1586776356
NN1 dropout=0.1
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.1
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN3 dropout=0.1
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.1-------
NN1: MSE = 0.1880881084410151, MAE = 0.3140476840918303, R2 = 0.587149166359201
NN2: MSE = 0.18459147497035439, MAE = 0.31788237468492214, R2 = 0.5948242291543128
NN3: MSE = 0.19288633668887217, MAE = 0.31788237468492214, R2 = 0.5766171207740445
0.2-Epochs=30-TIME=1586776634
NN1 dropout=0.2
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.2
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN3 dropout=0.2
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30


Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.2-------
NN1: MSE = 0.1885248733093264, MAE = 0.3174293793349413, R2 = 0.5861904734280958
NN2: MSE = 0.18636597289479961, MAE = 0.32060306689598633, R2 = 0.5909292304036031
NN3: MSE = 0.1918593821416436, MAE = 0.32060306689598633, R2 = 0.5788712720037459
0.3-Epochs=30-TIME=1586776851
NN1 dropout=0.3
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.3
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


NN3 dropout=0.3
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.3-------
NN1: MSE = 0.19620074078008468, MAE = 0.33484609216488503, R2 = 0.5693420489823119
NN2: MSE = 0.19061858778834131, MAE = 0.32149725063490636, R2 = 0.581594798692294
NN3: MSE = 0.1960316656850898, MAE = 0.32149725063490636, R2 = 0.5697131665106612
0.4-Epochs=30-TIME=1586777016
NN1 dropout=0.4
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.4
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN3 dropout=0.4
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.4-------
NN1: MSE = 0.1931543244988066, MAE = 0.3233897626860744, R2 = 0.5760288911849756
NN2: MSE = 0.2169428262478494, MAE = 0.3292033624218335, R2 = 0.5238134541774954
NN3: MSE = 0.20769122459250147, MAE = 0.3292033624218335, R2 = 0.5441205936749443
0.5-Epochs=30-TIME=1586777159
NN1 dropout=0.5
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.5
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN3 dropout=0.5
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.5-------
NN1: MSE = 0.2052009510812691, MAE = 0.3273631129222319, R2 = 0.5495867100797903
NN2: MSE = 0.1979309458446313, MAE = 0.32674108750860487, R2 = 0.5655442724551923
NN3: MSE = 0.20297688477292725, MAE = 0.32674108750860487, R2 = 0.554468505303752
0.6-Epochs=30-TIME=1586777345
NN1 dropout=0.6
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.6
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN3 dropout=0.6
Train on 31211 samples, validate on 7803 samples
Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.6-------
NN1: MSE = 0.21489553184186344, MAE = 0.335860821668374, R2 = 0.5283072374858884
NN2: MSE = 0.20376395541366057, MAE = 0.3342262058692374, R2 = 0.5527408959782383
NN3: MSE = 0.21554899085960547, MAE = 0.3342262058692374, R2 = 0.5268729038511839
0.7-Epochs=30-TIME=1586777604
NN1 dropout=0.7
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN2 dropout=0.7
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
NN3 dropout=0.7
Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RESULTS ----- Dropout = 0.7-------
NN1: MSE = 0.21325892905503752, MAE = 0.3434928600804041, R2 = 0.5318995582895815
NN2: MSE = 0.20089386904547374, MAE = 0.34126153926682606, R2 = 0.559040696425743
NN3: MSE = 0.22434703168213438, MAE = 0.34126153926682606, R2 = 0.5075613241979389


In [69]:
result_df2 = pd.DataFrame({
    'Dropout': do_values,
    'MSE_NN1': mse_final1,
    'MAE_NN1': mae_final1,
    'R2_NN1': r2_final1,
    'MSE_NN2': mse_final2,
    'MAE_NN2': mae_final2,
    'R2_NN2': r2_final2,
    'MSE_NN3': mse_final3,
    'MAE_NN3': mae_final3,
    'R2_NN3': r2_final3
})

result_df2.to_csv('output/results_NN_dropouts.csv', index = False, header=True)

In [70]:
result_df2

Unnamed: 0,Dropout,MSE_NN1,MAE_NN1,R2_NN1,MSE_NN2,MAE_NN2,R2_NN2,MSE_NN3,MAE_NN3,R2_NN3
0,0.1,0.188088,0.314048,0.587149,0.184591,0.31327,0.594824,0.192886,0.317882,0.576617
1,0.2,0.188525,0.317429,0.58619,0.186366,0.316387,0.590929,0.191859,0.320603,0.578871
2,0.3,0.196201,0.334846,0.569342,0.190619,0.31752,0.581595,0.196032,0.321497,0.569713
3,0.4,0.193154,0.32339,0.576029,0.216943,0.3349,0.523813,0.207691,0.329203,0.544121
4,0.5,0.205201,0.327363,0.549587,0.197931,0.326285,0.565544,0.202977,0.326741,0.554469
5,0.6,0.214896,0.335861,0.528307,0.203764,0.326412,0.552741,0.215549,0.334226,0.526873
6,0.7,0.213259,0.343493,0.5319,0.200894,0.329107,0.559041,0.224347,0.341262,0.507561


Based on the results above, we can see that increasing the dropout had negative impact on model performance. So we will choose a dropout of 0.1. Even though small dropout does not seem to have an effect on the numerical results, it is still expected to improve generalization on unseen data; therefore, it will be used in our neural nets.

### 3.3 Testing Loss Functions

Here we will test the top 3 neural networks from earlier with different loss functions and compare the results.

In [None]:
EPOCHS = 100
BATCH_SIZE = 256

LOSSES = ['mean_absolute_error', 'mean_squared_error', 'mean_squared_logarithmic_error']

losses = []
mse_final1 = []
mae_final1 = []
r2_final1 = []
mse_final2 = []
mae_final2 = []
r2_final2 = []
mse_final3 = []
mae_final3 = []
r2_final3 = []

for loss in LOSSES:
    LOGNAME = "{}-Epochs={}-TIME={}".format(loss , EPOCHS, int(time.time()) )
    print(LOGNAME)
    tensorboard1 = TensorBoard(log_dir='logs/NN/losses/NN1-{}'.format(LOGNAME))
    tensorboard2 = TensorBoard(log_dir='logs/NN/losses/NN2-{}'.format(LOGNAME))
    tensorboard3 = TensorBoard(log_dir='logs/NN/losses/NN3-{}'.format(LOGNAME))
    
    NN1_adv = buildNN_adv(X, num_hidden_layers = 4, 
                          hidden_nodes = 128,
                          regularizer=False,
                          do = 0.1,
                          loss_function = loss,
                          act = 'tanh')
    NN2_adv = buildNN_adv(X, num_hidden_layers = 2, 
                          hidden_nodes = 128, 
                          regularizer=False,
                          do = 0.1,
                          loss_function = loss,
                          act = 'tanh')
    NN3_adv = buildNN_adv(X, num_hidden_layers = 1, 
                          hidden_nodes = 32,
                          regularizer=False,
                          do = 0.1,
                          loss_function = loss,
                          act = 'tanh')
    
    ## Training Nets
    print("NN1 with {}".format(loss))
    NN1_adv.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,
            callbacks = [tensorboard1],
            validation_split = 0.2)
    print("NN2 with {}".format(loss))
    NN2_adv.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,
            callbacks = [tensorboard2],
            validation_split = 0.2)
    print("NN3 with {}".format(loss))
    NN3_adv.fit(X_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE,
            callbacks = [tensorboard3],
            validation_split = 0.2)
    
    ## Evaluating Nets
    mse_1, r2_1, mae_1 = evaluateModel(NN1_adv, X_test, y_test)
    mse_2, r2_2, mae_2 = evaluateModel(NN2_adv, X_test, y_test)
    mse_3, r2_3, mae_3 = evaluateModel(NN3_adv, X_test, y_test)
    
    print('RESULTS ----- Loss = {} -------'.format(loss))
    print('NN1: MSE = {}, MAE = {}, R2 = {}'.format(mse_1,mae_1,r2_1))
    print('NN2: MSE = {}, MAE = {}, R2 = {}'.format(mse_2,mae_3,r2_2))
    print('NN3: MSE = {}, MAE = {}, R2 = {}'.format(mse_3,mae_3,r2_3))
    
    ## Recording results in lists
    losses.append(loss)
    mse_final1.append(mse_1)
    mae_final1.append(mae_1)
    r2_final1.append(r2_1)
    mse_final2.append(mse_2)
    mae_final2.append(mae_2)
    r2_final2.append(r2_2)
    mse_final3.append(mse_3)
    mae_final3.append(mae_3)
    r2_final3.append(r2_3)

mean_absolute_error-Epochs=100-TIME=1586809930
NN1 with mean_absolute_error
Train on 31211 samples, validate on 7803 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100

In [99]:
result_df3 = pd.DataFrame({
    'Loss': losses,
    'MSE_NN1': mse_final1,
    'MAE_NN1': mae_final1,
    'R2_NN1': r2_final1,
    'MSE_NN2': mse_final2,
    'MAE_NN2': mae_final2,
    'R2_NN2': r2_final2,
    'MSE_NN3': mse_final3,
    'MAE_NN3': mae_final3,
    'R2_NN3': r2_final3
})

result_df3.to_csv('output/results_NN_losses.csv', index = False, header=True)

result_df3

Unnamed: 0,Loss,MSE_NN1,MAE_NN1,R2_NN1,MSE_NN2,MAE_NN2,R2_NN2,MSE_NN3,MAE_NN3,R2_NN3
0,mean_absolute_error,0.186119,0.31839,0.591471,0.191439,0.316403,0.579793,0.191159,0.318819,0.580408
1,mean_squared_error,0.188908,0.322718,0.58535,0.185217,0.315449,0.593451,0.189995,0.324927,0.582963
2,mean_squared_logarithmic_error,0.195414,0.320969,0.571069,0.186806,0.3179,0.589962,0.198367,0.328574,0.564588


## 4.0 Building Model

In [73]:
rf = RandomForestRegressor(n_estimators = 50)
ridge = Ridge(alpha=5)
huber = HuberRegressor(alpha=10, epsilon=3)

In [91]:
NN1 = buildNN_adv(X, num_hidden_layers = 4, 
                          hidden_nodes = 128,
                          regularizer=False,
                          do = 0.1,
                          act = 'tanh')

NN2 = buildNN_adv(X, num_hidden_layers = 2, 
                          hidden_nodes = 128, 
                          regularizer=False,
                          do = do_value,
                          act = 'tanh')

NN3= buildNN_adv(X, num_hidden_layers = 1, 
                          hidden_nodes = 128,
                          regularizer=False,
                          do = do_value,
                          act = 'relu')

In [92]:
MODELS = [NN1, NN2, NN3]

## FIRST RF
X_train1, X_ignore, y_train1, y_ignore = train_test_split(X_train, y_train, test_size=0.4, random_state=42)
    
#Train model on the randomly sectioned train data
rf.fit(X_train1,y_train1)

print(rf.__class__.__name__)

# Display performance on ignored set
y_pred = rf.predict(X_ignore)
mse = mean_squared_error(y_ignore, y_pred)
print("Ignored MSE = {}".format(mse))

rf_out = rf.predict(X_train)
rf_out = np.reshape(rf_out, (rf_out.shape[0], 1) )
mse = mean_squared_error(y_train, rf_out)
print("Train MSE = {}".format(mse))
fusion_vector_train = rf_out

rf_out = rf.predict(X_test)
rf_out = np.reshape(rf_out, (rf_out.shape[0], 1) )
fusion_vector_test = rf_out

mse = mean_squared_error(y_test, rf_out)
print("Test MSE = {}".format(mse))

for model in MODELS:
    print(model.__class__.__name__)
        
    X_train1, X_ignore, y_train1, y_ignore = train_test_split(X_train, y_train, test_size=0.4, random_state=42)
    
    #Train model on the randomly sectioned train data
    model.fit(X_train1, y_train1, epochs = EPOCHS, batch_size = BATCH_SIZE,
               validation_split = 0.2)
    
    # Display performance on ignored set
    y_pred = model.predict(X_ignore)
    mse = mean_squared_error(y_ignore, y_pred)
    print("Ignored MSE = {}".format(mse))
    
    model_out = model.predict(X_train)
    model_out = np.reshape(model_out, (model_out.shape[0], 1) )
    mse = mean_squared_error(y_train, model_out)
    print("Train MSE = {}".format(mse))
    
    # Fusion Train
    fusion_vector_train = np.hstack((fusion_vector_train,model_out))
    
    model_out = model.predict(X_test)
    model_out = np.reshape(model_out, (model_out.shape[0], 1) )
    mse = mean_squared_error(y_test, model_out)
    print("Test MSE = {}".format(mse))
    
    # Fusion Test
    fusion_vector_test = np.hstack((fusion_vector_test,model_out))

RandomForestRegressor
Ignored MSE = 0.1854306138774688
Train MSE = 0.0908593748777104
Test MSE = 0.17965943200400852
Sequential
Train on 18726 samples, validate on 4682 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Ignored MSE = 0.19178494888766395
Train MSE = 0.18883668570864853
Test MSE = 0.18857516489337928
Sequential
Train on 18726 samples, validate on 4682 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Ignored MSE = 0.2119870049923725
Train MSE = 0.21350813428898807
Test MSE = 0.20747365571550347
Sequential
Train on 18726 samples, validate on 4682 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Ignored MSE = 0.19841362981253793
Train MSE = 0.19984878378607382
Test MSE = 0.19622619267403832


In [93]:
rf.fit(fusion_vector_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=50, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [95]:
rf_out = rf.predict(fusion_vector_test)
mse = mean_squared_error(y_test, rf_out)
print("Test MSE = {}".format(mse))

Test MSE = 0.19364103517168627


In [94]:
numLayers = 2
numNodes = 128
EPOCHS = 30
BATCH_SIZE = 32

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0.001, patience=5, verbose=0, mode='auto',
    baseline=None, restore_best_weights=False
)

nn_model = buildNN_adv(fusion_vector_train, num_hidden_layers = numLayers, hidden_nodes = numNodes,
                      regularizer = False, do = 0.2)
nn_model.fit(fusion_vector_train, y_train, epochs = EPOCHS, batch_size = BATCH_SIZE, callbacks = [tensorboard],
            validation_split = 0.2)

mse_f, r2_f, mae_f = evaluateModel(nn_model, fusion_vector_test, y_test)
print('final results ---  MSE = {}, MAE = {}, R2 = {}'.format(mse_f,mae_f,r2_f))

Train on 31211 samples, validate on 7803 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
final results ---  MSE = 0.1957203353689293, MAE = 0.32803307128984704, R2 = 0.570396532309966


NN1: MSE = 0.20007391655180432, MAE = 0.3261489488214801, R2 = 0.5608404809701414
