# Modeling Cross Sections

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pathlib
import importlib
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers

print(tf.__version__)

import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

from  IPython import display
import pathlib
import shutil
import tempfile
import os
import importlib

# Cap pandas' rich display at 500 columns and 50 rows.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 50)
# Use seaborn's "darkgrid" style for the plots in this notebook.
sns.set_style("darkgrid")

2.0.0


In [2]:
# TensorBoard logs go under a fresh temporary directory created for this session.
logdir = pathlib.Path(tempfile.mkdtemp())/"tensorboard_logs"
# Remove anything already at that path; errors (e.g. the path does not exist) are ignored.
shutil.rmtree(logdir, ignore_errors=True)

In [3]:
# Report how many physical GPUs TensorFlow lists before any strategy is created.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  2


In [4]:
# Ask seaborn for a font scale of 2 so plot text is rendered larger.
sns.set(font_scale=2)

In [5]:
import sys
# This allows us to import the nucml utilities
sys.path.append("..")

In [34]:
import nucml.exfor.data_utilities as exfor_utils
import nucml.endf.data_utilities as endf_utils
import nucml.plot.plotting_utilities as plot_utils
import nucml.datasets as nuc_data
importlib.reload(exfor_utils)
importlib.reload(endf_utils)
importlib.reload(plot_utils)
importlib.reload(nuc_data)
print("Finish importing scripts.")

Finish importing scripts.


# Getting the Data: ENSDF/RIPL Known and Cut-Off
# Creating Training, Validation, and Testing Datasets and Data Standardization

In [7]:
# NOTE(review): presumably toggles log-scaling of the energy feature, but no visible
# cell reads this flag (the later new-data loader passes its own "log_e") — confirm it is still needed.
log_E = True

In [8]:
# Fraction of the samples used for training. The held-out fraction is derived
# from it so the two values cannot drift apart when `train_size` is edited.
train_size = 0.8
test_size = 1 - train_size

In [11]:
# Loader options: proton number 17 (chlorine) with the `elemental` flag set.
# NOTE(review): presumably "elemental" selects every isotope of the element — confirm against nucml.
kwarg = dict(elemental=True, Z=17)

# Load the EXFOR data and split it, holding out `test_size` of the samples.
(df, x_train, x_test,
 y_train, y_test,
 to_scale, scaler) = nuc_data.load_exfor(num=True, basic=True, frac=test_size, **kwarg)

C:\Users\Pedro\Desktop\ML_Nuclear_Data\ML_Data\EXFOR_neutrons\EXFOR_neutrons_MF3_AME_no_NaNRaw.csv
Reading data into dataframe...
Extracting samples from dataframe.
EXFOR extracted DataFrame has shape:  (1744, 66)
Data read into dataframe with shape:  (1744, 10)
Dropping unnecessary features and one-hot encoding categorical columns...
Splitting dataset into training and testing...
Normalizing dataset...
Finished. Resulting dataset has shape  (1744, 49) 
Training and Testing dataset shapes are (1395, 48) and (349, 48) respesctively.


  loglike = -n_samples / 2 * np.log(x_trans.var())


In [16]:
# Number of data points available for each target mass number.
df.Target_Mass_Number.value_counts()

36    1019
35     472
37     253
Name: Target_Mass_Number, dtype: int64

In [17]:
# Column names of the loaded frame (the target `Data` plus the model features).
df.columns

Index(['Energy', 'Data', 'Target_Protons', 'Target_Neutrons',
       'Target_Mass_Number', 'MT_1', 'MT_102', 'MT_16', 'MT_17', 'MT_2',
       'MT_3', 'MT_4', 'MT_101', 'MT_103', 'MT_104', 'MT_41', 'MT_9000',
       'MT_105', 'MT_32', 'MT_51', 'MT_33', 'MT_107', 'MT_24', 'MT_155',
       'MT_158', 'MT_159', 'MT_108', 'MT_29', 'MT_1108', 'MT_113', 'MT_106',
       'MT_22', 'MT_1003', 'MT_9001', 'MT_28', 'MT_111', 'MT_203', 'MT_2103',
       'MT_112', 'MT_37', 'MT_161', 'MT_152', 'MT_153', 'MT_18', 'MT_160',
       'Frame_L', 'Frame_C', 'Target_Flag_I', 'Target_Flag_N'],
      dtype='object')

In [31]:
# load_endf() extracts ENDF data from the ML_Data directory.
# NOTE(review): the arguments presumably select the isotope (Cl-35) and the reaction channel (MT103) — confirm against nucml.
endf_cl = nuc_data.load_endf("Cl035", "MT103")

C:\Users\Pedro\Desktop\ML_Nuclear_Data\ML_Data\ENDF_neutrons\\Cl035\endfb8.0\tables\xs\n-Cl035-MT103.endfb8.0
Convering MeV to eV...
Convering mb to b...
Finish reading ENDF data with shape:  (8791, 2)


In [32]:
# Load the new LBNL chlorine measurements from the CSV file.
new_data = endf_utils.load_new("../EXFOR/New_Data/Chlorine_Data/new_cl_np.csv")

../EXFOR/New_Data/Chlorine_Data/new_cl_np.csv
Finish reading ENDF data with shape:  (12, 4)


In [38]:
# Options forwarded to the new-data loader: Z=17, A=35, the MT_103 channel,
# with `append_exfor` enabled and `log_e` disabled.
new_cl_data_kwargs = dict(Z=17, A=35, MT="MT_103", append_exfor=True, log_e=False)
new_cl_data = exfor_utils.load_exfor_newdata(
    "../EXFOR/New_Data/Chlorine_Data/new_cl_np.csv",
    df=df,
    **new_cl_data_kwargs
)
new_cl_data.head()

Extracting samples from dataframe.
EXFOR extracted DataFrame has shape:  (215, 49)
Expanded Dataset has shape:  (12, 49)


Unnamed: 0,Energy,Data,Target_Protons,Target_Neutrons,Target_Mass_Number,MT_1,MT_102,MT_16,MT_17,MT_2,MT_3,MT_4,MT_101,MT_103,MT_104,MT_41,MT_9000,MT_105,MT_32,MT_51,MT_33,MT_107,MT_24,MT_155,MT_158,MT_159,MT_108,MT_29,MT_1108,MT_113,MT_106,MT_22,MT_1003,MT_9001,MT_28,MT_111,MT_203,MT_2103,MT_112,MT_37,MT_161,MT_152,MT_153,MT_18,MT_160,Frame_L,Frame_C,Target_Flag_I,Target_Flag_N
0,2420000,0.0166,17,18,35,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
1,2420000,0.0196,17,18,35,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
2,2520000,0.0261,17,18,35,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
3,2520000,0.0257,17,18,35,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
4,2580000,0.0446,17,18,35,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0


# Optimization - Learning Rate

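A learning rate that is gradually reduced over time usually performs better than a fixed one, since it reduces the jumping around that a fixed rate shows late in training. The `schedules.InverseTimeDecay` schedule decreases the rate hyperbolically: to 1/2 of the base rate at 1000 epochs, 1/3 at 2000 epochs, and so on.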

# Compile and Fit Function - TensorBoard Logs

In [18]:
def get_optimizer(lr_schedule):
    """Return an Adam optimizer driven by the given learning rate.

    Args:
        lr_schedule: Learning rate, or learning-rate schedule, handed to Adam.

    Returns:
        A `tf.keras.optimizers.Adam` instance.
    """
    adam = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    return adam

def get_callbacks(name):
    """Assemble the Keras callbacks used for one training run.

    Args:
        name: Run name; joined onto the module-level `logdir` to form the
            TensorBoard log directory for this run.

    Returns:
        A list with, in order: the tensorflow_docs `EpochDots` progress
        printer, an `EarlyStopping` callback monitoring `val_loss` with a
        patience of 200 epochs, and a `TensorBoard` callback.
    """
    progress_dots = tfdocs.modeling.EpochDots()
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=200)
    tensorboard = tf.keras.callbacks.TensorBoard(logdir / name)
    return [progress_dots, early_stop, tensorboard]

def compile_and_fit(model, name, train_dataset, test_dataset, STEPS_PER_EPOCH, BATCH_SIZE=None, 
                    optimizer=None, max_epochs=10000, DECAY_EPOCHS=1000, learning_rate=0.001):
    """Compile `model` for MSE regression, train it, and return the Keras history.

    Args:
        model: Keras model to compile and fit.
        name: Run name, forwarded to `get_callbacks` (TensorBoard sub-directory).
        train_dataset: Training data passed to `model.fit`.
        test_dataset: Passed to `model.fit` as `validation_data`; because the
            callbacks monitor `val_loss`, it also drives early stopping.
        STEPS_PER_EPOCH: Number of training batches that make up one epoch.
        BATCH_SIZE: Unused; kept so existing callers keep working.
        optimizer: Optional ready-made optimizer. When None, Adam with an
            inverse-time-decay learning-rate schedule is created.
        max_epochs: Upper bound on the number of training epochs.
        DECAY_EPOCHS: Number of epochs after which the scheduled learning rate
            has dropped to half of `learning_rate` (`decay_rate` is 1).
        learning_rate: Initial learning rate of the default schedule. Ignored
            when `optimizer` is supplied.

    Returns:
        The `History` object returned by `model.fit`.

    Raises:
        ValueError: If the default schedule is needed but
            `STEPS_PER_EPOCH * DECAY_EPOCHS` is not positive.
    """
    if optimizer is None:
        # The schedule is only built when it is used; a caller-supplied
        # optimizer brings its own learning rate.
        decay_steps = STEPS_PER_EPOCH * DECAY_EPOCHS
        if decay_steps <= 0:
            raise ValueError(
                "STEPS_PER_EPOCH * DECAY_EPOCHS must be positive, got "
                "{} * {}".format(STEPS_PER_EPOCH, DECAY_EPOCHS))
        lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
            learning_rate, decay_steps=decay_steps,
            decay_rate=1, staircase=False)
        optimizer = get_optimizer(lr_schedule)

    model.compile(optimizer=optimizer,
                  loss='mse',
                  metrics=['mae', 'mse'])
    model.summary()
    # verbose=0: per-epoch progress is printed by the EpochDots callback instead.
    history = model.fit(
        train_dataset,
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=max_epochs,
        validation_data=test_dataset,
        callbacks=get_callbacks(name),
        verbose=0)
    return history

In [19]:
# Single-device strategies: one pinned to the CPU, one to the first GPU.
cpu_strategy = tf.distribute.OneDeviceStrategy(device="/CPU:0")
single_gpu_strategy =  tf.distribute.OneDeviceStrategy(device="/GPU:0")
# Mirrored strategy configured with hierarchical-copy all-reduce as its cross-device op.
gpu_strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
# Show how many replicas the mirrored strategy keeps in sync.
print('Number of devices: {}'.format(gpu_strategy.num_replicas_in_sync))

Number of devices: 2


# Create a tf.data.Dataset Generator

The larger the `BATCH_SIZE`, the more efficiently TensorFlow operates.

In [20]:
# Sample counts of the test and training splits.
N_VALIDATION = len(x_test)
N_TRAIN = len(x_train)
# The shuffle buffer spans the whole training set.
BUFFER_SIZE = N_TRAIN
# Batch size used for the CPU pipelines.
BATCH_SIZE = 500
# Number of input features; used as the input_shape of each model's first layer.
FEATURES = len(x_train.columns)

In [21]:
def tf_dataset_gen(x, y, xt, yt, BUFFER_SIZE, BATCH_SIZE, gpu=False, multiplier=0, cache=False):
    """Build batched `tf.data` pipelines for training and evaluation.

    Args:
        x, y: Training features and targets; objects exposing `.values`
            and `len()` (e.g. pandas DataFrame/Series).
        xt, yt: Evaluation features and targets, same kind of objects.
        BUFFER_SIZE: Shuffle buffer size for the training pipeline.
        BATCH_SIZE: Batch size before any GPU scaling.
        gpu: When truthy, the batch size is multiplied by `multiplier`.
        multiplier: Batch-size scale factor used when `gpu` is set; must be >= 1.
        cache: When truthy, cache both datasets so the source data is not
            re-read at each epoch.

    Returns:
        Tuple `(train_dataset, test_dataset, STEPS_PER_EPOCH, BATCH_SIZE)`,
        where `BATCH_SIZE` is the effective batch size and `STEPS_PER_EPOCH`
        is `len(x) // BATCH_SIZE`, but never less than 1.

    Raises:
        ValueError: If the effective batch size would be smaller than 1.
    """
    if gpu:
        if multiplier < 1:
            raise ValueError(
                "multiplier must be >= 1 when gpu=True, got {}".format(multiplier))
        BATCH_SIZE = BATCH_SIZE * multiplier
        print("GPU: ON")
    if BATCH_SIZE < 1:
        raise ValueError("BATCH_SIZE must be >= 1, got {}".format(BATCH_SIZE))

    train_dataset = tf.data.Dataset.from_tensor_slices((x.values, y.values))
    test_dataset = tf.data.Dataset.from_tensor_slices((xt.values, yt.values)).batch(BATCH_SIZE)
    if cache:
        # Cache the finite training set BEFORE shuffle/repeat: caching after
        # repeat() would cache an endless stream and freeze the shuffle order.
        train_dataset = train_dataset.cache()
        test_dataset = test_dataset.cache()
    train_dataset = train_dataset.shuffle(BUFFER_SIZE).repeat().batch(BATCH_SIZE)

    # Size the epoch from the data actually passed in rather than a notebook
    # global, and never report zero steps for a small dataset.
    STEPS_PER_EPOCH = max(1, len(x) // BATCH_SIZE)
    print("BATCH SIZE: ", BATCH_SIZE)
    print("STEPS PER EPOCH: ", STEPS_PER_EPOCH)
    return train_dataset, test_dataset, STEPS_PER_EPOCH, BATCH_SIZE

In [22]:
# Build the CPU pipelines with the default BATCH_SIZE (no GPU batch multiplier).
train_dataset, test_dataset, STEPS_PER_EPOCH_CPU, BATCH_SIZE_CPU = tf_dataset_gen(
    x_train, y_train, x_test, y_test, BUFFER_SIZE, BATCH_SIZE)

BATCH SIZE:  500
STEPS PER EPOCH:  2


In [29]:
# Build the GPU pipelines: a base batch of 200 scaled by multiplier=2, i.e. an effective batch of 400.
train_dataset_gpu, test_dataset_gpu, STEPS_PER_EPOCH_GPU, BATCH_SIZE_GPU = tf_dataset_gen(
    x_train, y_train, x_test, y_test, BUFFER_SIZE, 200, gpu=True, multiplier=2)

GPU: ON
BATCH SIZE:  400
STEPS PER EPOCH:  3


# Training Models

In [25]:
# Training histories keyed by run name, filled in by the model-size experiments below.
size_histories = {}
# NOTE(review): no visible cell writes to this dict — confirm whether it is still needed.
size_histories_gpu = {}

In [41]:
# %%time
# with cpu_strategy.scope():
#     tiny_model = tf.keras.Sequential([
#         layers.Dense(16, activation='elu', input_shape=(FEATURES,)),
#         layers.Dense(1)])
#     size_histories['Tiny_CPU'] = compile_and_fit(tiny_model, 'sizes/Tiny_CPU', train_dataset, 
#                                                  test_dataset, STEPS_PER_EPOCH_CPU, max_epochs=5000)

In [30]:
%%time
# Baseline: one 16-unit ELU hidden layer, trained on a single GPU.
# (Use `with gpu_strategy.scope():` instead to train under the mirrored strategy.)
with single_gpu_strategy.scope():
    tiny_model = tf.keras.Sequential()
    tiny_model.add(layers.Dense(16, activation='elu', input_shape=(FEATURES,)))
    tiny_model.add(layers.Dense(1))
    size_histories['Tiny_GPU'] = compile_and_fit(
        tiny_model, 'sizes/Tiny_GPU', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000, DECAY_EPOCHS=1000)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 16)                784       
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 17        
Total params: 801
Trainable params: 801
Non-trainable params: 0
_________________________________________________________________

Epoch: 0, loss:12.8358,  mae:0.8951,  mse:12.8358,  val_loss:1.9230,  val_mae:0.7191,  val_mse:1.9230,  
....................................................................................................
Epoch: 100, loss:9.0198,  mae:0.7339,  mse:9.0198,  val_loss:1.1391,  val_mae:0.6192,  val_mse:1.1391,  
....................................................................................................
Epoch: 200, loss:8.8378,  mae:0.7216,  mse:8.8378,  val_loss:1.1663,  val_mae:0.5999,  val_mse:1.1663,  


In [40]:
# %%time
# with gpu_strategy.scope():
#     tiny_model = tf.keras.Sequential([layers.Dense(16, activation='elu', input_shape=(FEATURES,)), 
#                                       layers.Dense(1)])
#     size_histories['Tiny_GPU_500'] = compile_and_fit(tiny_model, 'sizes/Tiny_GPU_500', 
#                                                  train_dataset_gpu, test_dataset_gpu, 
#                                                       STEPS_PER_EPOCH_GPU, max_epochs=5000, DECAY_EPOCHS=500)

In [42]:
# %%time
# with gpu_strategy.scope():
#     tiny_model = tf.keras.Sequential([layers.Dense(16, activation='elu', input_shape=(FEATURES,)), 
#                                       layers.Dense(1)])
#     size_histories['Tiny_GPU_15000'] = compile_and_fit(tiny_model, 'sizes/Tiny_GPU_500', 
#                                                  train_dataset_gpu, test_dataset_gpu, 
#                                                       STEPS_PER_EPOCH_GPU, max_epochs=15000, DECAY_EPOCHS=500)

In [43]:
%%time
# Deeper variant: six 16-unit ELU hidden layers, faster learning-rate decay.
with single_gpu_strategy.scope():
    tiny_model = tf.keras.Sequential(
        [layers.Dense(16, activation='elu', input_shape=(FEATURES,))]
        + [layers.Dense(16, activation='elu') for _ in range(5)]
        + [layers.Dense(1)]
    )
    size_histories['Tiny_GPU_exp'] = compile_and_fit(
        tiny_model, 'sizes/Tiny_GPU_exp', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=15000, DECAY_EPOCHS=500)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 16)                784       
_________________________________________________________________
dense_9 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_10 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_11 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_12 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_13 (Dense)             (None, 16)                272       
_________________________________________________________________
dense_14 (Dense)             (None, 1)                

In [46]:
%%time
# Fifteen 112-unit ELU hidden layers followed by a single linear output unit.
with single_gpu_strategy.scope():
    tiny_model = tf.keras.Sequential(
        [layers.Dense(112, activation='elu', input_shape=(FEATURES,))]
        + [layers.Dense(112, activation='elu') for _ in range(14)]
        + [layers.Dense(1)]
    )
    size_histories['Tiny_GPU_exp112'] = compile_and_fit(
        tiny_model, 'sizes/Tiny_GPU_exp112', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=15000, DECAY_EPOCHS=500)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 112)               5488      
_________________________________________________________________
dense_16 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_17 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_18 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_19 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_20 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_21 (Dense)             (None, 112)              

In [48]:
%%time
# Same fifteen-layer, 112-unit architecture as the ELU run, with ReLU activations.
with single_gpu_strategy.scope():
    tiny_model = tf.keras.Sequential(
        [layers.Dense(112, activation='relu', input_shape=(FEATURES,))]
        + [layers.Dense(112, activation='relu') for _ in range(14)]
        + [layers.Dense(1)]
    )
    size_histories['Tiny_GPU_exp_relu'] = compile_and_fit(
        tiny_model, 'sizes/Tiny_GPU_exp_relu', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=15000, DECAY_EPOCHS=500)

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_31 (Dense)             (None, 112)               5488      
_________________________________________________________________
dense_32 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_33 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_34 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_35 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_36 (Dense)             (None, 112)               12656     
_________________________________________________________________
dense_37 (Dense)             (None, 112)              

In [104]:
# plt.figure(figsize=(15,10))
# plotter = tfdocs.plots.HistoryPlotter(metric='mae', smoothing_std=5)
# plotter.plot(size_histories)
# plt.ylim()
# plt.ylabel('MAE [MeV]')

In [50]:
%%time
# Medium model: three 64-unit ELU hidden layers.
with single_gpu_strategy.scope():
    medium_model = tf.keras.Sequential(
        [layers.Dense(64, activation='elu', input_shape=(FEATURES,))]
        + [layers.Dense(64, activation='elu') for _ in range(2)]
        + [layers.Dense(1)]
    )
    size_histories['Medium_GPU'] = compile_and_fit(
        medium_model, 'sizes/Medium_GPU', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_47 (Dense)             (None, 64)                3136      
_________________________________________________________________
dense_48 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_49 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_50 (Dense)             (None, 1)                 65        
Total params: 11,521
Trainable params: 11,521
Non-trainable params: 0
_________________________________________________________________

Epoch: 0, loss:12.1501,  mae:0.8857,  mse:12.1501,  val_loss:1.7294,  val_mae:0.6091,  val_mse:1.7294,  
....................................................................................................
Epoch: 100, loss:5.1973,  mae:0.6206,  ms

In [51]:
%%time
# Large model: four 512-unit ELU hidden layers.
with single_gpu_strategy.scope():
    large_model = tf.keras.Sequential(
        [layers.Dense(512, activation='elu', input_shape=(FEATURES,))]
        + [layers.Dense(512, activation='elu') for _ in range(3)]
        + [layers.Dense(1)]
    )
    size_histories['Large_GPU'] = compile_and_fit(
        large_model, 'sizes/Large_GPU', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_51 (Dense)             (None, 512)               25088     
_________________________________________________________________
dense_52 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_53 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_54 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_55 (Dense)             (None, 1)                 513       
Total params: 813,569
Trainable params: 813,569
Non-trainable params: 0
_________________________________________________________________

Epoch: 0, loss:12.8413,  mae:1.3320,  mse:12.8413,  val_loss:1.6885,  val_mae:0.8537,  val_mse:1.6885,  
........

In [53]:
# plt.figure(figsize=(15,10))
# plotter.plot(size_histories)
# a = plt.xscale('log')
# plt.xlim([5, max(plt.xlim())])
# # plt.ylim([0.5, 0.7])
# plt.yscale('log')
# plt.xlabel("Epochs [Log Scale]")

# Adding Regularization

According to Occam's razor, among competing explanations the simplest one (the one with the fewest assumptions) is the most likely to be correct. The same applies to neural networks: there will always be multiple models, and therefore multiple sets of weights, that can explain the data. A simple model in this context is one whose distribution of parameter values has less entropy. A common way to mitigate overfitting is to constrain the complexity of the network by forcing its weights to take small values, which makes the distribution of weight values more regular (weight regularization). This is done by adding to the loss function of the network a cost associated with having large weights. This cost can be:
- L1 Regularization: cost added proportional to the absolute value of the weights coefficients (L1 Norm). It pushes weights towards exactly 0 encouraging sparsity.
- L2 Regularization: cost added proportional to the square of the value of the weight coefficients (L2 norm). It is also called weight decay in the neural-network context. It penalizes weight parameters without making them sparse, since the weights go towards 0 but are not exactly 0. It is the more common of the two.

With `regularizers.l2(VALUE)`, every coefficient $w$ in the layer's weight matrix adds $\text{VALUE} \cdot w^2$ to the total loss of the network. In binary classification problems we monitor binary crossentropy since it doesn't have this regularization component mixed in; for the same reason, in this regression notebook the `mse` metric (rather than `loss`) is the value to compare between regularized and unregularized models.

TODO: check whether the L2-regularized model resists overfitting better than the `Large` model (both have the same number of parameters).

Dropout is one of the most effective and most commonly used regularization techniques for neural networks. It is based on the idea that individual nodes in the network cannot rely on the output of the others; each node must output features that are useful on their own. The dropout layer randomly drops out (sets to zero) a number of output features of the layer during training. Let's say a given layer would normally have returned a vector [0.2, 0.5, 1.3, 0.8, 1.1] for a given input sample during training; after applying dropout, this vector will have a few zero entries distributed at random, e.g. [0, 0.5, 1.3, 0, 1.1]. The dropout rate is the fraction of features that are zeroed out (usually between 0.2 and 0.5).

In [54]:
# Training histories of the regularization experiments, keyed by run name.
regularizer_histories = {}
# regularizer_histories['Tiny_GPU_1000'] = size_histories['Tiny_GPU_1000']

In [55]:
%%time
# Large architecture (four 512-unit ELU layers) with an L2 penalty of 0.001 on every hidden kernel.
with single_gpu_strategy.scope():
    l2_model = tf.keras.Sequential(
        [layers.Dense(512, activation='elu',
                      kernel_regularizer=regularizers.l2(0.001),
                      input_shape=(FEATURES,))]
        + [layers.Dense(512, activation='elu',
                        kernel_regularizer=regularizers.l2(0.001))
           for _ in range(3)]
        + [layers.Dense(1)]
    )

    regularizer_histories['l2'] = compile_and_fit(
        l2_model, 'regularizers/l2', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_56 (Dense)             (None, 512)               25088     
_________________________________________________________________
dense_57 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_58 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_59 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_60 (Dense)             (None, 1)                 513       
Total params: 813,569
Trainable params: 813,569
Non-trainable params: 0
_________________________________________________________________

Epoch: 0, loss:12.3083,  mae:1.1600,  mse:10.6891,  val_loss:4.1694,  val_mae:1.2044,  val_mse:2.5574,  
........

In [57]:
%%time
# Large architecture with a 0.5 dropout layer after each of the four hidden layers.
with single_gpu_strategy.scope():
    dropout_model = tf.keras.Sequential(
        [layers.Dense(512, activation='elu', input_shape=(FEATURES,)),
         layers.Dropout(0.5)]
        + [
            stacked_layer
            for _ in range(3)
            for stacked_layer in (layers.Dense(512, activation='elu'),
                                  layers.Dropout(0.5))
        ]
        + [layers.Dense(1)]
    )

    regularizer_histories['dropout'] = compile_and_fit(
        dropout_model, 'regularizers/dropout', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_61 (Dense)             (None, 512)               25088     
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_62 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_63 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_64 (Dense)             (None, 512)             

In [58]:
%%time
# L2 (0.0001) and dropout (0.5) combined on four 512-unit ELU hidden layers.
with single_gpu_strategy.scope():
    combined_model = tf.keras.Sequential(
        [layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001),
                      activation='elu', input_shape=(FEATURES,)),
         layers.Dropout(0.5)]
        + [
            stacked_layer
            for _ in range(3)
            for stacked_layer in (
                layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001),
                             activation='elu'),
                layers.Dropout(0.5))
        ]
        + [layers.Dense(1)]
    )

    regularizer_histories['combined'] = compile_and_fit(
        combined_model, 'regularizers/combined', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_66 (Dense)             (None, 512)               25088     
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_67 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_68 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_6 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_69 (Dense)             (None, 512)             

In [59]:
%%time
# Same L2 + dropout recipe, widened to four 1000-unit ELU hidden layers.
# Note: this rebinds `combined_model`, replacing the 512-unit model built earlier.
with single_gpu_strategy.scope():
    combined_model = tf.keras.Sequential(
        [layers.Dense(1000, kernel_regularizer=regularizers.l2(0.0001),
                      activation='elu', input_shape=(FEATURES,)),
         layers.Dropout(0.5)]
        + [
            stacked_layer
            for _ in range(3)
            for stacked_layer in (
                layers.Dense(1000, kernel_regularizer=regularizers.l2(0.0001),
                             activation='elu'),
                layers.Dropout(0.5))
        ]
        + [layers.Dense(1)]
    )

    regularizer_histories['combined_1000'] = compile_and_fit(
        combined_model, 'regularizers/combined_1000', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_71 (Dense)             (None, 1000)              49000     
_________________________________________________________________
dropout_8 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_72 (Dense)             (None, 1000)              1001000   
_________________________________________________________________
dropout_9 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_73 (Dense)             (None, 1000)              1001000   
_________________________________________________________________
dropout_10 (Dropout)         (None, 1000)              0         
_________________________________________________________________
dense_74 (Dense)             (None, 1000)            

In [61]:
# plt.figure(figsize=(15,10))
# plotter.plot(regularizer_histories)
# a = plt.xscale('log')
# plt.xlim([2, max(plt.xlim())])
# # plt.ylim([0.5, 0.7])
# plt.yscale('log')
# plt.xlabel("Epochs [Log Scale]")

In [121]:
%%time
# Ten 512-unit ELU hidden layers, each followed by 0.5 dropout, built under the mirrored strategy.
# Note: this rebinds `dropout_model`, replacing the four-layer dropout model built earlier.
with gpu_strategy.scope():
    dropout_model = tf.keras.Sequential(
        [layers.Dense(512, activation='elu', input_shape=(FEATURES,)),
         layers.Dropout(0.5)]
        + [
            stacked_layer
            for _ in range(9)
            for stacked_layer in (layers.Dense(512, activation='elu'),
                                  layers.Dropout(0.5))
        ]
        + [layers.Dense(1)]
    )

    regularizer_histories['dropout_3'] = compile_and_fit(
        dropout_model, 'regularizers/dropout_3', train_dataset_gpu, test_dataset_gpu,
        STEPS_PER_EPOCH=STEPS_PER_EPOCH_GPU, max_epochs=5000)

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_47 (Dense)             (None, 512)               2560      
_________________________________________________________________
dropout_12 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_48 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_13 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_49 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_14 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_50 (Dense)             (None, 512)             

In [62]:
# hist = pd.DataFrame(regularizer_histories['l2'].history)
# hist['epoch'] = regularizer_histories['l2'].epoch
# hist.tail()

## Conclusion

This notebook introduced a few techniques to handle a regression problem.

* Mean Squared Error (MSE) is a common loss function used for regression problems (different loss functions are used for classification problems).
* Similarly, evaluation metrics used for regression differ from classification. A common regression metric is Mean Absolute Error (MAE).
* When numeric input data features have values with different ranges, each feature should be scaled independently to the same range.
* If there is not much training data, one technique is to prefer a small network with few hidden layers to avoid overfitting.
* Early stopping is a useful technique to prevent overfitting.