In [None]:
import sys
sys.path.append("..")

In [None]:
# Setup
import reproducibility

# both can change
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import tensorflow as tf
import pickle, os

from data_funcs import load_and_fix_data, plot_data
from moisture_rnn import create_rnn_data, train_rnn, rnn_predict

# Physics-Initiated Neural Networks - Output Layers

The purpose of this notebook is to discuss the final output layer of physics-initiated Neural Networks. Physics might constrain the range of model outputs, so it might make sense to hard-code this into the output layer. Furthermore, a physical system may have different behavior when it approaches the minimum and maximum allowable values.

## Fuel Moisture Models

In the fuel moisture content (FM) modeling framework, there are constraints on the model outputs to make them physically reasonable. First, FM cannot go below 0%, which corresponds to no water content in the fuels. The maximum possible water content depends on the fuel type. Live fuels range from 200-250% maximum allowable moisture. For dead fuels, Mandel et al. (2014) use the model below with a "saturation" moisture level $S=250\%$:


$$
\frac{\mathrm{d}m}{\mathrm{d}t}=\frac{S-m}{t_{\mathrm{r}}}\left(1-\exp\left(-\frac{r-r_0}{r_{\mathrm{s}}}
\right)  \right),\ \text{if}\ r>r_0, 
$$

A simple approach would be to constrain the outputs with a piece-wise linear function, or a "clipped relu" function as depicted below:

![activation functions](https://www.researchgate.net/profile/Md-Hossain-331/publication/343096012/figure/fig2/AS:915583516278784@1595303529166/Activation-Functions-f-x-and-their-corresponding-1-st-derivatives-Dx-The-proposed.png)

For the purposes of this notebook, we will constrain dead fuel moisture values to be less than or equal to 250%. Additionally, a physically reasonable process would have moisture content approach the maximum logarithmically, but the minimum moisture content of 0% could be reached more easily. Thus, the "log-tailed relu" depicted above could be preferable, though this function will approach infinity instead of being capped at a maximum value. We will explore augmenting the log-tailed relu idea to have the range of outputs exponentially approach a maximum value.

Sources:
* [Live Fuel Moistures](https://www.nwcg.gov/publications/pms437/fuel-moisture/live-fuel-moisture-content)
* [Fuel Moisture Processes](https://www.nwcg.gov/publications/pms425-1/weather-and-fuel-moisture), includes discussion of fiber-saturation level

## Alternative ReLU Functions 

Below we define the activation functions and plot the range. (The primary source for this section is Hossain 2020.)

The Clipped-ReLU function is identical to the ReLU up to a threshold value, after which it is constant with zero slope. The mathematical form of the clipped-ReLU with threshold value $A$ is:

$$
f(x)=\begin{cases}
    \max(0, x) &  x\leq A\\
    A &  x> A
\end{cases}
$$

This can be easily programmed as a piecewise linear function by taking the maximum of 0 and the input $x$, and then the minimum of that output with the threshold value:

In [None]:
def relu(x):
    """Standard ReLU: element-wise maximum of zero and ``x``."""
    floor = 0.
    return tf.keras.backend.maximum(floor, x)

def clipped_relu(x, threshold=250):
    """Clipped ReLU: ReLU of ``x``, capped from above at ``threshold``."""
    # First apply the ordinary ReLU, then cap the result at the threshold.
    rectified = tf.keras.backend.maximum(0., x)
    return tf.keras.backend.minimum(rectified, threshold)

In [None]:
# 50 evenly spaced sample points on [-100, 400] used to draw the activation curves.
xgrid = np.linspace(start=-100, stop=400, num=50)

In [None]:
# Compare the standard and clipped ReLU on the current axes.
ax = plt.gca()
ax.set_ylim(-50, 400)
ax.axline((-1, 0), (0, 0), color="k", linestyle=":")  # x axis line
ax.axline((0, 0), (0, 1), color="k", linestyle=":")  # y axis line
ax.plot(xgrid, relu(xgrid), label="Standard Relu", linestyle="dashed")
ax.plot(xgrid, clipped_relu(xgrid), label="Clipped Relu")
ax.legend()
ax.grid()

The log-tailed ReLU function is similarly identical to the standard ReLU up to a threshold value, and then proceeds logarithmically from there. The mathematical specification, for a threshold value of $A$, is:

$$
f(x)=\begin{cases}
    0 &  x\leq 0\\
    x &  0<x\leq A\\
    A+\log(x-A + 1) &  x> A
\end{cases}
$$

In [None]:
# Define Log-Tailed Relu
def logtailed_relu(x, threshold=240):
    """Log-tailed ReLU, evaluated with NumPy.

    Returns 0 for ``x <= 0``, ``x`` for ``0 < x <= threshold`` and
    ``threshold + log(x - threshold + 1)`` for ``x > threshold``.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s). The input is not modified.
    threshold : float, optional
        Point at which the linear part hands over to the logarithmic tail.

    Returns
    -------
    numpy.ndarray
        Activation values with the same shape as ``x`` (0-d for a scalar).
    """
    x = np.asarray(x)
    # A single mask taken from the input decides which entries get the log
    # tail, so selection and replacement values can never disagree in size.
    in_tail = x > threshold
    # Clamp the log argument at 1: entries outside the tail are discarded by
    # np.where anyway, and the clamp keeps them from producing NaNs/warnings.
    tail = threshold + np.log(np.maximum(x - threshold + 1, 1))
    return np.where(in_tail, tail, np.maximum(0., x))

In [None]:
# Overlay all three activations on the current axes.
ax = plt.gca()
ax.set_ylim(-50, 400)
ax.axline((-1, 0), (0, 0), color="k", linestyle=":")  # x axis line
ax.axline((0, 0), (0, 1), color="k", linestyle=":")  # y axis line
ax.plot(xgrid, relu(xgrid), label="Standard Relu", linestyle="dashed")
ax.plot(xgrid, clipped_relu(xgrid), label="Clipped Relu")
ax.plot(xgrid, logtailed_relu(xgrid), label="Log-Tailed Relu")
ax.legend()
ax.grid()

The log-tailed ReLU as presented above is virtually identical to the clipped ReLU until very near the saturation level.

The maximum difference in the fitted values is about half of a tenth of a percent, so there was no dramatic effect after training. Fuels rarely get to this level, so maybe it is preferable to start the log tail at a lower value, such as 25%, when most fuels are already at extinction moisture level.

In [None]:
# Same comparison, with the log tail starting at a threshold of 25.
ax = plt.gca()
ax.set_ylim(-50, 400)
ax.axline((-1, 0), (0, 0), color="k", linestyle=":")  # x axis line
ax.axline((0, 0), (0, 1), color="k", linestyle=":")  # y axis line
ax.plot(xgrid, relu(xgrid), label="Standard Relu", linestyle="dashed")
ax.plot(xgrid, clipped_relu(xgrid), label="Clipped Relu")
ax.plot(xgrid, logtailed_relu(xgrid, threshold=25), label="Log-Tailed Relu")
ax.legend()
ax.grid()

## Testing Models with Various Output Layer Activation

In [None]:
# Data
# Change directory for data read/write
dict_file = '../data/raws_CO_202306.pickle'  # input path of FMDA dictionaries
reproducibility_file = '../data/reproducibility_dict.pickle'

# Read the test datasets into a fresh dictionary and list the available cases.
test_dict = dict(load_and_fix_data(dict_file))
print(test_dict.keys())

# Same for the reproducibility dataset.
repro_dict = dict(load_and_fix_data(reproducibility_file))
print(repro_dict.keys())

# Build Case Data
# NOTE: `id` shadows the Python built-in of the same name within this notebook.
id = "CPTC2_202306010000"
case_data = test_dict[id]
case_data["hours"] = len(case_data['fm'])
case_data["h2"] = 24 * 20

In [None]:
# Re-Define log-tailed ReLU function, since I don't know how to pass args through this interface
# Define Log-Tailed Relu
def logtailed_relu(x, threshold=25):
    """Log-tailed ReLU with the tail starting at ``threshold`` (default 25).

    Returns 0 for ``x <= 0``, ``x`` for ``0 < x <= threshold`` and
    ``threshold + log(x - threshold + 1)`` for ``x > threshold``.  The ``+ 1``
    inside the logarithm makes the tail start exactly at ``threshold``;
    without it the output drops towards ``-inf`` just above the threshold.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s). The input is not modified.
    threshold : float, optional
        Point at which the linear part hands over to the logarithmic tail.

    Returns
    -------
    numpy.ndarray
        Activation values with the same shape as ``x`` (0-d for a scalar).
    """
    # NOTE(review): this is a NumPy implementation. If the 'activation' entry
    # of the parameter sets is called on symbolic Keras tensors, an equivalent
    # written with TensorFlow ops is presumably required -- confirm against
    # moisture_rnn before training with it.
    x = np.asarray(x)
    # Clamp the log argument at 1: entries at or below the threshold are
    # discarded by np.where, and the clamp keeps them from producing NaNs.
    tail = threshold + np.log(np.maximum(x - threshold + 1, 1))
    return np.where(x > threshold, tail, np.maximum(0., x))

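The clipped ReLU threshold must be scaled: below, the 250% cap is divided by `rnn_dat['scale_fm']` so that the clip level is expressed in the scaled units rather than in percent.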

In [None]:
# Linear Activation Params
param_sets = {}
param_sets[1] = dict(
    id=1,
    purpose='Linear Activation',
    cases='all',
    batch_type=2,
    training=5,
    scale=1,                  # every feature in [0, scale]
    rain_do=True,
    verbose=False,
    timesteps=5,
    activation=['linear', 'linear'],
    hidden_units=1,
    dense_units=1,            # do not change
    dense_layers=1,           # do not change
    centering=[0.0, 0.0],     # should be activation at 0
    DeltaE=[0, -1],           # bias correction
    synthetic=False,          # run also synthetic cases
    T1=0.1,                   # 1/fuel class (10)
    fm_raise_vs_rain=2,       # fm increase per mm rain, .143 is calculated from Vanderkamp method
    train_frac=0.5,           # time fraction to spend on training
    epochs=2000,
    verbose_fit=False,
    verbose_weights=False,
)

In [None]:
rnn_dat = create_rnn_data(case_data, param_sets[1])

# Define clipped ReLU function
def clipped_relu(x, threshold=250/rnn_dat['scale_fm']):
    """Clipped ReLU whose default cap is 250 divided by ``rnn_dat['scale_fm']``.

    The default is evaluated once, when this cell runs, from the ``rnn_dat``
    built just above; later changes to ``rnn_dat`` do not affect it.
    """
    rectified = tf.keras.backend.maximum(0., x)
    return tf.keras.backend.minimum(rectified, threshold)

In [None]:
# Parameter set 2: clipped ReLU on the output layer.
param_sets[2] = dict(
    id=2,
    purpose='Clipped ReLU (250 threshold)',
    cases='all',
    scale=1,                  # every feature in [0, scale]
    batch_type=2,
    training=5,
    rain_do=True,
    verbose=False,
    timesteps=5,
    activation=['linear', clipped_relu],
    hidden_units=1,
    dense_units=1,            # do not change
    dense_layers=1,           # do not change
    centering=[0.0, 0.0],     # should be activation at 0
    DeltaE=[0, -1],           # bias correction
    synthetic=False,          # run also synthetic cases
    T1=0.1,                   # 1/fuel class (10)
    fm_raise_vs_rain=2,       # fm increase per mm rain, .143 is calculated from Vanderkamp method
    train_frac=0.5,           # time fraction to spend on training
    epochs=2000,
    verbose_fit=False,
    verbose_weights=False,
)
# Parameter set 3: log-tailed ReLU on the output layer.
param_sets[3] = dict(
    id=3,
    purpose='Log-Tailed ReLU (25 threshold)',
    cases='all',
    scale=1,                  # every feature in [0, scale]
    batch_type=2,
    training=5,
    rain_do=True,
    verbose=False,
    timesteps=5,
    activation=['linear', logtailed_relu],
    hidden_units=1,
    dense_units=1,            # do not change
    dense_layers=1,           # do not change
    centering=[0.0, 0.0],     # should be activation at 0
    DeltaE=[0, -1],           # bias correction
    synthetic=False,          # run also synthetic cases
    T1=0.1,                   # 1/fuel class (10)
    fm_raise_vs_rain=2,       # fm increase per mm rain, .143 is calculated from Vanderkamp method
    train_frac=0.5,           # time fraction to spend on training
    epochs=2000,
    verbose_fit=False,
    verbose_weights=False,
)

### Check Initial Fit

In [None]:
# Untrained (fit=False) model with the linear output layer.
rnn_dat = create_rnn_data(case_data, param_sets[1])
model1 = train_rnn(rnn_dat, param_sets[1], rnn_dat['hours'], fit=False)
fit1 = rnn_predict(model1, param_sets[1], rnn_dat)

# Untrained (fit=False) model with the clipped-ReLU output layer.
rnn_dat = create_rnn_data(case_data, param_sets[2])
model2 = train_rnn(rnn_dat, param_sets[2], rnn_dat['hours'], fit=False)
fit2 = rnn_predict(model2, param_sets[2], rnn_dat)

# Overlay both predictions on the current axes.
ax = plt.gca()
ax.plot(fit1, label="Linear Output")
ax.plot(fit2, "--", label="Clipped Relu Output")
ax.legend()

### Check Trained Fit with Good Rain Param

In [None]:
# Linear Activation Params
# The parameter sets are rebuilt from scratch here with fm_raise_vs_rain = 0.143.
param_sets = {}
param_sets[1] = dict(
    id=1,
    purpose='Linear Activation',
    cases='all',
    batch_type=2,
    training=5,
    scale=1,                  # every feature in [0, scale]
    rain_do=True,
    verbose=False,
    timesteps=5,
    activation=['linear', 'linear'],
    hidden_units=1,
    dense_units=1,            # do not change
    dense_layers=1,           # do not change
    centering=[0.0, 0.0],     # should be activation at 0
    DeltaE=[0, -1],           # bias correction
    synthetic=False,          # run also synthetic cases
    T1=0.1,                   # 1/fuel class (10)
    fm_raise_vs_rain=0.143,   # fm increase per mm rain, .143 is calculated from Vanderkamp method
    train_frac=0.5,           # time fraction to spend on training
    epochs=2000,
    verbose_fit=False,
    verbose_weights=False,
)
param_sets[2] = dict(
    id=2,
    purpose='Clipped ReLU (250 threshold)',
    cases='all',
    scale=1,                  # every feature in [0, scale]
    batch_type=2,
    training=5,
    rain_do=True,
    verbose=False,
    timesteps=5,
    activation=['linear', clipped_relu],
    hidden_units=1,
    dense_units=1,            # do not change
    dense_layers=1,           # do not change
    centering=[0.0, 0.0],     # should be activation at 0
    DeltaE=[0, -1],           # bias correction
    synthetic=False,          # run also synthetic cases
    T1=0.1,                   # 1/fuel class (10)
    fm_raise_vs_rain=0.143,   # fm increase per mm rain, .143 is calculated from Vanderkamp method
    train_frac=0.5,           # time fraction to spend on training
    epochs=2000,
    verbose_fit=False,
    verbose_weights=False,
)

In [None]:
reproducibility.set_seed()  # Set seed for reproducibility
params = param_sets[1]

# Build the RNN data, train with the linear output layer, and predict.
rnn_dat = create_rnn_data(case_data, params)
model1 = train_rnn(rnn_dat, params, rnn_dat['hours'], fit=True)
fit_linear = rnn_predict(model1, params, rnn_dat)
# Store the prediction on the case under 'm' as well.
case_data['m'] = fit_linear

In [None]:
# Plot the case; its 'm' entry currently holds the prediction of the model
# trained with the linear output layer.
plot_data(case_data,title2='Initial RNN Linear')

In [None]:
reproducibility.set_seed()  # Set seed for reproducibility
params = param_sets[2]

# Build the RNN data, train with the clipped-ReLU output layer, and predict.
# Note that the name `model1` is reused for this second model.
rnn_dat = create_rnn_data(case_data, params)
model1 = train_rnn(rnn_dat, params, rnn_dat['hours'], fit=True)
fit_clipped = rnn_predict(model1, params, rnn_dat)
# Store the prediction on the case under 'm' as well.
case_data['m'] = fit_clipped

In [None]:
# Plot the case; its 'm' entry currently holds the prediction of the model
# trained with the clipped-ReLU output layer.
plot_data(case_data,title2='Initial RNN Clipped')

In [None]:
# Largest absolute gap between the two fitted series. The signed maximum alone
# would miss points where the clipped fit lies above the linear fit.
print(np.max(np.abs(fit_linear - fit_clipped)))

No difference. Likely the clip level is too high. 

## Sources

Hossain, Md & Teng, Shyh & Sohel, Ferdous & Lu, Guojun. (2020). Robust Image Classification Using A Low-Pass Activation Function and DCT Augmentation. 