# QRNN Model Selection

This notebook performs a grid search for the best-performing neural network configuration for
quantile regression. A feed-forward network is used as the basic structure, and
the following parameters are varied:

- Network depth: 0 to 3 hidden layers
- Network width: 8, 16, 32, 64, 96, 128, 256 neurons
- Activation functions: linear, sigmoid, ReLU, tanh


## ipyparallel Setup

In [1]:
import ipyparallel as ipp
# Connect to the ipyparallel cluster that was started under the 'mpi' profile.
c     = ipp.Client(profile='mpi')
# Load-balanced view: later cells submit their work through lview.map_async.
lview = c.load_balanced_view()

In [2]:
%%px
# Select the Keras backend through the environment. This is done before the
# typhon import below; the engines report "Using TensorFlow backend." on stderr.
%env KERAS_BACKEND=tensorflow
# Restrict OpenMP to a single thread per engine process
# (presumably to avoid oversubscribing cores shared by the engines - confirm).
%env OMP_NUM_THREADS=1
# Use the non-interactive "agg" matplotlib backend on the engines.
import matplotlib; matplotlib.use("agg")
import numpy as np
from typhon.retrieval.qrnn import QRNN

[stdout:0] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:1] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:2] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:3] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:4] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:5] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:6] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:7] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:8] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:9] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:10] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:11] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:12] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:13] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:14] 
env: KERAS_BACKEND=tensorflow
env: OMP_NUM_THREADS=1
[stdout:15] 
env: KE

[stderr:0] Using TensorFlow backend.
[stderr:1] Using TensorFlow backend.
[stderr:2] Using TensorFlow backend.
[stderr:3] Using TensorFlow backend.
[stderr:4] Using TensorFlow backend.
[stderr:5] Using TensorFlow backend.
[stderr:6] Using TensorFlow backend.
[stderr:7] Using TensorFlow backend.
[stderr:8] Using TensorFlow backend.
[stderr:9] Using TensorFlow backend.
[stderr:10] Using TensorFlow backend.
[stderr:11] Using TensorFlow backend.
[stderr:12] Using TensorFlow backend.
[stderr:13] Using TensorFlow backend.
[stderr:14] Using TensorFlow backend.
[stderr:15] Using TensorFlow backend.
[stderr:16] Using TensorFlow backend.
[stderr:17] Using TensorFlow backend.
[stderr:18] Using TensorFlow backend.
[stderr:19] Using TensorFlow backend.
[stderr:20] Using TensorFlow backend.
[stderr:21] Using TensorFlow backend.
[stderr:22] Using TensorFlow backend.
[stderr:23] Using TensorFlow backend.
[stderr:24] Using TensorFlow backend.


## Model Setup

In [3]:
%%px
# Quantile fractions passed to every QRNN and to the scoring functions.
quantiles = np.array([0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95])

def create_model(depth, width, act_fn):
    """Build the QRNN for one architecture configuration.

    `depth`, `width` and `act_fn` are forwarded unchanged to the QRNN
    constructor, after the fixed first argument 5 and the shared
    `quantiles` array.

    NOTE(review): the leading 5 is presumably the number of input
    features - confirm against the typhon QRNN signature.
    """
    return QRNN(5, quantiles, depth, width, act_fn)

## Cross Validation

Cross validation is used to determine the expected values of the quantile loss for each estimated quantile as well as the CRPS score of the estimated posterior.

In [4]:
%%px
from typhon.retrieval.scores import mean_quantile_score
# Load the training inputs and targets on every engine (this cell runs under
# %%px). The paths are relative, so they are resolved against each engine's
# working directory.
x_train = np.load("src/atms_simulations/data/x_train_5.npy")
y_train = np.load("src/atms_simulations/data/y_train_5.npy")

def score(y_pred, y_test):
    """Combine the quantile scores and the CRPS into one flat array.

    Both metrics are evaluated for `y_pred` against `y_test` using the
    notebook-level `quantiles`. The result is what `np.append` produces:
    the output of `mean_quantile_score` flattened, followed by the value
    returned by `QRNN.crps`.
    """
    per_quantile = mean_quantile_score(y_pred, y_test, quantiles)
    return np.append(per_quantile, QRNN.crps(y_pred, y_test, quantiles))

In [6]:
def run_cross_validation(config):
    """Cross-validate the network described by one grid-search entry.

    `config` must unpack into exactly three items: (depth, width,
    activation function). A model is built with `create_model` and its
    10-fold `cross_validation` is run on `x_train` / `y_train`.

    The function is submitted through `lview.map_async`, so `create_model`,
    `x_train` and `y_train` are expected to exist where it executes; they
    are defined in the %%px cells.

    NOTE(review): the meaning of the positional 1.0 is not visible in this
    notebook - confirm against typhon's QRNN.cross_validation.

    Returns whatever `cross_validation` returns; the recorded outputs show
    a (mean, std) pair.
    """
    n_layers, n_neurons, activation = config
    model = create_model(n_layers, n_neurons, activation)
    return model.cross_validation(x_train, y_train, 1.0, n_folds = 10)

## Running the Calculations

In [7]:
# Hyper-parameter grid for the architecture search.
depths = [0, 1, 2, 3]
widths = [8, 16, 32, 64, 96, 128, 256]
act_funcs = ["linear", "tanh", "sigmoid", "relu"]
# Full Cartesian product: 4 depths x 7 widths x 4 activations = 112 tuples of
# (depth, width, activation), with the activation varying fastest.
configs = [(d, w, f) for d in depths for w in widths for f in act_funcs]
# Submit one cross-validation task per configuration to the load-balanced view.
# The call returns an asynchronous result object; results are consumed in the
# next cell.
async_results = lview.map_async(run_cross_validation, configs)

In [46]:
# Walk through the asynchronous results in submission order, echoing each
# configuration with its outcome and keeping the (config, result) pairs.
results = []
for i, r in enumerate(async_results):
    print(configs[i])
    print("Result: " + str(r))
    results.append((configs[i], r))

(0, 8, 'linear')
Result: (4.5268916137542714, 0.037262705080625382)
(0, 8, 'tanh')
Result: (4.5244718184051518, 0.047940322537508878)
(0, 8, 'sigmoid')
Result: (4.5219965613632214, 0.028443604477790978)
(0, 8, 'relu')
Result: (4.5254493644714353, 0.055157733472477329)
(0, 16, 'linear')
Result: (4.530808489814758, 0.054663646324619779)
(0, 16, 'tanh')
Result: (4.5255652244644171, 0.03952714897145259)
(0, 16, 'sigmoid')
Result: (4.5257472024841308, 0.032462225651199501)
(0, 16, 'relu')
Result: (4.5313666663742067, 0.05968068416569533)
(0, 32, 'linear')
Result: (4.5260719536666869, 0.052145556534043791)
(0, 32, 'tanh')
Result: (4.5268492654266357, 0.041273338209282286)
(0, 32, 'sigmoid')
Result: (4.524876857772826, 0.054574557531647784)
(0, 32, 'relu')
Result: (4.5238178139419549, 0.060001441647477123)
(0, 64, 'linear')
Result: (4.5273485811500551, 0.033067939106466)
(0, 64, 'tanh')
Result: (4.5225239240417476, 0.063427679513604771)
(0, 64, 'sigmoid')
Result: (4.5294175024948125, 0.032228

In [16]:
import numpy as np
# One array per activation function, indexed [depth, width index, k] with
# k = 0 for the mean and k = 1 for the standard deviation.
res = {act: np.zeros((len(depths), len(widths), 2))
       for act in ("linear", "relu", "tanh", "sigmoid")}
# Position of every width value along the second axis.
inds = {w: k for k, w in enumerate(widths)}

# The depth value itself is used as the row index.
for ((n_layers, width, act), (mean, std)) in results:
    res[act][int(n_layers), inds[width], :] = (mean, std)

In [44]:
def print_table(res, fn = None, column_widths = None):
    """Format a result array as the header and rows of a LaTeX table.

    Parameters
    ----------
    res : array indexed as res[i, j, k]
        One table row per i and one column per j. res[i, j, 0] is printed
        as the value and res[i, j, 1] as its +/- uncertainty, both with two
        significant digits.
    fn : str, optional
        If given, the table is written to this file and nothing is returned.
        The containing directory must already exist.
    column_widths : sequence, optional
        Labels for the column headers. Defaults to the notebook-level
        `widths` list, which is what the original implementation always used.

    Returns
    -------
    str or None
        The LaTeX source when `fn` is not given, otherwise None.
    """
    if column_widths is None:
        column_widths = widths
    n_rows, n_cols = res.shape[0], res.shape[1]

    # Header: one "n_n = <width>" cell per column.
    s = r""
    for j in range(n_cols):
        s += r" & $n_n = {0}$ ".format(column_widths[j])
    s += r"\\ \hline"

    for i in range(n_rows):
        s += "$n_h =  {0}$ & ".format(i)
        # Build every cell of the row from its own column index. The previous
        # version reused the stale loop variable for the final cell and so
        # repeated the second-to-last column instead of printing the last one.
        cells = [r"${:.2} \pm {:.2}$".format(res[i, j, 0], res[i, j, 1])
                 for j in range(n_cols)]
        s += " & ".join(cells) + r" \\ "
    s += r"\hline"

    if fn:
        with open(fn, "w") as f:
            f.write(s)
    else:
        return s
    
def print_table2(res1, res2, fn = None):
    """Format two result arrays side by side as rows of a LaTeX table.

    Parameters
    ----------
    res1, res2 : arrays indexed as res[i, j, k]
        For every row i of `res1`, all columns of `res1` are followed by all
        columns of `res2`. Index k = 0 is printed as the value and k = 1 as
        its +/- uncertainty, both with two significant digits. `res2` needs
        at least as many rows as `res1`.
    fn : str, optional
        If given, the rows are written to this file and nothing is returned.
        The containing directory must already exist.

    Returns
    -------
    str or None
        The LaTeX source when `fn` is not given, otherwise None.
    """
    s = ""
    for i in range(res1.shape[0]):
        # Label the row with its index; the previous version emitted the
        # literal text "i" for every row.
        s += "$n_h = {0}$ &".format(i)
        cells = [r"${:.2} \pm {:.2}$".format(res1[i, j, 0], res1[i, j, 1])
                 for j in range(res1.shape[1])]
        # Every res2 column is addressed by its own index. The previous
        # version reused the stale loop variable for the final cell and so
        # never printed the last column of res2.
        cells += [r"${:.2} \pm {:.2}$".format(res2[i, j, 0], res2[i, j, 1])
                  for j in range(res2.shape[1])]
        s += " & ".join(cells) + r" \\ "

    if fn:
        with open(fn, "w") as f:
            f.write(s)
    else:
        return s

In [45]:
# Write one LaTeX table per activation function. print_table opens each path
# with open(fn, "w"), so the tables/ directory has to exist beforehand.
print_table(res["linear"],  "tables/linear.tbl")
print_table(res["sigmoid"], "tables/sigmoid.tbl")
print_table(res["tanh"],    "tables/tanh.tbl")
print_table(res["relu"],    "tables/relu.tbl")

## Training Parameters

In [5]:
def run_cross_validation(config):
    """Cross-validate the fixed architecture with one set of training options.

    `config` must unpack into exactly four items: (batch_size, lr_decay,
    lr_minimum, convergence_epochs). The network is always
    `create_model(3, 128, "relu")`; only the training options passed to its
    10-fold `cross_validation` vary.

    This definition replaces the architecture-search function of the same
    name defined earlier in the notebook.

    NOTE(review): the meaning of the positional 1.0 is not visible in this
    notebook - confirm against typhon's QRNN.cross_validation.

    Returns whatever `cross_validation` returns; the recorded outputs show
    a (mean, std) pair.
    """
    batch_size, lr_decay, lr_minimum, convergence_epochs = config
    training_options = {
        "batch_size": batch_size,
        "learning_rate_decay": lr_decay,
        "learning_rate_minimum": lr_minimum,
        "convergence_epochs": convergence_epochs,
    }
    model = create_model(3, 128, "relu")
    return model.cross_validation(x_train, y_train, 1.0, n_folds = 10,
                                  **training_options)

In [6]:
# Training-parameter sweeps. Every entry is a tuple
# (batch_size, lr_decay, lr_minimum, convergence_epochs); each sweep varies one
# of the four while the others stay at 256, 2.0, 1e-6 and 2.
configs = []
# Batch size.
configs.extend((bs, 2.0, 1e-6, 2) for bs in [128, 256, 512, 1024])
# Learning-rate decay factor.
configs.extend((256, lrd, 1e-6, 2) for lrd in [1.5, 2.0, 5.0, 10.0])
# Minimum learning rate, 1e-4 down to 1e-8.
configs.extend((256, 2.0, 10 ** -lrm, 2) for lrm in [4, 5, 6, 7, 8])
# Convergence epochs.
configs.extend((256, 2.0, 1e-6, ce) for ce in [1, 2, 4, 8])

In [8]:
# Submit one cross-validation task per training-parameter configuration to the
# load-balanced view; the results are consumed in the next cell.
async_results = lview.map_async(run_cross_validation, configs)

In [9]:
# Walk through the asynchronous results in submission order, echoing each
# training-parameter configuration with its outcome and keeping the
# (config, result) pairs.
results = []
for i, r in enumerate(async_results):
    print(configs[i])
    print("Result: " + str(r))
    results.append((configs[i], r))

(128, 2.0, 1e-06, 2)
Result: (0.90578844951438897, 0.0081078108798440015)
(256, 2.0, 1e-06, 2)
Result: (0.91163818877601632, 0.0064749937914909033)
(512, 2.0, 1e-06, 2)
Result: (0.92271153948783868, 0.020830085622155849)
(1024, 2.0, 1e-06, 2)
Result: (0.93683845387268061, 0.020763810960643149)
(256, 1.5, 1e-06, 2)
Result: (0.91013961412239086, 0.0063265646771658041)
(256, 2.0, 1e-06, 2)
Result: (0.91320789633178712, 0.012580609311199118)
(256, 5.0, 1e-06, 2)
Result: (0.91662052987861631, 0.01052368406147664)
(256, 10.0, 1e-06, 2)
Result: (0.91795649431800841, 0.015360465991840955)
(256, 2.0, 0.0001, 2)
Result: (0.91043027124023435, 0.006420343418558328)
(256, 2.0, 1e-05, 2)
Result: (0.91255380694198607, 0.0070071830737801141)
(256, 2.0, 1e-06, 2)
Result: (0.91766805414772035, 0.010873377442336652)
(256, 2.0, 1e-07, 2)
Result: (0.91874591431236274, 0.014040805722100822)
(256, 2.0, 1e-08, 2)
Result: (0.91810450534629806, 0.021963042344801943)
(256, 2.0, 1e-06, 1)
Result: (0.9156301912040

In [12]:
# Second round of training-parameter sweeps, this time at batch size 64.
# Every entry is (batch_size, lr_decay, lr_minimum, convergence_epochs).
configs = []
# Learning-rate decay factor.
configs.extend((64, lrd, 1e-6, 2) for lrd in [1.2, 1.5, 2.0])
# Minimum learning rate, 1e-3 down to 1e-6.
configs.extend((64, 2.0, 10 ** -lrm, 2) for lrm in [3, 4, 5, 6])
# Convergence epochs.
configs.extend((64, 2.0, 1e-6, ce) for ce in [1, 2, 4, 8])
# Submit one cross-validation task per configuration to the load-balanced view.
async_results = lview.map_async(run_cross_validation, configs)

In [13]:
# Walk through the asynchronous results of the batch-size-64 sweeps in
# submission order, echoing each configuration with its outcome and keeping the
# (config, result) pairs.
results = []
for i, r in enumerate(async_results):
    print(configs[i])
    print("Result: " + str(r))
    results.append((configs[i], r))

(64, 1.2, 1e-06, 2)
Result: (0.89952332299423199, 0.0065377489453016082)
(64, 1.5, 1e-06, 2)
Result: (0.90161739125633233, 0.0086176645337436245)
(64, 2.0, 1e-06, 2)
Result: (0.90152903881454471, 0.00615886249641244)
(64, 2.0, 0.001, 2)
Result: (0.91008012251090997, 0.0050518619193384105)
(64, 2.0, 0.0001, 2)
Result: (0.90170960564231883, 0.0087728255696332968)
(64, 2.0, 1e-05, 2)
Result: (0.90167167879104626, 0.0068331846874113526)
(64, 2.0, 1e-06, 2)
Result: (0.900728275522232, 0.0093753006703030176)
(64, 2.0, 1e-06, 1)
Result: (0.90539736339759824, 0.0086392432600369915)
(64, 2.0, 1e-06, 2)
Result: (0.90346704487419127, 0.0066362010197610699)
(64, 2.0, 1e-06, 4)
Result: (0.90078221081542953, 0.0075512973896067958)
(64, 2.0, 1e-06, 8)
Result: (0.8986799619350434, 0.0041661199044197043)
