# Table of Contents
<div class="toc" style="margin-top: 1em;"><ul class="toc-item" id="toc-level0"><li><span><a href="#Set-up-data" data-toc-modified-id="Set-up-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Set up data</a></span></li><li><span><a href="#Build-model" data-toc-modified-id="Build-model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Build model</a></span></li><li><span><a href="#Predict-only-one-value" data-toc-modified-id="Predict-only-one-value-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Predict only one value</a></span></li><li><span><a href="#Get-additional-variables" data-toc-modified-id="Get-additional-variables-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Get additional variables</a></span></li></ul></div>

In [201]:
# Imports
from importlib import reload
import crps_loss; reload(crps_loss)
from crps_loss import crps_cost_function, crps_cost_function_seq
import utils; reload(utils)
from utils import *
import matplotlib.pyplot as plt
%matplotlib inline

import pdb

import keras
from keras.layers import Input, Dense, merge, Embedding, Flatten, Dropout, \
    SimpleRNN, LSTM, TimeDistributed, GRU, Dropout, Masking
from keras.layers.merge import Concatenate
from keras.models import Model, Sequential
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD, Adam

In [2]:
# Optional: limit the GPU RAM this process may use (fraction set to 0.5) and
# register the resulting TensorFlow session as the one Keras works with.
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
session = tf.Session(config=config)
set_session(session)

In [3]:
# Display the name of the backend Keras is running on.
K.backend()

'tensorflow'

In [4]:
# Basic setup: where the input data lives and a few experiment constants.
# Switch DATA_DIR by (un)commenting the line for the machine you are on.
# DATA_DIR = '/Volumes/STICK/data/ppnn_data/'  # Mac
DATA_DIR = '/project/meteo/w2w/C7/ppnn_data/'   # LMU
# NOTE(review): results_dir, window_size and fclt are not referenced by any
# cell visible in this notebook - confirm whether they are still needed.
results_dir = '../results/'
window_size = 25   # Days in rolling window
fclt = 48   # Forecast lead time in hours

## Set up data

In [202]:

# Date ranges handed to the loader for the training and the test period.
# The loader reports 365 and 366 days for these ranges, so the second date
# presumably acts as an exclusive end - confirm in utils.
train_dates = ['2015-01-01', '2016-01-01']
test_dates =  ['2016-01-01', '2017-01-01']

# Number of time steps per sample. The model cells further down build their
# Input layers from `seq_len`; it has to be assigned here because nothing else
# in the notebook defines it (a fresh kernel would otherwise hit a NameError).
seq_len = 5

# fill_value is forwarded to the loader unchanged; presumably it marks
# sequence entries without data - confirm in utils.get_train_test_sets.
train_set, test_set = get_train_test_sets(DATA_DIR, train_dates, test_dates,
                                          seq_len=seq_len, fill_value=-999.)

train set contains 365 days
test set contains 366 days


## Build model

In [203]:
# Hyperparameters for the recurrent models.
# NOTE(review): batch_size is not used by the fit/evaluate calls shown in this
# notebook - they pass literal batch sizes (1024 / 4096) instead.
batch_size = 64
# Number of units of the GRU layer in the two models built right below.
hidden_nodes = 20

In [204]:
# Sequence-to-sequence model: the GRU returns its output at every time step
# and a Dense layer with two linear outputs is applied to each step.
inp = Input(shape=(seq_len, 2))  # (time step, feature)
gru_layer = GRU(hidden_nodes, return_sequences=True)
step_head = TimeDistributed(Dense(2, activation='linear'))
x = step_head(gru_layer(inp))
rnn_model = Model(inputs=inp, outputs=x)

In [205]:
# Print the layer-by-layer overview (output shapes and parameter counts).
rnn_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        (None, 5, 2)              0         
_________________________________________________________________
gru_15 (GRU)                 (None, 5, 20)             1380      
_________________________________________________________________
time_distributed_14 (TimeDis (None, 5, 2)              42        
Total params: 1,422
Trainable params: 1,422
Non-trainable params: 0
_________________________________________________________________


In [206]:
# Compile with the sequence variant of the CRPS loss. "temporal" sample
# weighting makes Keras expect one weight per time step of every sample.
rnn_model.compile(
    optimizer=Adam(0.01),
    loss=crps_cost_function_seq,
    sample_weight_mode="temporal",
)

In [207]:
# Train for 10 epochs, weighting the loss with the data set's sample weights.
rnn_model.fit(
    train_set.features,
    train_set.targets,
    epochs=10,
    batch_size=1024,
    sample_weight=train_set.sample_weights,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fcb2c133710>

In [209]:
# Weighted loss of the trained model on the test set.
rnn_model.evaluate(
    test_set.features,
    test_set.targets,
    batch_size=4096,
    sample_weight=test_set.sample_weights,
)



1.0151006056068379

## Predict only one value

In [169]:
# Single-output model: the GRU only returns its final output, which a Dense
# layer maps to two linear outputs per sample.
inp = Input(shape=(seq_len, 2))  # (time step, feature)
gru_layer = GRU(hidden_nodes)
output_head = Dense(2, activation='linear')
x = output_head(gru_layer(inp))
rnn_model2 = Model(inputs=inp, outputs=x)

In [170]:
# Compile with the non-sequence CRPS loss (one prediction per sample).
rnn_model2.compile(
    optimizer=Adam(0.001),
    loss=crps_cost_function,
)

In [171]:
# Print the layer-by-layer overview (output shapes and parameter counts).
rnn_model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        (None, 5, 2)              0         
_________________________________________________________________
gru_14 (GRU)                 (None, 20)                1380      
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 42        
Total params: 1,422
Trainable params: 1,422
Non-trainable params: 0
_________________________________________________________________


In [173]:
# Train the single-output model on the target of the last time step only
# (targets[:, -1]) and report the loss on the test set after every epoch.
# The data comes from train_set/test_set loaded above; the x_seq_*/y_seq_*
# arrays this call used before are not defined anywhere in the notebook, so
# the cell failed with a NameError on a fresh kernel.
# NOTE(review): no sample weights are passed here, unlike for the
# sequence model - confirm that entries carrying the loader's fill value
# are meant to reach this model unweighted.
rnn_model2.fit(train_set.features, train_set.targets[:, -1], epochs=5, batch_size=1024,
               validation_data=(test_set.features, test_set.targets[:, -1]))

Train on 180849 samples, validate on 182218 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fcb2d435e80>

## Get additional variables

In [212]:
from collections import OrderedDict

# Auxiliary variables to load, keyed by the name of the file they are read
# from. An OrderedDict keeps the files in the order they are listed here.
aux_dict = OrderedDict([
    ('data_aux_geo_interpolated.nc',
     ['orog', 'station_alt', 'station_lat', 'station_lon']),
    ('data_aux_pl500_interpolated_00UTC.nc',
     ['u_pl500_fc', 'v_pl500_fc', 'gh_pl500_fc']),
    ('data_aux_pl850_interpolated_00UTC.nc',
     ['u_pl850_fc', 'v_pl850_fc', 'q_pl850_fc']),
    ('data_aux_surface_interpolated_00UTC.nc',
     ['cape_fc', 'sp_fc', 'tcc_fc']),
])

In [213]:
# Reload both data sets, this time including the auxiliary variables listed
# in aux_dict. This rebinds train_set and test_set from the earlier load.
train_set, test_set = get_train_test_sets(
    DATA_DIR,
    train_dates,
    test_dates,
    seq_len=5,
    fill_value=-999.,
    aux_dict=aux_dict,
)

train set contains 365 days
test set contains 366 days


In [214]:
# Size of the last axis of the feature array; the models below use it as the
# feature dimension of their Input layer. The bare name displays the value.
n_features = train_set.features.shape[-1]
n_features

24

In [233]:
# Sequence-to-sequence model on the full feature set: a 20-unit GRU that
# returns every time step, followed by a per-step Dense layer with two
# outputs (no activation given, i.e. the Keras default of a linear output).
inp = Input(shape=(seq_len, n_features))  # (time step, feature)
gru_layer = GRU(20, return_sequences=True)
step_head = TimeDistributed(Dense(2))
x = step_head(gru_layer(inp))
rnn_model = Model(inputs=inp, outputs=x)

In [234]:
# Compile with the sequence variant of the CRPS loss. "temporal" sample
# weighting makes Keras expect one weight per time step of every sample.
rnn_model.compile(
    optimizer=Adam(0.01),
    loss=crps_cost_function_seq,
    sample_weight_mode="temporal",
)

In [235]:
# Train one epoch at a time and, after each epoch, print the weighted loss on
# the training set and on the test set (the latter serves as validation score).
# The first score is computed on train_set and is therefore labelled 'Train';
# it used to be printed under the misleading label 'Test'.
for epoch in range(10):
    rnn_model.fit(train_set.features, train_set.targets, epochs=1, batch_size=1024,
                  sample_weight=train_set.sample_weights, verbose=0)
    train_score = rnn_model.evaluate(train_set.features, train_set.targets, batch_size=4096,
                                     sample_weight=train_set.sample_weights, verbose=0)
    valid_score = rnn_model.evaluate(test_set.features, test_set.targets, batch_size=4096,
                                     sample_weight=test_set.sample_weights, verbose=0)
    print('Train', train_score)
    print('Valid', valid_score)

Test 1.47494211695
Valid 1.50654933895
Test 0.977439547093
Valid 0.976263109129
Test 0.944071006104
Valid 0.952683184602
Test 0.927436171778
Valid 0.947129741995
Test 0.921549836288
Valid 0.941121052018
Test 0.910908860636
Valid 0.946302759566
Test 0.907522408847
Valid 0.943646217648
Test 0.899797803507
Valid 0.943910612833
Test 0.894670393442
Valid 0.942409784089
Test 0.891969689318
Valid 0.9460832431


In [236]:
# Single-output model on the full feature set: a 20-unit GRU that returns
# only its final output, mapped by a Dense layer to two linear outputs.
inp = Input(shape=(seq_len, n_features))  # (time step, feature)
gru_layer = GRU(20)
output_head = Dense(2, activation='linear')
x = output_head(gru_layer(inp))
rnn_model2 = Model(inputs=inp, outputs=x)

In [237]:
# Compile with the non-sequence CRPS loss (one prediction per sample).
rnn_model2.compile(
    optimizer=Adam(0.01),
    loss=crps_cost_function,
)

In [239]:
# Train on the target of the last time step only and report the loss on the
# test set after every epoch.
last_step_train = train_set.targets[:, -1]
last_step_test = test_set.targets[:, -1]
rnn_model2.fit(
    train_set.features,
    last_step_train,
    epochs=5,
    batch_size=1024,
    validation_data=(test_set.features, last_step_test),
)

Train on 180849 samples, validate on 182218 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fcb07e897b8>