In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt

from keras import losses
import keras

from src.data import get_generators, format_bytes
from src.model import get_2losses_model, add_clf_layer, add_clf_layer_sparse, get_model, get_RAEwSC_compiled, get_RAEwSC_and_WS_compiled
from src.losses import cross_entropy_with_axis, model_evaluate, write_submission_files

from keras.utils.vis_utils import plot_model
# plot_model(model, show_shapes=True, show_layer_names=True)
# start tensorboard: tensorboard --logdir .

Using TensorFlow backend.


In [109]:
# define global names
EXTRA_DATA_MODEL, EXTRA_DATA_MODELwIN, RAEwSCwWSwINwCLF = "RAE_w_SC_WS", "RAE_w_SC_WS_wIN", "RAEwSCwWSwINwCLF"

# define model and city
MODEL_TYPES = {0: "ConvLSTM", 1: "ConvLSTM+Clf", 2: "RAE_w_SC", 3: EXTRA_DATA_MODEL, 4: EXTRA_DATA_MODELwIN, 
               5: RAEwSCwWSwINwCLF}
CITIES = {0: 'Moscow', 1: 'Istanbul', 2: 'Berlin'}
DATA_SPLIT = {0: "non-overlapping", 1: "all_possible_slots", 2: "like-test"}

############################################# DEFINE HYPERPARAMETERS
DATA_SPLIT_IDX = 2 # choose how to split data
params = {
    # data params
    "batch_size": 4,
    "length_seq_in": 3, #3, # Set up when calling a model
    "length_seq_out": 3,
    "batch_size_validation": 48,
    "data_split": DATA_SPLIT[DATA_SPLIT_IDX], # Set up when calling a model
    "batch_size_test": 5,
    # loss params
    'optimizer': 'adam',  # adam or sgd
    'learning_rate': 0.001,
    'decay_rate': 0.9999,  # Learning rate decay per minibatch.
    'min_learning_rate': .0000001,  # Minimum learning rate.
}

############################################# DEFINE MODEL WEIGHTS
models_weight = {
    "ConvLSTM": { # _weights_e04_valoss0.009_relu 
        "Moscow": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_model_ep_2.h5".format("Moscow"), 
        "Istanbul": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_weights_e03_valoss0.009_relu3.hdf5".format("Istanbul"), 
        "Berlin": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_model_ep_5.h5".format("Berlin"),
        "Model_definition_file": "{}/projects/nips_traffic/src/models.py".format("/home/pherruzo"),
        "Model_definition_function": "build_model" 
    },
    
    "ConvLSTM+Clf": { # _weights_e04_valoss0.025_relu.hdf5, _weights_e04_valoss0.025_relu2.hdf
        "Moscow": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_weights_e04_valoss0.025_relu2.hdf5".format("Moscow"),
        "Istanbul": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_weights_06-1.04.hdf5".format("Istanbul"), # weights_Istanbul_06-1.04.hdf5, weights_Istanbul_04-1.05.hdf5, weights_Istanbul_03-1.05.hdf5  
        "Berlin": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_model_ep_1_reg_clf_cont.h5".format("Berlin")
    },
    
    "RAE_w_SC": { # best: _weights_e03_valoss0.012_raewsc_sgd | prev: _weights_e02_valoss0.014_raewsc_sgd_emb
        "Moscow": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_weights_e03_valoss0.012_raewsc_sgd.hdf5".format("Moscow"),
        "Istanbul": None, 
        "Berlin": None
    },
    
    EXTRA_DATA_MODEL: { # best: _e02_valoss0.012_exo_vars, best in test-like: _e91_valoss0.012_just_try 
        "Moscow": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_e91_valoss0.012_just_try.hdf5".format("Moscow"), # works good for num_seq_in=3
        "Istanbul": None,
        "Berlin": None
    },
    
    EXTRA_DATA_MODELwIN: { # best (all slots): _e04_valoss0.012_RAEwSCwWTwIN_new_all_slots, # like-test: _e18_valoss0.012_RAEwSCwWTwINz_all_slots_like_test
        "Moscow": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_e18_valoss0.012_RAEwSCwWTwINz_all_slots_like_test.hdf5".format("Moscow"),
        "Istanbul": None,
        "Berlin": None
    },
    
    RAEwSCwWSwINwCLF: { # best (all slots): _e19_valoss0.502_RAEwSCwWSwINwCLF #  _e12_valoss1.294_RAEwSCwWSwINwCLF
        "Moscow": "/home/pherruzo/projects/nips_traffic/checkpoints/{}_e44_valoss0.026_RAEwSCwWSwINwCLF.hdf5".format("Moscow"),
        "Istanbul": None,
        "Berlin": None,
        "loss_type": [None, 'Sparse'][1]#'Categorical'
    }
}

############################################# PREPARE POSSIBLE MODELS
def get_a_model(model_type, training_params):
    """ model_type in ["ConvLSTM+Clf", "ConvLSTM", "RAE_w_SC"] """
    
    if model_type=="ConvLSTM+Clf":
        return get_2losses_model(model_path=models_weight[training_params["previus_model"]]["Model_definition_file"], 
                                model_name=models_weight[training_params["previus_model"]]["Model_definition_function"], 
                                weights_initial_model=None, 
                                weights_current_model=models_weight[training_params["current_model"]][training_params["city"]], 
                                add_model=add_clf_layer_sparse, 
                                add_loss=losses.sparse_categorical_crossentropy,
                                sample_weight_mode="temporal")
    
    elif model_type=="ConvLSTM":
        from keras import optimizers
        return get_model(model_path=models_weight[training_params["previus_model"]]["Model_definition_file"], 
                         function_name=models_weight[training_params["previus_model"]]["Model_definition_function"], 
                         weights=models_weight[training_params["previus_model"]][training_params["city"]], 
                         opt=optimizers.Adam(lr=0.0001), loss=losses.mean_squared_error)
    
    elif model_type=="RAE_w_SC":
        return get_RAEwSC_compiled(weights=models_weight[model_type][training_params["city"]], 
                                   lr=params['learning_rate'])
    elif model_type==EXTRA_DATA_MODEL:
        dropout_enc, b_norm_enc = 0.04, True
        dropout_dec, b_norm_dec = 0.03, True
        print("dropout_enc:", dropout_enc, "dropout_dec:", dropout_dec)
        # get basic model def get_RAEwSC_and_WS_compiled(weights=None, lr = 0.001, grad_clip=1., loss_weights={'predicted_frames':1., 'predicted_emb':1.}):
        return get_RAEwSC_and_WS_compiled(weights=models_weight[model_type][training_params["city"]], lr=params['learning_rate'],
                                          loss_weights={'predicted_frames':1., 'predicted_emb':1.},
                                           dropout_enc=dropout_enc, dropout_dec=dropout_dec, 
                                           b_norm_enc=b_norm_enc, b_norm_dec=b_norm_dec)
    elif model_type==EXTRA_DATA_MODELwIN:
        from src.models.RAEwSCwWSwIN import get_RAEwSC_and_WS_compiled_w_input
        
        loss_w = {'predicted_frames':1., 'predicted_emb':0.9}
        
        dropout_enc = 0.05
        dropout_dec = 0.03
        
        print("loading {} ...".format(model_type))
        print("dropout_enc:", dropout_enc, "dropout_dec:", dropout_dec)
        print("loss weights:", loss_w)
        return get_RAEwSC_and_WS_compiled_w_input(weights=models_weight[model_type][training_params["city"]], length_seq_in=params['length_seq_in'], 
                                                  lr=params['learning_rate'], loss_weights=loss_w,
                                                  dropout_enc=dropout_enc, dropout_dec=dropout_dec, for_new_model=True) ######### This is True only to create the new model
    
    elif model_type==RAEwSCwWSwINwCLF:
        from src.models.RAEwSCwWSwINxCLF import get_RAEwSCwWSwIwCLF_compiled as get_model
        
        loss_w = {'predicted_frames':0., 'predicted_emb':1.}
        
        dropout_enc = 0.05
        dropout_dec = 0.03
        
        print("loading {} ...".format(model_type))
        print("dropout_enc:", dropout_enc, "dropout_dec:", dropout_dec)
        print("loss weights:", loss_w)
        return get_model(weights=models_weight[model_type][training_params["city"]], length_seq_in=params['length_seq_in'], 
                         lr=params['learning_rate'], loss_weights=loss_w,
                         dropout_enc=dropout_enc, dropout_dec=dropout_dec)
    # get_RAEwSCwWSwIwCLF_compiled

############################################# DEFINE TRAINING PARAMS
def get_training_params(model_type, city):
    """ model_type in ["ConvLSTM", "ConvLSTM+Clf", "RAE_w_SC", "RAE_w_SC_WS"] 
        city in ['Moscow', 'Istanbul', 'Berlin']
    """
    training_params = {
        "city": city,
        "previus_model": "ConvLSTM",     # used in old methods, NOT TO CHANGE todo: replace
        "current_model": "ConvLSTM+Clf", # used in old methods, NOT TO CHANGE todo: replace
        "tensorboard_path": "/home/pherruzo/projects/nips_traffic/tensorboard/",
        "log_path": "/home/pherruzo/projects/nips_traffic/log_files/",
        "model_type": model_type,
        "output_dir": "/home/pherruzo/data/nips_traffic_submissions/conv_clf_mixt"
    }
    return training_params

############################################# LOAD DATA GENERATOR
def load_model_and_data(model_type, city, params, length_seq_in=None, data_split=None, debug=False):
    
    if length_seq_in is not None:
        params["length_seq_in"] = length_seq_in
        
    if data_split is not None:
        params["data_split"] = DATA_SPLIT[data_split]
        
    # load training params
    training_params = get_training_params(model_type, city)
    
    # load data
    if "loss_type" in models_weight[model_type]:
        loss_type = models_weight[model_type]["loss_type"]
        print("---> loss type for clf:", loss_type)
    else:
        loss_type = None
    training_ds, val_ds, test_ds = get_generators(training_params["model_type"],   params["batch_size"], 
                                                  training_params["city"],         params["length_seq_in"], 
                                                  params["batch_size_validation"], data_split=params["data_split"], debug=debug, loss_type=loss_type)
    # load model
    model = get_a_model(model_type, training_params)
    
    # print info
    print("")
    print("· length_seq_in:", params["length_seq_in"])
    print("· data_split:", params["data_split"])
    print("· model_type:", training_params["model_type"])
    print("· city      :", training_params["city"])
    print(" 1 epoch training is {} batches, validation is {}, and test is {}.".format(len(training_ds), len(val_ds), len(test_ds)))
    
    return training_ds, val_ds, test_ds, model, training_params

def plot(keys_list, history):

    leg = []
    for k in keys_list:
        plt.plot(history.history[k], '-o')
        leg.append(k)
    
    plt.title(leg)
    plt.xlabel('epoch')
    plt.legend(leg, loc='center')#'upper left')
    plt.show()

## Building the new model

### 1. Load model RAE_w_SC_WS_wIN

In [5]:
#### load data and model --> 
# choose model          {0: "ConvLSTM", 1: "ConvLSTM+Clf", 2: "RAE_w_SC", 3: "RAE_w_SC_WS", 4: "RAE_w_SC_WS_wIN", 5: "RAEwSCwWSwINwCLF"}
# choose city           {0: 'Moscow', 1: 'Istanbul', 2: 'Berlin'}
# choose data-split     {0: "non-overlapping", 1: "all_possible_slots", 2: "like-test"}
# choose length_seq_in  3, 4, 5, 6, 7, 8, 9, 10, 11, 12

params['learning_rate'] = 0.01
debug = False

m_type, city = 4, 0
data_split_idx = 2
length_seq_in = 6

training_ds, val_ds, test_ds, model, training_params = load_model_and_data(MODEL_TYPES[m_type], CITIES[city], params, 
                                                                           length_seq_in=length_seq_in, data_split=data_split_idx, debug=debug)

-->> Data has been shuffled in: training


W1022 09:13:58.139183 140354142054208 deprecation_wrapper.py:119] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1022 09:13:58.147670 140354142054208 deprecation_wrapper.py:119] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1022 09:13:58.150118 140354142054208 deprecation_wrapper.py:119] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W1022 09:13:58.166394 140354142054208 deprecation_wrapper.py:119] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_se

loading RAE_w_SC_WS_wIN ...
dropout_enc: 0.05 dropout_dec: 0.03
loss weights: {'predicted_frames': 1.0, 'predicted_emb': 0.9}


W1022 09:13:58.648825 140354142054208 deprecation_wrapper.py:119] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

W1022 09:13:58.723539 140354142054208 deprecation_wrapper.py:119] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W1022 09:13:58.726788 140354142054208 deprecation.py:506] From /home/pherruzo/anaconda3/envs/nips/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Encoder output shape: (?, 8, 8, 32) reshaped: (?, ?)
Decoder input shape: (?, 2048) reshaped: (?, 8, 8, 32)
embeddings.shape: (?, 6, 2048)
future_embeddings.shape: (?, 3, 2048)
concatenation of all inputs: (?, 6, 2388)
FC before recurrent embeddings.shape: (?, 6, 2048)
recurrent embeddings.shape: (?, ?, 2048)
prediced_frames.shape: (?, 3, 495, 436, 3)
loading weitghs: /home/pherruzo/projects/nips_traffic/checkpoints/Moscow_e18_valoss0.012_RAEwSCwWTwINz_all_slots_like_test.hdf5
Freezing layers...

· length_seq_in: 6
· data_split: like-test
· model_type: RAE_w_SC_WS_wIN
· city      : Moscow
 1 epoch training is 357 batches, validation is 5, and test is 72.


### 2. Freezing Layers & Compiling

In [6]:
###################################### change layer name
# model.get_layer("Concat_predicted_frames").name = "Concat_predicted_frames_2"

###################################### freeze layers
# once we load the weight we freeze all layers
for i, layer in enumerate(model.layers):
#     if 'norm'  in layer.name:
        layer.trainable = False

# freeze also nested layers
for i, layer in enumerate(model.get_layer("encoder").layers):
#     if 'norm' not in layer.name:
        layer.trainable = False
        
for i, layer in enumerate(model.get_layer("decoder").layers):
#     if 'norm' not in layer.name:
        layer.trainable = False

In [9]:
from keras import models, activations
from keras import layers
from keras import optimizers
from keras import losses

# compile with a custom loss that handle both regression and classification
lr = 0.001
grad_clip=1.
optimizer = [optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True), optimizers.Adam(lr=lr, clipvalue=grad_clip)][-1]
print("lr:", lr, 'optimizer:', optimizer)
    
model.compile(optimizer=optimizer,
              loss=[losses.mean_squared_error],
              metrics=[losses.mean_squared_error])

lr: 0.001 optimizer: <keras.optimizers.Adam object at 0x7fa64ea56908>


In [11]:
# check evaluation
model.evaluate_generator(val_ds)

[0.01236429757305554, 0.012364300579896995]

### 3. Building new layers on top of the model

In [13]:
import tensorflow as tf
sess = tf.compat.v1.Session()

from keras import backend as K
K.set_session(sess)

from keras import models, activations
from keras import layers
from keras import optimizers
from keras import losses
# from keras import backend as K 
from tensorflow import device

from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.utils.np_utils import to_categorical  

# imports for RAE_w_SC
from keras import layers
from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout, ZeroPadding2D, Cropping2D, TimeDistributed
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers import concatenate
from keras.optimizers import Adam
from keras.models import Model, load_model
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras import losses, optimizers

# own libraries
from src.losses import *

ACTIVATION = ['relu', 'elu'][-1]

# ###################################### change layer name
# model.get_layer("Concat_predicted_frames").name = "Concat_predicted_frames_2"

# ###################################### freeze layers
# # once we load the weight we freeze all layers
# for i, layer in enumerate(model.layers):
#     if 'norm' not in layer.name:
#         layer.trainable = False

# # freeze also nested layers
# for i, layer in enumerate(model.get_layer("encoder").layers):
#     if 'norm' not in layer.name:
#         layer.trainable = False
        
# for i, layer in enumerate(model.get_layer("decoder").layers):
#     if 'norm' not in layer.name:
#         layer.trainable = False
        
# get needed layers (batch, seq_out, width, height, channel)
new_frames = model.output

###################################### DEFINE NEW MODEL
out_clf = 5
channels_out = 3
################################# add clf
name = 'clf'
n_filters = out_clf
kernel_size = 3
# first layer
x = TimeDistributed( Conv2D(filters=n_filters, kernel_size=(kernel_size, kernel_size), kernel_initializer="he_normal", padding="same", name=name+'_conv1') ) (new_frames)
x = TimeDistributed( BatchNormalization(trainable=True, name=name+'_BN1') ) (x)
x = TimeDistributed( Activation(ACTIVATION) ) (x)
# second layer
x = TimeDistributed( Conv2D(filters=n_filters, kernel_size=(kernel_size, kernel_size), kernel_initializer="he_normal", padding="same", name=name+'_conv2') )(x)
x = TimeDistributed( BatchNormalization(trainable=True, name=name+'_BN2') ) (x)
x = TimeDistributed( Activation(ACTIVATION) ) (x)

# prepare the clfs for the loss
clfs = layers.Reshape((-1, out_clf))(x)      # vectorize all frames
clfs = layers.Softmax(name='softmax_clf')(clfs) # apply softmax

################################# add clf to decoder output

# concat decoder output with clf
concat_0 = concatenate([new_frames, x])
concat = TimeDistributed( BatchNormalization(trainable=True, name='concat_1_bn') ) (concat_0)
concat_1 = TimeDistributed( Activation(ACTIVATION) ) (concat)

# spatial branch
# conv 1
c1 = TimeDistributed( Conv2D(channels_out*3, (kernel_size, kernel_size), activation='elu', padding="same"), name='c1_1' ) (concat_1)
c1 = TimeDistributed( BatchNormalization(trainable=True, name='bn_c1_1') ) (c1)
c1 = TimeDistributed( Activation(ACTIVATION) ) (c1)
# conv 2
c1 = TimeDistributed( Conv2D(channels_out*2, (kernel_size, kernel_size), activation='elu', padding="same"), name='c1_2' ) (c1)
c1 = TimeDistributed( BatchNormalization(trainable=True, name='bn_c1_1') ) (c1)
c1 = TimeDistributed( Activation(ACTIVATION) ) (c1)

# depth branch
# conv 1
c2 = TimeDistributed( Conv2D(channels_out*3, (1, 1), activation='elu', padding="same"), name='c2_1' ) (concat_1)
c2 = TimeDistributed( BatchNormalization(trainable=True, name='bn_c2_1') ) (c2)
c2 = TimeDistributed( Activation(ACTIVATION) ) (c2)
# conv 2
c2 = TimeDistributed( Conv2D(channels_out*2, (1, 1), activation='elu', padding="same"), name='c2_2' ) (concat_1)
c2 = TimeDistributed( BatchNormalization(trainable=True, name='bn_c2_2') ) (c2)
c2 = TimeDistributed( Activation(ACTIVATION) ) (c2)

# concat depth convs & spatial convs
concat = concatenate([concat_0, c1, c2])
concat = TimeDistributed( BatchNormalization(trainable=True, name='concat_2_bn') ) (concat)
concat = TimeDistributed( Activation(ACTIVATION) ) (concat)

# final conv
prediced_frames = TimeDistributed( Conv2D(channels_out, (1, 1), activation='elu'), name='Concat_predicted_frames' ) (concat)

model = models.Model(inputs=model.input, outputs=[prediced_frames, clfs], name='final_model')

###################################### COMPILE MODEL
general_loss_weights={'Concat_predicted_frames':1., 'softmax_clf':.1}


# compile with a custom loss that handle both regression and classification
lr = 0.1, 
grad_clip=1.
optimizer = [optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True), optimizers.Adam(lr=lr, clipvalue=grad_clip)][-1]
print("lr:", lr, 'optimizer:', optimizer)

# # load weights
# name = [ 'Moscow_e02_valoss0.025_RAEwSCwWSwINwCLF_end.hdf5', 'Moscow_e06_valoss0.024_RAEwSCwWSwINwCLF_end.hdf5'][-1]
# weights = '/home/pherruzo/projects/nips_traffic/checkpoints/' + name
# if weights is not None:
#     print('loading weitghs:', weights)
#     model.load_weights(weights, by_name=True, skip_mismatch=True) #l24)#, by_name=True)

model.compile(optimizer=optimizer,
              loss={'Concat_predicted_frames': 'mse',
                    'softmax_clf': losses.sparse_categorical_crossentropy},
              metrics={'Concat_predicted_frames': ['mse'], 
                       'softmax_clf':             [losses.sparse_categorical_crossentropy]},#'accuracy']},
              loss_weights=general_loss_weights, 
              sample_weight_mode="temporal")

###################################### GET DATA FOR THIS MODEL
training_params["model_type"] = RAEwSCwWSwINwCLF
if "loss_type" in models_weight[training_params["model_type"]]:
    loss_type = models_weight[training_params["model_type"]]["loss_type"]
    print("---> loss type for clf:", loss_type)
else:
    loss_type = None
    
training_ds, val_ds, test_ds = get_generators(training_params["model_type"],   params["batch_size"], 
                                                  training_params["city"],         params["length_seq_in"], 
                                                  params["batch_size_validation"], data_split=params["data_split"], debug=debug, loss_type=loss_type)

# Init vars
keras.backend.get_session().run(tf.global_variables_initializer())

lr: (0.1,) optimizer: <keras.optimizers.Adam object at 0x7f9ee9350a90>
---> loss type for clf: Sparse
-->> Data has been shuffled in: training


In [15]:
keras.backend.get_session().run(tf.global_variables_initializer())
model.evaluate_generator(val_ds)

[0.18832453872476304,
 0.027383473834821155,
 1.6094106197357179,
 0.027383483733449662,
 1.609486481121608]

In [118]:
# evaluate
model.evaluate_generator(val_ds)

[0.011967233182596309, 0.011967231120382036, 2.1739711776800975e-09]

### 4. Load new weights & compile it again

In [27]:
general_loss_weights={'Concat_predicted_frames':1., 'softmax_clf':.1}


# compile with a custom loss that handle both regression and classification
lr = 0.001, 
grad_clip=1.
optimizer = [optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True), optimizers.Adam(lr=lr, clipvalue=grad_clip)][-1]
print("lr:", lr, 'optimizer:', optimizer)

# load weights
name = ['Moscow_e02_valoss0.212_RAEwSCwWSwINwCLF_frozenUnet.hdf5'][-1]
weights = '/home/pherruzo/projects/nips_traffic/checkpoints/' + name
if weights is not None:
    print('loading weitghs:', weights)
    model.load_weights(weights, by_name=True, skip_mismatch=True) #l24)#, by_name=True)

model.compile(optimizer=optimizer,
              loss={'Concat_predicted_frames': 'mse',
                    'softmax_clf': losses.sparse_categorical_crossentropy},
              metrics={'Concat_predicted_frames': ['mse'], 
                       'softmax_clf':             [losses.sparse_categorical_crossentropy]},#'accuracy']},
              loss_weights=general_loss_weights, 
              sample_weight_mode="temporal")

lr: (0.001,) optimizer: <keras.optimizers.Adam object at 0x7f9ebaea9ac8>
loading weitghs: /home/pherruzo/projects/nips_traffic/checkpoints/Moscow_e02_valoss0.212_RAEwSCwWSwINwCLF_frozenUnet.hdf5


### 5. Define callbacks

In [114]:
from keras import backend as K 

class LRTensorBoard(keras.callbacks.TensorBoard):
    def __init__(self, log_dir, **kwargs):  # add other arguments to __init__ if you need
        super().__init__(log_dir=log_dir, **kwargs)

    def on_epoch_end(self, epoch, logs=None):
        logs.update({'lr': K.eval(self.model.optimizer.lr)})
        super().on_epoch_end(epoch, logs)
        
############################################# PREPARE CALLBACKS
run_name = 'RAEwALLwCLF'#'RAEwSCaWT_w_IN'
run_num = '1'
## tensorboard
tb_folder = "/tb_{}_{}/run_{}".format(training_params["city"],"1", run_name+'_'+run_num)
tb_folder = training_params["tensorboard_path"]+MODEL_TYPES[m_type]+tb_folder
# tb_update_freq = 1000 * params["batch_size"]
# print("Saving training progress in {} each {} samples.".format(tb_folder, tb_update_freq))

# save best model
# checkpoint
root= "/home/pherruzo" 
model_dir=root+"/projects/nips_traffic/checkpoints/"
filepath=model_dir+training_params["city"]+"_e{epoch:02d}_valoss{val_loss:.3f}_"+run_name+".hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', save_weights_only=True, save_best_only=True)

# ep 1-4 lr: 0.001
# ep 5- lr: 0.0001

my_callbacks = [keras.callbacks.TensorBoard(log_dir=tb_folder), checkpoint, LRTensorBoard(log_dir=tb_folder)] #, update_freq=tb_update_freq)]
# my_callbacks = [keras.callbacks.TensorBoard(log_dir=tb_folder, histogram_freq=1, write_grads=True), checkpoint, LRTensorBoard(log_dir=tb_folder]
# my_callbacks = None
filepath, tb_folder

('/home/pherruzo/projects/nips_traffic/checkpoints/Moscow_e{epoch:02d}_valoss{val_loss:.3f}_tttest.hdf5',
 '/home/pherruzo/projects/nips_traffic/tensorboard/RAE_w_SC_WS_wIN/tb_Moscow_1/run_tttest_1')

# TRAIN

In [None]:
DEBUG = False

if DEBUG:
    print('validation data to memory')
    valid_data = ([x_val, y_val], y_val)
    my_callbacks = [keras.callbacks.TensorBoard(log_dir=tb_folder, histogram_freq=1, write_grads=True), 
                    checkpoint, 
                    LRTensorBoard(log_dir=tb_folder)]
else:
    valid_data = val_ds
    my_callbacks = [keras.callbacks.TensorBoard(log_dir=tb_folder), checkpoint, LRTensorBoard(log_dir=tb_folder)]

print("learning rate:", K.get_value(model.optimizer.lr))
hist = model.fit_generator(training_ds, epochs=20, verbose=1, callbacks=my_callbacks, 
                           validation_data=valid_data,
                           max_queue_size=10, workers=4, shuffle=True, initial_epoch=1)#, steps_per_epoch=2000) # starts with next epoch

learning rate: 0.01
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

# Evaluate

In [160]:
desc = ['val_loss', 'val_Concat_predicted_frames_loss', 'val_softmax_clf_loss', 'val_Concat_predicted_frames_mean_squared_error', 'val_Concat_predicted_frames_r_emb_loss', 'val_softmax_clf_sparse_categorical_crossentropy']

# with clf
metrics = model.evaluate_generator(val_ds)

print("performance in validations:")
for d, v in zip(desc, metrics):
    print('{}: {}'.format(d, v))

performance in validations:
val_loss: 1.2941249370574952
val_Concat_predicted_frames_loss: 0.07352456067289626
val_softmax_clf_loss: 1.2206004006522042
val_Concat_predicted_frames_mean_squared_error: 0.02733775063284806
val_Concat_predicted_frames_r_emb_loss: 0.04618680945464543
val_softmax_clf_sparse_categorical_crossentropy: 1.013511712210519


In [19]:
desc = ['val_loss', 'val_Concat_predicted_frames_loss', 'val_softmax_clf_loss', 'val_Concat_predicted_frames_mean_squared_error', 'val_Concat_predicted_frames_r_emb_loss', 'val_softmax_clf_sparse_categorical_crossentropy']

# with clf
metrics = model.evaluate_generator(val_ds)

print("performance in validations:")
for d, v in zip(desc, metrics):
    print('{}: {}'.format(d, v))

performance in validations:
val_loss: 0.025962504638092857
val_Concat_predicted_frames_loss: 0.025962504638092857
val_softmax_clf_loss: 1.668187713623047
val_Concat_predicted_frames_mean_squared_error: 0.015087313205003738
val_Concat_predicted_frames_r_emb_loss: 0.010875191566135202
val_softmax_clf_sparse_categorical_crossentropy: 0.8352041755403791
