# Loading Model Requirements

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
%matplotlib notebook

import os
import time
import numpy as np
import glob
import matplotlib.pyplot as plt
import PIL
import imageio
import random
import math


import tensorflow as tf
import tensorflow_probability as tfp

from IPython import display
from sklearn import preprocessing
from pickle import dump, load

from matplotlib.ticker import FormatStrFormatter
from IPython.display import SVG

tf.random.set_seed(1234)

tfd = tfp.distributions
tfb = tfp.bijectors
tfk = tf.keras

# Loading Data and Preprocessing

In [0]:
import math
def lonlat2meters(lon, lat):
    """Project a longitude/latitude pair given in degrees onto planar metres.

    Uses the spherical Mercator formulas with radius R = 6378137 m:
    ``x = R * lon_rad`` and ``y = (R / 2) * ln((1 + sin(lat_rad)) / (1 - sin(lat_rad)))``.

    :param lon: Longitude in degrees (scalar).
    :param lat: Latitude in degrees (scalar); +/-90 is outside the domain of the formula.
    :return: Tuple ``(x, y)`` of easting and northing in metres.
    """
    deg_to_rad = 0.017453292519943295  # pi / 180
    sphere_radius = 6378137.0

    lon_rad = lon * deg_to_rad
    lat_rad = lat * deg_to_rad
    sin_lat = math.sin(lat_rad)

    x = sphere_radius * lon_rad
    # 3189068.5 is exactly sphere_radius / 2.
    y = 3189068.5 * math.log((1 + sin_lat) / (1 - sin_lat))
    return x, y

def meters2lonlat(x, y):
    """Convert planar spherical-Mercator metres back to longitude/latitude in degrees.

    Inverse of the projection ``x = R * lon_rad``,
    ``y = (R / 2) * ln((1 + sin(lat_rad)) / (1 - sin(lat_rad)))`` with R = 6378137 m.

    :param x: Easting in metres (scalar).
    :param y: Northing in metres (scalar).
    :return: Tuple ``(lon, lat)`` in degrees.
    """
    deg_to_rad = 0.017453292519943295  # pi / 180
    sphere_radius = 6378137.0

    lon = x / sphere_radius / deg_to_rad

    # exp(y / (R / 2)) equals (1 + sin(lat)) / (1 - sin(lat)); solve that for sin(lat).
    ratio = math.exp(y / 3189068.5)
    sin_lat = (ratio - 1) / (ratio + 1)
    lat = math.asin(sin_lat) / deg_to_rad
    return lon, lat

In [0]:
# Read the preprocessed training CSV into a float array; the first line of the
# file is skipped as a header row.
dataset = np.genfromtxt('../processed_nyc_train.csv',delimiter=',', skip_header=1)
# Keep only the rows that contain no NaN in any column.
dataset = dataset[~np.isnan(dataset).any(axis=1)]

def _project_lonlat_rows(coords):
    """Return a new (n, 2) array in which every [lon, lat] row is replaced by lonlat2meters(lon, lat)."""
    # np.array(...) copies, so the caller's data is never modified.
    projected = np.array(coords, dtype=float)
    for row in projected:
        row[0], row[1] = lonlat2meters(row[0], row[1])
    return projected


def format_data(dataset, pick_up_scaler=None, drop_off_scaler = None ,save_scaler=True):
    """Project the coordinate columns to metres and standardise them.

    The input is split column-wise at indices 2, 4 and 5 into pick-up
    coordinates (columns 0-1, read as lon, lat), drop-off coordinates
    (columns 2-3, read as lon, lat), passenger count (column 4) and travel
    duration (remaining columns). Only the coordinate columns are transformed.

    Unlike a plain view-based implementation, ``dataset`` itself is left
    unchanged: ``np.split`` returns views, so the projection works on copies.

    :param dataset: 2-D numeric array with the column layout described above.
    :param pick_up_scaler: Fitted scaler for the pick-up coordinates. If None,
        a new ``StandardScaler`` is fitted on the projected pick-up coordinates.
    :param drop_off_scaler: Fitted scaler for the drop-off coordinates. If None,
        a new ``StandardScaler`` is fitted on the projected drop-off coordinates.
    :param save_scaler: If True, both scalers are pickled to
        ``pick_up_scaler.pkl`` and ``drop_off_scaler.pkl`` in the working directory.
    :return: New array with the same column layout as ``dataset``, holding the
        scaled coordinates followed by the untouched remaining columns.
    """
    pick_up_c, drop_off_c, num_passenger, travel_duration = np.split(dataset, [2, 4, 5], axis = 1)

    # Pick-up coordinates: project, then standardise.
    pick_up_c = _project_lonlat_rows(pick_up_c)
    if pick_up_scaler is None:
        pick_up_scaler = preprocessing.StandardScaler()
        pick_up_scaler = pick_up_scaler.fit(pick_up_c)
    pick_up_c = pick_up_scaler.transform(pick_up_c)

    # Drop-off coordinates: same treatment with their own scaler.
    drop_off_c = _project_lonlat_rows(drop_off_c)
    if drop_off_scaler is None:
        drop_off_scaler = preprocessing.StandardScaler()
        drop_off_scaler = drop_off_scaler.fit(drop_off_c)
    drop_off_c = drop_off_scaler.transform(drop_off_c)

    if save_scaler:
        # Context managers make sure the pickle files are flushed and closed.
        with open('pick_up_scaler.pkl', 'wb') as pick_up_file:
            dump(pick_up_scaler, pick_up_file)
        with open('drop_off_scaler.pkl', 'wb') as drop_off_file:
            dump(drop_off_scaler, drop_off_file)

    final = np.concatenate([pick_up_c, drop_off_c, num_passenger, travel_duration], axis = 1)
    return final

# Replace the raw array with its projected and standardised version. With the
# default arguments this fits new scalers and writes them to
# pick_up_scaler.pkl / drop_off_scaler.pkl.
# NOTE(review): `dataset` is rebound here, so re-running this cell without
# re-running the loading cell transforms already-transformed data.
dataset = format_data(dataset)

# Model Definition

In [0]:
import time

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import Layer, Dense, BatchNormalization, ReLU, Conv2D, Reshape
from tensorflow.keras import Model

tfd = tfp.distributions
tfb = tfp.bijectors
tfk = tf.keras

# Set the default float type used by Keras to float32.
tf.keras.backend.set_floatx('float32')

# Print the library versions in use for this run.
print('tensorflow: ', tf.__version__)
print('tensorflow-probability: ', tfp.__version__)

from enum import Enum

class Case(Enum):
    """Enumeration of the two modes named `sampling` and `density_estimation`.

    Not referenced anywhere else in the visible part of this notebook.
    """
    sampling = 1
    density_estimation = 2

class Made(tfk.layers.Layer):
    """
    Masked Autoencoder for Distribution Estimation (MADE) [Germain et al. (2015)] as a Keras layer.

    Wraps the TensorFlow Probability ``tfb.AutoregressiveNetwork`` and splits its output along the
    last axis into a shift tensor and a log-scale tensor; the log-scale is passed through ``tanh``
    before it is returned.

    :param params: Python integer, number of parameters the network outputs per input. ``call`` unstacks
        exactly two, so this is expected to be 2.
    :param event_shape: Python list-like of positive integers (or a single int) giving the shape of the
        layer input. Forwarded unchanged to ``tfb.AutoregressiveNetwork``.
    :param hidden_units: Python list-like of non-negative integers, number of units per hidden layer.
    :param activation: Activation function of the hidden layers. See tf.keras.layers.Dense. Default: None.
    :param use_bias: Whether the dense layers have a bias term. See tf.keras.layers.Dense. Default: True.
    :param kernel_regularizer: Regularizer applied to the Dense kernel weight matrices. Default: None.
    :param bias_regularizer: Regularizer applied to the Dense bias weight vectors. Default: None.
    """

    def __init__(self, params, event_shape=None, hidden_units=None, activation=None, use_bias=True,
                 kernel_regularizer=None, bias_regularizer=None):

        super().__init__()

        # Keep the constructor arguments on the instance for later inspection.
        self.params = params
        self.event_shape = event_shape
        self.hidden_units = hidden_units
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer

        # The masked network that does the actual work.
        network_kwargs = dict(
            params=params,
            event_shape=event_shape,
            hidden_units=hidden_units,
            activation=activation,
            use_bias=use_bias,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
        )
        self.network = tfb.AutoregressiveNetwork(**network_kwargs)

    def call(self, x):
        """Return ``(shift, tanh(log_scale))`` computed by the autoregressive network for ``x``."""
        network_output = self.network(x)
        # The last axis holds the two per-dimension parameters.
        shift, raw_log_scale = tf.unstack(network_output, num=2, axis=-1)
        bounded_log_scale = tf.math.tanh(raw_log_scale)

        return shift, bounded_log_scale

class BatchNorm(tfb.Bijector):
    """
    Implementation of a Batch Normalization layer for use in normalizing flows according to [Papamakarios et al. (2017)].
    The moving average of the layer statistics is adapted from [Dinh et al. (2016)].

    The variables (beta, gamma and the running mean/variance) are created lazily on the first call of
    ``_forward``, ``_inverse`` or ``_inverse_log_det_jacobian``, using the last dimension of that input.

    :param eps: Hyperparameter that ensures numerical stability, if any of the elements of v is near zero.
    :param decay: Weight for the update of the moving average, e.g. avg = (1-decay)*avg + decay*new_value.
    :param validate_args: Forwarded to ``tfb.Bijector``.
    :param name: Name of the bijector, forwarded to ``tfb.Bijector``.
    """

    def __init__(self, eps=1e-5, decay=0.95, validate_args=False, name="batch_norm"):
        super(BatchNorm, self).__init__(
            forward_min_event_ndims=1,
            inverse_min_event_ndims=1,
            validate_args=validate_args,
            name=name)

        self._vars_created = False
        self.eps = eps
        self.decay = decay

    def _create_vars(self, x):
        """Create beta, gamma and the running mean/variance, each of shape [1, n].

        ``n`` is the size of the last axis of ``x``. Only rank-1 ([d]) and rank-2
        ([batch, d]) inputs are supported.

        :raises ValueError: if ``x`` has any other rank (previously this surfaced as an
            UnboundLocalError because ``n`` was never assigned).
        """
        static_shape = x.get_shape().as_list()
        if len(static_shape) not in (1, 2):
            raise ValueError(
                "BatchNorm expects a rank-1 or rank-2 input, got shape {}".format(static_shape))
        # rank 1 -> shape[0], rank 2 -> shape[1]; both are the last axis.
        n = static_shape[-1]

        # NOTE(review): no initializer is passed, so the values come from get_variable's
        # default initializer — confirm this gives a usable (non-negative) starting variance
        # if _forward is called before any _inverse call has updated the running statistics.
        self.beta = tf.compat.v1.get_variable('beta', [1, n], dtype=tf.float32)
        self.gamma = tf.compat.v1.get_variable('gamma', [1, n], dtype=tf.float32)
        self.train_m = tf.compat.v1.get_variable(
            'mean', [1, n], dtype=tf.float32, trainable=False)
        self.train_v = tf.compat.v1.get_variable(
            'var', [1, n], dtype=tf.float32, trainable=False)

        self._vars_created = True

    def _forward(self, u):
        """Undo the normalisation using the running mean/variance (algebraic inverse of ``_inverse``)."""
        if not self._vars_created:
            self._create_vars(u)
        return (u - self.beta) * tf.exp(-self.gamma) * tf.sqrt(self.train_v + self.eps) + self.train_m

    def _inverse(self, x):
        """Normalise ``x`` with the statistics of the current minibatch and update the running statistics."""
        # Eq. 22 of [Papamakarios et al. (2017)]. Called during training of a normalizing flow.
        if not self._vars_created:
            self._create_vars(x)

        # statistics of current minibatch (computed over axis 0)
        m, v = tf.nn.moments(x, axes=[0], keepdims=True)

        # update train statistics via exponential moving average:
        # train <- train - decay * (train - batch) = (1 - decay) * train + decay * batch
        self.train_v.assign_sub(self.decay * (self.train_v - v))
        self.train_m.assign_sub(self.decay * (self.train_m - m))

        # normalize using current minibatch statistics, followed by BN scale and shift
        return (x - m) * 1. / tf.sqrt(v + self.eps) * tf.exp(self.gamma) + self.beta

    def _inverse_log_det_jacobian(self, x):
        """Return sum(gamma - 0.5 * log(v + eps)), with v the variance of the current minibatch."""
        # at training time, the log_det_jacobian is computed from statistics of the
        # current minibatch.
        if not self._vars_created:
            self._create_vars(x)

        _, v = tf.nn.moments(x, axes=[0], keepdims=True)
        abs_log_det_J_inv = tf.reduce_sum(
            self.gamma - .5 * tf.math.log(v + self.eps))
        return abs_log_det_J_inv


tensorflow:  2.2.0-rc2
tensorflow-probability:  0.9.0


In [0]:
hidden_shape = [100, 100]  # hidden shape for MADE network of MAF
layers = 12  # number of layers of the flow
event_shape = [6]  # dimensionality of one data row

base_dist = tfd.Normal(loc=0.0, scale=1.0)  # specify base distribution

# According to [Papamakarios et al. (2017)]:
# BatchNorm between the last autoregressive layer and the base distribution, and every two autoregressive layers
bijectors = []
bijectors.append(BatchNorm(eps=10e-5, decay=0.95))
for i in range(layers):
    bijectors.append(tfb.MaskedAutoregressiveFlow(shift_and_log_scale_fn = Made(params=2, event_shape=event_shape, hidden_units=hidden_shape, activation="relu")))
    # Reverse the order of the dimensions after every MAF layer. The permutation is
    # derived from event_shape so it stays valid if the dimensionality changes
    # (for event_shape=[6] this is [5, 4, 3, 2, 1, 0]).
    bijectors.append(tfb.Permute(permutation=list(reversed(range(event_shape[0])))))

    # add BatchNorm every two layers
    if (i + 1) % 2 == 0:
        bijectors.append(BatchNorm(eps=10e-5, decay=0.95))

# tfb.Chain applies the LAST bijector of its list first in the forward direction,
# so the list is reversed to make the first-appended BatchNorm act on base samples first.
bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_maf')

maf = tfd.TransformedDistribution(
    distribution=base_dist,
    bijector=bijector,
    event_shape=event_shape
)

# initialize flow
# samples = maf.sample()

# Model Training Parameters

In [0]:
# Learning-rate schedule: polynomial decay (power 0.5) from base_lr to end_lr
# over `max_epochs` decay steps.
# NOTE(review): Keras schedules are normally evaluated per optimizer step (one
# per batch), not per epoch — confirm that `max_epochs` is the intended number
# of decay steps.
base_lr = 1e-3
end_lr = 1e-4
max_epochs = int(5e3)  # maximum number of epochs of the training
learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(base_lr, max_epochs, end_lr, power=0.5)

In [0]:
# Adam optimizer driven by the learning-rate schedule defined above.
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn) 

# Wrap the flow in a Keras functional model: the input is a batch of events of
# shape `event_shape`, the output is the flow's log-probability of that input.
x_ = tf.keras.layers.Input(shape=event_shape, dtype=tf.float32)
log_prob_ = maf.log_prob(x_)
model = tf.keras.Model(x_, log_prob_)

Instructions for updating:
`AffineScalar` bijector is deprecated; please use `tfb.Shift(loc)(tfb.Scale(...))` instead.


In [0]:
# The model output already is the log-probability, so the loss ignores the
# target argument and returns the negative log-probability.
model.compile(optimizer=opt,
              loss=lambda _, log_prob: -log_prob)

# Setting Up Checkpoints (Will Save Model After Every Epoch)

In [0]:
# Minibatch size used by model.fit and path of the HDF5 file the training loop
# saves the model weights to.
batch_size = 100
weight_file = './checkpoint/cp.h5'

In [1]:
# Make sure the checkpoint directory exists before the first save.
checkpoint_dir = os.path.dirname(weight_file)
if checkpoint_dir:
  os.makedirs(checkpoint_dir, exist_ok=True)

# Resume from an earlier run if a checkpoint is present. Loading once is enough:
# after every epoch the file on disk holds exactly the weights that are in memory.
# On a first run there is no file yet, so training starts from the initial weights.
if os.path.exists(weight_file):
  model.load_weights(weight_file)

for i in range(50): # change to desired number of epochs
  # The loss ignores the targets, so an empty (n, 0) array is passed as y.
  model.fit(x=dataset,
            y=np.zeros((dataset.shape[0], 0), dtype=np.float32),
            batch_size=batch_size,
            epochs=1,
            steps_per_epoch=dataset.shape[0] // batch_size,
            shuffle=True,
            verbose=True)
  # Checkpoint after every epoch.
  model.save_weights(weight_file)

NameError: name 'model' is not defined

In [0]:
# Save the current model weights under a separate file name.
model.save_weights("MAF_12_100_100_batchnorm.h5")

# Generating Samples

In [0]:
# Draw 100000 samples from the flow and convert them to a NumPy array.
# The samples are in the model's (projected and standardised) data space.
output = maf.sample(100000).numpy()

In [0]:
def reconstruct(predictions):
    """Map model-space rows back to longitude/latitude degrees.

    Reverses ``format_data``: the coordinate columns are un-scaled with the
    pickled scalers and then converted from metres to degrees with
    ``meters2lonlat``. The remaining columns are passed through unchanged.

    :param predictions: 2-D array whose columns are pick-up coordinates (0-1),
        drop-off coordinates (2-3), passenger count (4) and travel duration (5+),
        with the coordinates in the scaled space produced by ``format_data``.
    :return: New array with the same column layout and coordinates as (lon, lat) degrees.
    """
    # Split the *argument* (not the global training data) into its column groups.
    pick_up_c, drop_off_c, num_passenger, travel_duration = np.split(predictions, [2, 4, 5], axis = 1)

    # Recover the scalers written by format_data.
    # NOTE: pickle.load executes arbitrary code from the file; only load scaler
    # files that were produced by this notebook.
    with open('pick_up_scaler.pkl', 'rb') as pick_up_file:
        pick_up_scaler = load(pick_up_file)
    with open('drop_off_scaler.pkl', 'rb') as drop_off_file:
        drop_off_scaler = load(drop_off_file)

    pick_up_c = pick_up_scaler.inverse_transform(pick_up_c)
    drop_off_c = drop_off_scaler.inverse_transform(drop_off_c)

    # Convert every [x, y] row of both coordinate blocks back to [lon, lat].
    for coords in (pick_up_c, drop_off_c):
        for row in coords:
            row[0], row[1] = meters2lonlat(row[0], row[1])

    return np.concatenate([pick_up_c, drop_off_c, num_passenger, travel_duration], axis = 1)

In [0]:
# Convert the generated samples back to longitude/latitude degrees.
samples = reconstruct(output)

# Saving Generated Samples

In [0]:
# Write the reconstructed samples to a comma-separated file with a header line
# naming the six columns.
# NOTE(review): np.savetxt by default prefixes the header with '# ' — confirm
# that downstream readers expect a commented header line.
file_name = '100000_samples_MAF_12_100_100_batchnorm' + '.csv'
np.savetxt(file_name, samples, delimiter = ',', header='pickup_longitude, pickup_latitude, dropoff_longitude, dropoff_latitude, passenger_count, trip_duration' )