We'll implement a Real NVP normalizing flow and fit it to the Iris dataset by maximum likelihood.

In [None]:
import tensorflow as tf
import numpy as np
import scipy.stats
import scipy.io
import seaborn as sns
import os
import scipy.sparse
from scipy.io import loadmat
import pandas as pd
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors
tfk = tf.keras
tfkl = tf.keras.layers
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
!git clone https://github.com/LukasRinder/normalizing-flows.git

fatal: destination path 'normalizing-flows' already exists and is not an empty directory.


In [None]:
# Move the kernel's working directory into the cloned repository
# (presumably so that `import normalizingflows` in the next cell resolves).
# NOTE(review): this is a relative path, so running the cell a second time in
# the same kernel will look for `normalizing-flows/` inside itself — confirm
# the notebook is always run top-to-bottom on a fresh kernel.
%cd normalizing-flows/

/content/normalizing-flows


In [None]:
import normalizingflows

In [None]:
from normalizingflows.flow_catalog import RealNVP

tensorflow:  2.4.0
tensorflow-probability:  0.12.1


In [None]:
from sklearn.datasets import load_iris
# Load the Iris feature matrix only. `return_X_y` is passed by keyword:
# recent scikit-learn releases make it keyword-only, so the positional form
# `load_iris(True)` raises a TypeError there. With return_X_y=True the call
# returns a (features, target) tuple; the targets are not used in this notebook.
data = load_iris(return_X_y=True)[0]

# Standardise every column to zero mean and unit variance, and cast to float32
# to match the float32 base distribution of the flow.
x = ((data - np.mean(data, 0)) / np.std(data, 0)).astype(np.float32)
n = x.shape[0]  # number of observations
p = x.shape[1]  # number of features

In [None]:
# Number of (BatchNormalization, RealNVP, Permute) triples stacked in the flow.
layers = 4

In [None]:
# Index vector that swaps the two halves of the feature vector:
# [half, ..., p-1, 0, ..., half-1]. Integer division keeps the indices exact
# integers; the float form `p/2` produced float ranges and, for an odd `p`,
# indices that are not a valid permutation.
half = p // 2
permutation = tf.cast(
    np.concatenate((np.arange(half, p), np.arange(0, half))), tf.int32
)

# Base density of the flow: a p-dimensional Gaussian centred at zero.
# No scale is passed, so the TFP default applies (identity per the TFP docs).
base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros(p, tf.float32))

# Build the stack of bijectors. Each of the `layers` repetitions contributes a
# batch-normalisation bijector, a RealNVP bijector and the half-swap above.
# NOTE(review): RealNVP comes from normalizingflows.flow_catalog; presumably it
# transforms one half of the features conditioned on the other, which is why
# the halves are swapped between repetitions — confirm against that module.
bijectors = []

for i in range(layers):
    bijectors.append(tfb.BatchNormalization())
    bijectors.append(RealNVP(input_shape=p, n_hidden=[32,32]))
    bijectors.append(tfb.Permute(permutation))

# tfb.Chain applies its list from last to first in the forward direction, so
# the list is reversed to make the first appended bijector the first applied.
bijector = tfb.Chain(bijectors=list(reversed(bijectors)), name='chain_of_real_nvp')

# The flow itself: the base Gaussian pushed through the chained bijector.
flow = tfd.TransformedDistribution(
    distribution=base_dist,
    bijector=bijector
)


In [None]:
# Adam optimiser with a deliberately small step size.
optimizer = tfk.optimizers.Adam(learning_rate=1e-5)

# Snapshot of the flow's trainable variables as they exist at this point.
# NOTE(review): variables that are only created later (e.g. on the first
# forward pass) would not be part of this snapshot — confirm it is complete.
params = flow.trainable_variables

In [None]:
@tf.function
def train_step(data):
  """Run one Adam update of the flow on a mini-batch.

  Args:
    data: batch of observations passed to `flow.log_prob`.

  Returns:
    The scalar loss (average negative log-likelihood of the batch) evaluated
    before the update. Existing callers that ignore the return value are
    unaffected.
  """
  # The gradient tape records the operations needed for automatic differentiation.
  with tf.GradientTape() as tape:
    loss = -tf.reduce_mean(flow.log_prob(data))  # average negative log-likelihood
  # Look the variables up after the forward pass rather than relying on a list
  # captured earlier: variables that a bijector only creates on its first call
  # (presumably the batch-norm scale/offset) would be absent from such a list
  # and would then never be trained.
  variables = flow.trainable_variables
  gradients = tape.gradient(loss, variables)  # gradients computed automatically
  optimizer.apply_gradients(zip(gradients, variables))  # Adam iteration
  return loss

In [None]:
# Input pipeline: one element per observation, shuffled with a buffer that
# holds the whole dataset, then grouped into mini-batches of 32.
train_data = (
    tf.data.Dataset.from_tensor_slices(x)
    .shuffle(buffer_size=n)
    .batch(batch_size=32)
)

In [None]:
EPOCHS = 20001  # number of passes over the training set

for epoch in range(1,EPOCHS+1):
  # The loop variable is named `batch` so it does not overwrite the `data`
  # array loaded earlier in the notebook.
  for batch in train_data:
    train_step(batch) # Adam iteration
  # Report progress on epochs 1, 1001, 2001, ...
  if (epoch % 1000) == 1:
    # Evaluate on the full standardised training set `x`. Using only the last
    # mini-batch of the epoch (a shuffled remainder of 150 % 32 = 22 samples)
    # made the reported value jump around from one report to the next.
    ll_train = tf.reduce_mean(flow.log_prob(x))
    print('Epoch  %g' %epoch)
    print('Training log-likelihood %g' %ll_train.numpy())
    print('-----------')



Epoch  1
Training log-likelihood -6.2762
-----------
Epoch  1001
Training log-likelihood -2.96461
-----------
Epoch  2001
Training log-likelihood -2.24732
-----------
Epoch  3001
Training log-likelihood -2.2694
-----------
Epoch  4001
Training log-likelihood -2.41465
-----------
Epoch  5001
Training log-likelihood -1.64345
-----------
Epoch  6001
Training log-likelihood -2.07701
-----------
Epoch  7001
Training log-likelihood -2.51249
-----------
Epoch  8001
Training log-likelihood -2.58125
-----------
Epoch  9001
Training log-likelihood -1.72651
-----------
Epoch  10001
Training log-likelihood -1.42071
-----------
Epoch  11001
Training log-likelihood -1.47177
-----------
Epoch  12001
Training log-likelihood -1.69175
-----------
Epoch  13001
Training log-likelihood -2.11553
-----------
Epoch  14001
Training log-likelihood -2.20688
-----------
Epoch  15001
Training log-likelihood -1.653
-----------
Epoch  16001
Training log-likelihood -2.02274
-----------
Epoch  17001
Training log-likel