In [50]:
import tensorflow as tf
import numpy as np
import scipy.stats
import scipy.io
import scipy.sparse
from scipy.io import loadmat
import pandas as pd
import tensorflow_probability as tfp
tfd = tfp.distributions
tfk = tf.keras
tfkl = tf.keras.layers
from PIL import Image
import matplotlib.pyplot as plt

In [51]:
from sklearn.datasets import load_iris
# `return_X_y` is passed by keyword: recent scikit-learn releases declare it
# keyword-only, so the positional form `load_iris(True)` raises a TypeError.
# With return_X_y=True the call returns a (features, target) pair; only the
# feature matrix (element 0) is kept.
data = load_iris(return_X_y=True)[0]

We now standardise the data:

In [52]:
# Centre every column on its mean and divide by its standard deviation,
# then store the result in single precision.
xfull = (data - data.mean(axis=0)) / data.std(axis=0)
xfull = xfull.astype(np.float32)
n, p = xfull.shape  # n = number of observations, p = number of features

We will remove 10% of the entries of the data, chosen uniformly at random. This corresponds to a *missing completely at random (MCAR)* scenario.

In [53]:
perc_miss = 0.1  # fraction of entries to remove (10%)
# Number of entries to blank out. The built-in int is used because the
# `np.int` alias has been removed from NumPy and raises AttributeError there.
n_missing = int(np.floor(n * p * perc_miss))
xmiss_flat = xfull.flatten()  # flatten() returns a copy, so xfull itself is not modified
miss_pattern = np.random.choice(n * p, n_missing, replace=False)  # distinct flat indices to remove
xmiss_flat[miss_pattern] = np.nan
xmiss = xmiss_flat.reshape([n, p])  # in xmiss, the missing values are represented by nans
mask = np.isfinite(xmiss)  # boolean mask: True where the entry is finite (observed), False at the nans

We want to learn a Gaussian distribution.

In [115]:
# Trainable parameters of the diagonal Gaussian, one entry per feature.
mu = tf.Variable(initial_value=tf.ones(shape=[p]), dtype=tf.float32)  # mean, initialised at 1
log_sigma_diag = tf.Variable(initial_value=tf.zeros(shape=[p]), dtype=tf.float32)  # log-sd of the Gaussian, initialised at 0

In [116]:
@tf.function
def log_likelihood(x):
  """Log-density of `x` under the diagonal Gaussian defined by `mu` and `log_sigma_diag`.

  The scale is obtained by exponentiating the global `log_sigma_diag`, and the
  location is the global `mu`. Returns whatever
  `tfd.MultivariateNormalDiag.log_prob` yields for `x`.
  """
  gaussian = tfd.MultivariateNormalDiag(loc = mu, scale_diag = tf.exp(log_sigma_diag))
  return gaussian.log_prob(x)

In [117]:
@tf.function
def log_likelihood_incomplete(x,m): # log(p(x_obs))
  """Log-density of the coordinates of `x` selected by `m`.

  Both arguments are squeezed first (size-1 dimensions are dropped), then `m`
  is used to index `x`, `mu` and `log_sigma_diag`, so only the selected
  coordinates enter the diagonal Gaussian.
  NOTE(review): because of the squeeze this looks intended for one observation
  at a time (the visible callers batch with size 1) - confirm before passing
  larger batches.
  """
  obs = tf.squeeze(m)
  values = tf.squeeze(x)
  # Diagonal Gaussian restricted to the selected coordinates.
  p_x_obs = tfd.MultivariateNormalDiag(loc = mu[obs], scale_diag = tf.exp(log_sigma_diag[obs]))
  return p_x_obs.log_prob(values[obs])

In [121]:
# Variables updated during training: the Gaussian mean and log standard deviations.
params = [mu, log_sigma_diag]

# Adam optimiser applied to `params` by the training steps.
optimizer = tfk.optimizers.Adam(learning_rate=1e-4)

In [122]:
def train_step(data):
  """Apply one Adam update to `params` using a batch of fully observed rows.

  The loss is the mean negative log-likelihood of the rows in `data` under
  `log_likelihood`. Nothing is returned; `params` are updated in place through
  `optimizer`.
  """
  with tf.GradientTape() as tape: # the tape records the operations needed for automatic differentiation
    # Average explicitly. `log_likelihood` returns one value per row, and
    # differentiating a non-scalar target sums the per-row gradients, which
    # would make the update scale with the batch size. For a batch of one row
    # the mean and the sum coincide.
    loss = -tf.reduce_mean(log_likelihood(data))
  gradients = tape.gradient(loss, params)  # gradients of the loss w.r.t. mu and log_sigma_diag
  optimizer.apply_gradients(zip(gradients, params))  # Adam iteration

In [123]:
def train_step_incomplete(data,mask):
  """Apply one Adam update to `params` from a partially observed observation.

  The loss is the negated output of `log_likelihood_incomplete(data, mask)`.
  Nothing is returned; `params` are updated in place through `optimizer`.
  """
  with tf.GradientTape() as grad_tape:  # records the forward pass for automatic differentiation
    neg_log_lik = -log_likelihood_incomplete(data, mask)
  grads = grad_tape.gradient(neg_log_lik, params)
  optimizer.apply_gradients(zip(grads, params))  # Adam iteration

In [124]:
# Smoke test of the incomplete-data step on the first row and its mask.
# The previous call used `x` and `m`, which are not defined at this point on a
# fresh kernel, so it raised NameError under Restart & Run All.
# Note that this performs one real Adam update of the parameters.
train_step_incomplete(xmiss[:1], mask[:1])

In [112]:
# One fully observed row per batch. The shuffle buffer spans all n rows: a
# buffer of size p (the number of features) would only shuffle locally and
# leave the rows close to their original order.
train_data_complete = tf.data.Dataset.from_tensor_slices(xfull).shuffle(n).batch(1)

In [66]:
EPOCHS = 1000

for epoch in range(1,EPOCHS+1):
  # The loop variable is `batch`, not `data`, so the raw iris array stored in
  # `data` is not overwritten and the standardisation cell stays re-runnable.
  for batch in train_data_complete:
    train_step(batch) # Adam iteration
  if (epoch % 100) == 1:  # report at epochs 1, 101, 201, ...
    ll_train = tf.reduce_mean(log_likelihood(xfull))
    print('Epoch  %g' %epoch)
    print('Training log-likelihood %g' %ll_train.numpy())
    print('Mean')  # label only; the vector itself is printed by tf.print below
    tf.print(mu)
    print('-----------')

Epoch  1
Training log-likelihood -7.62365
Mean %g
[0.991437614 0.989106357 0.992231309 0.992026329]
-----------
Epoch  101
Training log-likelihood -5.84348
Mean %g
[0.287679195 0.282118648 0.291233122 0.291394711]
-----------
Epoch  201
Training log-likelihood -5.68568
Mean %g
[0.0684714913 0.0631889403 0.0721385106 0.0728080049]
-----------
Epoch  301
Training log-likelihood -5.67655
Mean %g
[0.0188003127 0.0136350179 0.0224719066 0.023287328]
-----------
Epoch  401
Training log-likelihood -5.67593
Mean %g
[0.0076692 0.00255682436 0.0113275219 0.0121578667]
-----------
Epoch  501
Training log-likelihood -5.67585
Mean %g
[0.00514175976 1.01809428e-05 0.00880924892 0.00961178355]
-----------
Epoch  601
Training log-likelihood -5.67584
Mean %g
[0.00461950805 -0.000463179429 0.00827990938 0.00910876784]
-----------
Epoch  701
Training log-likelihood -5.67584
Mean %g
[0.00452799769 -0.000578902953 0.00813991856 0.00897171814]
-----------
Epoch  801
Training log-likelihood -5.67584
Mean %g


In [119]:
# One (row, mask) pair per batch. The shuffle buffer spans all n rows: a buffer
# of size p (the number of features) would only shuffle locally and leave the
# rows close to their original order.
train_data_incomplete = tf.data.Dataset.from_tensor_slices((xmiss,mask)).shuffle(n).batch(1)

In [125]:
EPOCHS = 1000

for epoch in range(1,EPOCHS+1):
  # Loop variables are named so that the raw iris array stored in `data` is
  # not overwritten by a batch tensor.
  for batch, batch_mask in train_data_incomplete:
    train_step_incomplete(batch, batch_mask) # Adam iteration
  if (epoch % 100) == 1:  # report at epochs 1, 101, 201, ...
    # Monitoring uses the complete data xfull, although training only sees xmiss.
    ll_train = tf.reduce_mean(log_likelihood(xfull))
    print('Epoch  %g' %epoch)
    print('Training log-likelihood %g' %ll_train.numpy())
    print('Mean')  # label only; the vector itself is printed by tf.print below
    tf.print(mu)
    print('-----------')

Epoch  1
Training log-likelihood -7.62602
Mean %g
[0.991752088 0.989478 0.992643416 0.992380202]
-----------
Epoch  101
Training log-likelihood -5.87581
Mean %g
[0.306274176 0.302872032 0.328692913 0.32934466]
-----------
Epoch  201
Training log-likelihood -5.69444
Mean %g
[0.0719177946 0.0764498785 0.102164574 0.112460889]
-----------
Epoch  301
Training log-likelihood -5.67983
Mean %g
[0.0133349244 0.0214061942 0.0454553962 0.0600944161]
-----------
Epoch  401
Training log-likelihood -5.67824
Mean %g
[-0.00100588752 0.00822484586 0.0314968675 0.0476355068]
-----------


KeyboardInterrupt: ignored