In [21]:
import numpy as np
from myfunctions import action_to_group, get_group_labels, read_data, read_config, num_to_idx 
directory_dataset = '/home/nathan/Documents/FYP_code/LSTM1_guillermo/data_nathan/'

feat_size = 63 #21 joints * 3 dimensions (xyz)
batch_size = 20
padding_size = 300

# Parameters
learning_rate = 0.003
n_epochs = 200

# Network Parameters
seq_max_len = padding_size # Sequence max length
n_hidden = 100 # hidden layer num of features
p_dropout = 0.5

#Called to load training/testing data 
def create_dataset(filename, group_number):
    # dataset is organized as NxLxD (N = num samples, L temporal length with padding, D feature dimension
    # labels is NxY where Y is one hot label vector
    
    # -3 sends back labels as actions number (0..45) with one hot enconding (000..1..00)
    # -2 sends back labels as actions number (0..45) with no one hot enconding (1 or 2 or 45)
    # -1 sends back labels as groups number (0..12or26) with one hot encoding (000..1..00)
    # 0> sends back labels of only this group with one hot enconding

    dataset, labels, lengths = [], [], []
    files = read_config(filename)
    
    if group_number == -3: #standard 45 actions RNN
        num_classes = 45
    elif group_number == -2: #groupRNN to subnets (testing phase)
        #num_classes = 45 #not needed in the function when -2 activated
        pass
    elif group_number == -1: #groupRNN
        num_classes = max(atog) + 1 
        #pass
    else: #subnets
        list_actions = gtoa[group_number] #gives list of actions in current group number
        num_classes = len(list_actions)
        
    for i in files:
        
        if group_number == -3:  #standard 45 actions RNN
            
            tmp_data = read_data(directory_dataset + i[0]) #i[0] =  a01s01r01.txt (for eg)
            num =  int( i[1] )
            tmp_labels = np.transpose(num_to_idx(num, num_classes)) #one hot encoding
        
        elif group_number == -2: #Feed output of groupRNN to pre-trained subnet (testing phase)
            
            tmp_data = read_data(directory_dataset + i[0]) #i[0] =  a01s01r01.txt (for eg)
            tmp_labels = int(i[1]) #will one hot later as first need to dispatch data to relevant subnet using labels
        
        elif group_number == -1: #Build groupRNN
            
            tmp_data = read_data(directory_dataset + i[0]) 
            num =  atog[ int( i[1] )] #only used for one hot encoding in the line below
            tmp_labels = np.transpose(num_to_idx(num, num_classes)) 
            #i[0] =  a01s01r01.txt (for eg)
            #i[1] = number between 0 and 44 (or as many classes there is)
            #tmp_labels = [0, 0, 1, 0, .. 0] = one-hot encoding of class value
            #tmp_data = list with variable len around 300 & each item in the list is a nested list of len 63 (=feat_size)
        
        else: #Build subnet RNN
            
            num = int( i[1] ) #original action number
            if num in list_actions: #select data only if belongs to group 
                tmp_data = read_data(directory_dataset + i[0]) #add to dataset if part of the group
                tmp_labels = np.transpose(num_to_idx(list_actions.index(num), num_classes)) #add labels as well     
            else: continue
            
        if len(tmp_data)<300: #why 300, is that the longest sequence ? 300 = padding_size btw...
            
            #records tmp_data initial length before padding
            #pads tmp_data with zeros until padding_size (300) so len(tmp_data) = 300 always with len 63 items
                
            lengths.append(len(tmp_data))
            tmp_data.extend([ [0.0] * feat_size ] * (padding_size - len(tmp_data)))  

            dataset.append(tmp_data)
            labels.append(tmp_labels) 
        
    # all 0..565 lists with item as nested lists of size (300, 26, 1)
    
    return np.asarray(dataset), np.asarray(labels), np.asarray(lengths,dtype=np.int32)

#Called once when training RNN
def batch_generation(data,labels,lengths):
    
    num_classes = max(labels)+1 #labels is a one hot encoded numpy array. This returns number of columns (=groups).
    
    nsamples,_,_ = data.shape

    indices = np.arange(nsamples) #np.arange(3) -> array([0, 1, 2])
    np.random.shuffle(indices) #shuffle the indices
    num_batches = int(np.floor(nsamples/batch_size)) #round to inferior number so = 0 if batch_size bigger than nsamples
    not_exact = 0

    if nsamples%batch_size != 0: #happens all the time unless nsamples is lucky multiple of batch_size
        not_exact = 1
    
    #declare empty arrays to contain the batches, dimensions are right
    batches_data = np.empty(shape=[num_batches+not_exact,batch_size,padding_size,feat_size])
    batches_labels = np.empty(shape=[num_batches+not_exact,batch_size,num_classes]) 
    batches_lengths = np.empty(shape=[num_batches + not_exact, batch_size],dtype=np.int32)

    for x in range(num_batches):
        batches_data[x, :, :, :] = data[indices[batch_size*x:batch_size*(x+1)], :, :]
        batches_labels[x,:,:] = labels[indices[batch_size*x:batch_size*(x+1)], :]
        batches_lengths[x,:] = lengths[indices[batch_size*x:batch_size*(x+1)]]

    if not_exact > 0:
        
        to_complete = nsamples%batch_size
        
        #nsamples is too small, reuse the samples from previous batch, taken randomly to complete this batch
        tmp_random = np.random.randint(0,nsamples,batch_size-to_complete) # we complete last batch with random samples
        #prints list of indices it will take randomly
        
        #[num_batches] refers to the last batch that is not complete
        tmp_data = data[indices[batch_size*num_batches:batch_size*num_batches+to_complete],:,:]
        batches_data[num_batches]=np.concatenate((tmp_data,data[tmp_random,:,:]),axis=0)

        tmp_labels = labels[indices[batch_size*num_batches:batch_size*num_batches+to_complete],:]
        batches_labels[num_batches] = np.concatenate((tmp_labels,labels[indices[tmp_random],:]))
        
        tmp_lengths = lengths[indices[batch_size*num_batches:batch_size*num_batches+to_complete]]
        batches_lengths[num_batches] = np.concatenate((tmp_lengths,lengths[indices[tmp_random]]))

    return batches_data, batches_labels, batches_lengths, num_batches+not_exact

In [6]:
# Full example for my blog post at:
# https://danijar.com/building-variational-auto-encoders-in-tensorflow/

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

tfd = tf.contrib.distributions
tf.reset_default_graph()

def make_encoder(data, code_size):
  x = tf.layers.flatten(data)
  x = tf.layers.dense(x, 200, tf.nn.relu)
  x = tf.layers.dense(x, 200, tf.nn.relu)
  loc = tf.layers.dense(x, code_size)
  scale = tf.layers.dense(x, code_size, tf.nn.softplus)
  return tfd.MultivariateNormalDiag(loc, scale)


def make_prior(code_size):
  loc = tf.zeros(code_size)
  scale = tf.ones(code_size)
  return tfd.MultivariateNormalDiag(loc, scale)


def make_decoder(code, data_shape):
  x = code
  x = tf.layers.dense(x, 200, tf.nn.relu)
  x = tf.layers.dense(x, 200, tf.nn.relu)
  logit = tf.layers.dense(x, np.prod(data_shape))
  logit = tf.reshape(logit, [-1] + data_shape)

  #return tfd.Independent(tfd.Bernoulli(logit), 2)
  return tfd.Independent( tfd.Normal(loc=0., scale=1.), 63)


def plot_codes(ax, codes, labels):
  ax.scatter(codes[:, 0], codes[:, 1], s=2, c=labels, alpha=0.1)
  ax.set_aspect('equal')
  ax.set_xlim(codes.min() - .1, codes.max() + .1)
  ax.set_ylim(codes.min() - .1, codes.max() + .1)
  ax.tick_params(
      axis='both', which='both', left=False, bottom=False,
      labelleft=False , labelbottom=False)


def plot_samples(ax, samples):
  for index, sample in enumerate(samples):
    ax[index].imshow(sample, cmap='gray')
    ax[index].axis(False)

#data = tf.placeholder(tf.float32, [None, 28, 28])
data = tf.placeholder(tf.float32, [None, 300, 63])

make_encoder = tf.make_template('encoder', make_encoder)
make_decoder = tf.make_template('decoder', make_decoder)

prior = make_prior(code_size=2)
posterior = make_encoder(data, code_size=2)
code = posterior.sample()

#likelihood = make_decoder(code, [28, 28]).log_prob(data)
likelihood = make_decoder(code, [300, 63]).log_prob(data)

divergence = tfd.kl_divergence(posterior, prior)
elbo = tf.reduce_mean(likelihood - divergence)
optimize = tf.train.AdamOptimizer(0.001).minimize(-elbo)

#samples = make_decoder(prior.sample(10), [28, 28]).mean()
samples = make_decoder(prior.sample(10), [300, 63]).mean()

#mnist = input_data.read_data_sets('MNIST_data/')
train_data, train_labels, train_lengths = create_dataset("training.txt",-2)
(batch_x, batch_y, batch_seqlen, n_batches) = batch_generation(train_data, train_labels, train_lengths)

##fig, ax = plt.subplots(nrows=20, ncols=11, figsize=(10, 20))

with tf.train.MonitoredSession() as sess:
  for epoch in range(20):
    
    #feed = {data: mnist.test.images.reshape([-1, 28, 28])}
    feed = {data: train_data}
    
    test_elbo, test_codes, test_samples = sess.run([elbo, code, samples], feed)
    
    print('Epoch', epoch, 'tes_elbo', test_elbo, 'test_codes[0]', test_codes[0])
    
    ##ax[epoch, 0].set_ylabel('Epoch {}'.format(epoch))
    ##plot_codes(ax[epoch, 0], test_codes, mnist.test.labels) #plot codes where ?
    ##plot_codes(ax[epoch, 0], test_codes, train_labels)
    ##plot_samples(ax[epoch, 1:], test_samples) #not working
    
    
    for i in range(30):
    #for _ in range(600):
      #feed = {data: mnist.train.next_batch(100)[0].reshape([-1, 28, 28])}
      feed = {data: batch_x[i] } #batch_x is (30, 20, 300, 63)
    
      sess.run(optimize, feed)
##plt.savefig('vae-mnist.png', dpi=300, transparent=True, bbox_inches='tight')
##plt.savefig('vae-actist.png', dpi=300, transparent=True, bbox_inches='tight') #algo not working so not point saving atm

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
('Epoch', 0, 'tes_elbo', -13100.437, 'test_codes[0]', array([-0.71884704,  1.0326077 ], dtype=float32))


NameError: name 'batch_generation' is not defined

In [None]:
#test_codes.shape (10000, 2) #why returns 10000 ?
#test_samples.shape (10, 28, 28)
#test_elbo.shape ()
#mnist.test.images.reshape([-1, 28, 28]) (10000, 28, 28)
#mnist.test.labels (10000,)
#mnist.train.next_batch(100)[0].shape (100, 784) 
## 10000 images each of dimensions 28*28, 10 samples of those images per epoch, 2 dimensions for the embedding code.
## MNIST:The training set contains 60000 examples, and the test set 10000 examples.
# Pass input data as numpy array of dimension (number of samples, dimA, dimB) -> (595, 300, 63)
# No labels to do VAE (unsupervised learning)
# Ok got it for input.
# For distrib ?
# For visualisation ?


** ORIGINAL VERSION BELOW **

In [1]:
# Full example for my blog post at:
# https://danijar.com/building-variational-auto-encoders-in-tensorflow/

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

tfd = tf.contrib.distributions


def make_encoder(data, code_size):
  x = tf.layers.flatten(data)
  x = tf.layers.dense(x, 200, tf.nn.relu)
  x = tf.layers.dense(x, 200, tf.nn.relu)
  loc = tf.layers.dense(x, code_size)
  scale = tf.layers.dense(x, code_size, tf.nn.softplus)
  return tfd.MultivariateNormalDiag(loc, scale)


def make_prior(code_size):
  loc = tf.zeros(code_size)
  scale = tf.ones(code_size)
  return tfd.MultivariateNormalDiag(loc, scale)


def make_decoder(code, data_shape):
  x = code
  x = tf.layers.dense(x, 200, tf.nn.relu)
  x = tf.layers.dense(x, 200, tf.nn.relu)
  logit = tf.layers.dense(x, np.prod(data_shape))
  logit = tf.reshape(logit, [-1] + data_shape)
  return tfd.Independent(tfd.Bernoulli(logit), 2)


def plot_codes(ax, codes, labels):
  ax.scatter(codes[:, 0], codes[:, 1], s=2, c=labels, alpha=0.1)
  ax.set_aspect('equal')
  ax.set_xlim(codes.min() - .1, codes.max() + .1)
  ax.set_ylim(codes.min() - .1, codes.max() + .1)
  ax.tick_params(
      axis='both', which='both', left='off', bottom='off',
      labelleft='off', labelbottom='off')


def plot_samples(ax, samples):
  for index, sample in enumerate(samples):
    ax[index].imshow(sample, cmap='gray')
    ax[index].axis('off')


data = tf.placeholder(tf.float32, [None, 28, 28])

make_encoder = tf.make_template('encoder', make_encoder)
make_decoder = tf.make_template('decoder', make_decoder)

# Define the model.
prior = make_prior(code_size=2)
posterior = make_encoder(data, code_size=2)
code = posterior.sample()

# Define the loss.
likelihood = make_decoder(code, [28, 28]).log_prob(data)
divergence = tfd.kl_divergence(posterior, prior)
elbo = tf.reduce_mean(likelihood - divergence)
optimize = tf.train.AdamOptimizer(0.001).minimize(-elbo)

samples = make_decoder(prior.sample(10), [28, 28]).mean()

mnist = input_data.read_data_sets('MNIST_data/')
fig, ax = plt.subplots(nrows=20, ncols=11, figsize=(10, 20))
with tf.train.MonitoredSession() as sess:
  for epoch in range(20):
    feed = {data: mnist.test.images.reshape([-1, 28, 28])}
    test_elbo, test_codes, test_samples = sess.run([elbo, code, samples], feed)
    print('Epoch', epoch, 'elbo', test_elbo)
    ax[epoch, 0].set_ylabel('Epoch {}'.format(epoch))
    plot_codes(ax[epoch, 0], test_codes, mnist.test.labels)
    plot_samples(ax[epoch, 1:], test_samples)
    for _ in range(600):
      feed = {data: mnist.train.next_batch(100)[0].reshape([-1, 28, 28])}
      sess.run(optimize, feed)
plt.savefig('vae-mnist.png', dpi=300, transparent=True, bbox_inches='tight')

In [2]:
mnist.train.next_batch(100)[0].shape (100, 784)

(100, 784)

In [3]:
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
print(mnist)

Datasets(train=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x7f78f5b70c50>, validation=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x7f78f5b70a90>, test=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x7f78f5b70690>)


In [19]:
train_data, train_labels, train_lengths = create_dataset("training.txt",-2)

In [22]:
(batch_x, batch_y, batch_seqlen, n_batches) = batch_generation(train_data, train_labels, train_lengths)
print(n_batches)

IndexError: too many indices for array

In [15]:
mnist = input_data.read_data_sets('MNIST_data/')

mnist.train.next_batch(100)[0].reshape([-1, 28, 28]).shape #(10000, 28, 28)
mnist.test.images.reshape([-1, 28, 28])[0].shape

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


(784,)

In [20]:
train_labels.shape

(595,)