In [2]:
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
import tensorflow.contrib.eager as tfe
from functools import partial
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.tri as tri
from scipy import stats
from scipy.special import gamma as gamma_fun
import scipy.special as spec
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import numpy as np
np.set_printoptions(suppress=True)

In [9]:
# Turn on eager execution so tensors are evaluated immediately
# (later cells call `.numpy()` directly on model outputs).
tf.enable_eager_execution()

In [182]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.regularizers import l2


class Permutation(tf.keras.Model):
    """Fixed random shuffle of the last (feature) axis, together with its inverse."""

    def __init__(self, input_dim):
        """
        Draws one random permutation of the input dimensions and stores it,
        along with the permutation that undoes it, as non-trainable variables.
        ----------

        Arguments:
        input_dim  : int -- number of entries along the axis that gets permuted.
        """

        super(Permutation, self).__init__()

        forward_order = np.random.permutation(input_dim)
        # argsort of a permutation is the permutation that reverses it
        backward_order = np.argsort(forward_order)

        self.permutation = tf.Variable(
            initial_value=forward_order, trainable=False, dtype=tf.int32, name='permutation'
        )
        self.inv_permutation = tf.Variable(
            initial_value=backward_order, trainable=False, dtype=tf.int32, name='inv_permutation'
        )

    def call(self, x, inverse=False):
        """
        Reorders the entries of x along its last axis.
        ----------

        Arguments:
        x       : tf.Tensor -- the batch to permute
        inverse : bool -- if True, apply the stored inverse permutation instead
        """

        order = self.inv_permutation if inverse else self.permutation
        # tf.transpose without `perm` reverses the axes, so the last axis becomes the
        # leading one, is gathered there, and is moved back by the second transpose.
        flipped = tf.transpose(x)
        return tf.transpose(tf.gather(flipped, order))


class CouplingNet(tf.keras.Model):
    """Fully connected network that maps [theta, condition] to the inputs of a coupling transform."""

    def __init__(self, meta, n_out):
        """
        Builds the hidden layers plus a linear output layer.
        ----------

        Arguments:
        meta  : dict -- settings; the keys read here are
                        'n_units'     (iterable of int, one entry per hidden layer),
                        'activation'  (activation of the hidden layers),
                        'initializer' (kernel initializer of every layer).
        n_out : int  -- number of outputs of the coupling net
        """

        super(CouplingNet, self).__init__()

        activation = meta['activation']
        initializer = meta['initializer']

        stack = []
        # Hidden layers
        for n_hidden in meta['n_units']:
            stack.append(
                tf.keras.layers.Dense(n_hidden, activation=activation, kernel_initializer=initializer)
            )
        # Output layer (no activation)
        stack.append(tf.keras.layers.Dense(n_out, kernel_initializer=initializer))

        self.dense = tf.keras.Sequential(stack)

    def call(self, theta, x):
        """
        Concatenates theta and x along the last axis and runs the result through the network.
        ----------

        Arguments:
        theta : tf.Tensor -- the part of the parameter vector fed to this net
        x     : tf.Tensor -- the conditioning data; must match theta in all but the last axis
        ----------

        Returns:
        tf.Tensor whose last axis has n_out entries
        """

        return self.dense(tf.concat((theta, x), axis=-1))


class ConditionalInvertibleBlock(tf.keras.Model):
    """Conditional affine coupling block that can be evaluated in both directions."""

    def __init__(self, meta):
        """
        Creates a conditional invertible block.
        ----------

        Arguments:
        meta : dict -- settings; the keys read here are
                       'theta_dim' (int, size of the vector transformed by the block),
                       'alpha'     (float or None, soft-clamping constant for the scale outputs),
                       'permute'   (truthy to shuffle the inputs with a fixed random permutation).
                       The same dict is handed to every CouplingNet, so it must also hold
                       the keys CouplingNet reads.
        """

        super(ConditionalInvertibleBlock, self).__init__()

        theta_dim = meta['theta_dim']
        self.alpha = meta['alpha']

        # Split sizes: the second half receives the extra entry when theta_dim is odd
        self.n_out1 = theta_dim // 2
        self.n_out2 = theta_dim - self.n_out1

        self.permutation = Permutation(theta_dim) if meta['permute'] else None

        self.s1 = CouplingNet(meta, self.n_out1)
        self.t1 = CouplingNet(meta, self.n_out1)
        self.s2 = CouplingNet(meta, self.n_out2)
        self.t2 = CouplingNet(meta, self.n_out2)

    def _soft_clamp(self, s):
        """Squashes s into (-alpha, alpha) with a scaled arctan; returns s unchanged if alpha is None."""

        if self.alpha is None:
            return s
        return (2. * self.alpha / np.pi) * tf.math.atan(s / self.alpha)

    def _forward_pass(self, theta, x, log_det_J):
        """Maps theta -> v and optionally returns the log-determinant of the Jacobian."""

        if self.permutation is not None:
            theta = self.permutation(theta)

        u1, u2 = tf.split(theta, [self.n_out1, self.n_out2], axis=-1)

        # First half is transformed conditioned on the untouched second half
        s1 = self._soft_clamp(self.s1(u2, x))
        t1 = self.t1(u2, x)
        v1 = u1 * tf.exp(s1) + t1

        # Second half is transformed conditioned on the already transformed first half
        s2 = self._soft_clamp(self.s2(v1, x))
        t2 = self.t2(v1, x)
        v2 = u2 * tf.exp(s2) + t2

        v = tf.concat((v1, v2), axis=-1)
        if not log_det_J:
            return v
        # The Jacobian is triangular, so log|det J| is the sum of the log-scales
        return v, tf.reduce_sum(s1, axis=-1) + tf.reduce_sum(s2, axis=-1)

    def _inverse_pass(self, v, x):
        """Maps v -> theta by undoing the two coupling steps in reverse order."""

        v1, v2 = tf.split(v, [self.n_out1, self.n_out2], axis=-1)

        # Undo the second coupling step first
        s2 = self._soft_clamp(self.s2(v1, x))
        u2 = (v2 - self.t2(v1, x)) * tf.exp(-s2)

        # Then undo the first one, which was conditioned on u2
        s1 = self._soft_clamp(self.s1(u2, x))
        u1 = (v1 - self.t1(u2, x)) * tf.exp(-s1)

        u = tf.concat((u1, u2), axis=-1)
        if self.permutation is not None:
            u = self.permutation(u, inverse=True)
        return u

    def call(self, theta, x, inverse=False, log_det_J=True):
        """
        Runs the block in the requested direction.
        ----------

        Arguments:
        theta     : tf.Tensor with theta_dim entries on the last axis -- the vector to transform
                    (parameters in the forward direction, latent values in the inverse direction)
        x         : tf.Tensor -- the conditioning data; must match theta in all but the last axis
        inverse   : bool -- flag indicating whether to run the block forward or backwards
        log_det_J : bool -- forward direction only: also return the log determinant of the Jacobian
        ----------

        Returns:
        (v, log_det_J)  :  forward direction with log_det_J = True -- the transformed input and the
                           log-determinant, which has the shape of theta without its last axis
        v               :  forward direction with log_det_J = False -- the transformed input only
        u               :  inverse direction -- the back-transformed input
        """

        if inverse:
            return self._inverse_pass(theta, x)
        return self._forward_pass(theta, x, log_det_J)


class BayesFlow(tf.keras.Model):
    """Implements a chain of conditional invertible blocks for Bayesian parameter inference."""

    def __init__(self, meta, n_blocks, theta_dim, alpha=1.9, summary_net=None, permute=False):
        """
        Creates a chain of cINN blocks and chains operations.
        ----------

        Arguments:
        meta        : dict -- settings for the coupling nets inside every block
                              (the keys read by CouplingNet, e.g. 'n_units', 'activation', 'initializer')
        n_blocks    : int  -- the number of invertible blocks
        theta_dim   : int  -- the dimensionality of the parameter space to be learned
        alpha       : float or None -- soft-clamping constant for the scale outputs of each block
        summary_net : tf.keras.Model or None -- an optional summary network for learning the sumstats of x
        permute     : bool -- whether to permute the inputs to the cINN
        """

        super(BayesFlow, self).__init__()

        # ConditionalInvertibleBlock accepts a single settings dict, so the explicit
        # constructor arguments are merged into a copy of `meta` (the caller's dict is
        # left untouched; the explicit arguments win over same-named keys in `meta`).
        block_meta = dict(meta)
        block_meta.update(theta_dim=theta_dim, alpha=alpha, permute=permute)

        self.cINNs = [ConditionalInvertibleBlock(block_meta) for _ in range(n_blocks)]
        self.summary_net = summary_net
        self.theta_dim = theta_dim

    def call(self, theta, x, inverse=False):
        """
        Performs one pass through an invertible chain (either inverse or forward).
        ----------

        Arguments:
        theta     : tf.Tensor of shape (batch_size, theta_dim) -- the parameters theta ~ p(theta|x) of interest
                    (or latent values z when inverse = True)
        x         : tf.Tensor -- the conditional data x; passed through summary_net first if one was given
        inverse   : bool -- flag indicating whether to run the chain forward or backwards
        ----------

        Returns:
        dict with keys 'z' and 'log_det_J'  -- if inverse = False: the latent values and the summed
                                               log-determinants of all blocks
        theta                               -- if inverse = True: the back-transformed values
        """

        if self.summary_net is not None:
            x = self.summary_net(x)
        if inverse:
            return self.inverse(theta, x)
        else:
            return self.forward(theta, x)

    def forward(self, theta, x):
        """Performs a forward pass through the chain; x must already be summarized."""

        z = theta
        log_det_Js = []
        for cINN in self.cINNs:
            z, log_det_J = cINN(z, x)
            log_det_Js.append(log_det_J)
        # Sum Jacobian determinants for all blocks to obtain total Jacobian.
        log_det_J = tf.add_n(log_det_Js)
        return {'z': z, 'log_det_J': log_det_J}

    def inverse(self, z, x):
        """Performs a reverse pass through the chain; x must already be summarized."""

        theta = z
        for cINN in reversed(self.cINNs):
            theta = cINN(theta, x, inverse=True)
        return theta

    def sample(self, x, n_samples, to_numpy=False, training=False):
        """
        Samples from the inverse model given a single instance x or a batch of instances.
        ----------

        Arguments:
        x         : tf.Tensor whose first axis is the batch axis -- the conditioning data of interest
        n_samples : int -- number of samples to obtain from the approximate posterior
        to_numpy  : bool -- flag indicating whether to return the samples as a np.array or a tf.Tensor
        training  : bool -- forwarded to the summary network (e.g. relevant for BatchNorm)
        ----------

        Returns:
        theta_samples : tf.Tensor or np.array of shape (n_samples, theta_dim) for a single instance,
                        or (n_samples, n_batch, theta_dim) for a batch
        """

        # Summarize obs data if summary net available
        if self.summary_net is not None:
            x = self.summary_net(x, training=training)

        # In case x is a single instance
        if int(x.shape[0]) == 1:
            z_normal_samples = tf.random_normal(shape=(n_samples, self.theta_dim), dtype=tf.float32)
            theta_samples = self.inverse(z_normal_samples, tf.tile(x, [n_samples, 1]))
        # In case of a batch input, send a 3D tensor through the invertible chain
        # Warning: This tensor could get pretty big if sampling a lot of values for a lot of batch instances!
        else:
            z_normal_samples = tf.random_normal(shape=(n_samples, int(x.shape[0]), self.theta_dim), dtype=tf.float32)
            theta_samples = self.inverse(z_normal_samples, tf.stack([x] * n_samples))

        if to_numpy:
            return theta_samples.numpy()
        return theta_samples


class InvariantModule(tf.keras.Model):
    """Implements an invariant nn module as proposed by Bloem-Reddy and Teh (2019)."""

    def __init__(self, meta):
        """
        Creates an invariant function that pools with learned softmax weights.
        ----------

        Arguments:
        meta : dict -- hyperparameters; the keys read here are
                       'dense_inv_args' (keyword arguments for every Dense layer, must contain 'units'),
                       'n_dense_inv'    (number of Dense layers in each of the three stacks).
        """

        super(InvariantModule, self).__init__()

        dense_args = meta['dense_inv_args']
        n_dense = meta['n_dense_inv']

        def dense_stack():
            """Returns a fresh list of n_dense identically configured Dense layers."""
            return [tf.keras.layers.Dense(**dense_args) for _ in range(n_dense)]

        # Embedding applied to every element before pooling
        self.module = tf.keras.Sequential(dense_stack())

        # Produces the pooling scores: same stack plus one linear layer of equal width
        self.weights_layer = tf.keras.Sequential(
            dense_stack() + [tf.keras.layers.Dense(dense_args['units'])]
        )

        # Applied to the pooled representation
        self.post_pooling_dense = tf.keras.Sequential(dense_stack())

    def call(self, x):
        """
        Transforms the input into an invariant representation.
        ----------

        Arguments:
        x : tf.Tensor of shape (batch_size, n, m) - the input where n is the 'time' or 'samples' dimensions
            over which pooling is performed and m is the input dimensionality
        ----------

        Returns:
        out : tf.Tensor of shape (batch_size, h_dim) -- the pooled and invariant representation of the input
        """

        # Embed
        embedded = self.module(x)

        # Softmax over axis 1 turns the scores into weights that sum to one across the n elements
        pooling_weights = tf.nn.softmax(self.weights_layer(embedded), axis=1)
        pooled = tf.reduce_sum(embedded * pooling_weights, axis=1)

        # Increase representational power
        return self.post_pooling_dense(pooled)


class EquivariantModule(tf.keras.Model):
    """Implements an equivariant nn module as proposed by Bloem-Reddy and Teh (2019)."""

    def __init__(self, meta):
        """
        Creates an equivariant neural network made of a stack of Dense layers
        and an invariant module whose output is appended to every element.
        ----------

        Arguments:
        meta : dict -- hyperparameters; the keys read here are
                       'dense_equiv_args' (keyword arguments for every Dense layer),
                       'n_dense_equiv'    (number of Dense layers);
                       the dict is also handed to InvariantModule.
        """

        super(EquivariantModule, self).__init__()

        equiv_layers = [
            tf.keras.layers.Dense(**meta['dense_equiv_args'])
            for _ in range(meta['n_dense_equiv'])
        ]
        self.module = tf.keras.Sequential(equiv_layers)

        self.invariant_module = InvariantModule(meta)

    def call(self, x):
        """
        Transforms the input into an equivariant representation.
        ----------

        Arguments:
        x : tf.Tensor of shape (batch_size, n, m) - the input where n is the 'time' or 'samples' dimensions
            and m is the input dimensionality
        ----------

        Returns:
        out : tf.Tensor of shape (batch_size, n, h_dim) -- one output vector per input element
        """

        pooled = self.invariant_module(x)
        n_elements = int(x.shape[1])
        # Repeat the pooled summary once per element so it can be appended to each of them
        pooled_repeated = tf.stack([pooled] * n_elements, axis=1)
        augmented = tf.concat((pooled_repeated, x), axis=-1)
        return self.module(augmented)


class InvariantNetwork(tf.keras.Model):
    """
    Implements a network which parameterizes a
    permutationally invariant function according to Bloem-Reddy and Teh (2019).
    """

    def __init__(self, meta):
        """
        Creates a permutationally invariant network consisting of
        meta['n_equiv'] equivariant modules followed by one invariant module.
        ----------

        Arguments:
        meta : dict -- hyperparameter settings for the equivariant and invariant modules
        """

        super(InvariantNetwork, self).__init__()

        equivariant_stack = [EquivariantModule(meta) for _ in range(meta['n_equiv'])]
        self.equiv = tf.keras.Sequential(equivariant_stack)
        self.inv = InvariantModule(meta)

    def call(self, x, **kwargs):
        """
        Transforms the input into a permutationally invariant
        representation by first passing it through the equivariant
        modules and then pooling with the invariant module.
        ----------

        Arguments:
        x : tf.Tensor of shape (batch_size, n, m) - the input where n is the 'time' or
        'samples' dimensions over which pooling is performed and m is the input dimensionality
        **kwargs : accepted for call compatibility and ignored
        ----------

        Returns:
        out : tf.Tensor of shape (batch_size, h_dim) -- the pooled and invariant representation of the input
        """

        return self.inv(self.equiv(x))

In [183]:
# Random placeholder tensors: D is (10, 50, 1), theta_g is (1, 4), theta_p is (1, 10, 2).
# NOTE(review): D is not used in the visible cells, and theta_g / theta_p are rebound
# later by data_gen() -- this cell looks like a leftover shape check.
D = tf.random_normal(shape=(10, 50, 1))
theta_g = tf.random_normal((1, 4))
theta_p = tf.random_normal((1, 10, 2))

In [184]:
class HierarchicalBayesFlow(tf.keras.Model):
    """
    Two-level conditional invertible network: one chain of blocks for the
    group-level (local) parameters and one chain for the global hyperparameters.
    """

    def __init__(self, meta):
        """
        Creates both summary networks and both invertible chains.
        ----------

        Arguments:
        meta : dict -- settings with the keys
                       'summary_meta' (passed to both InvariantNetwork instances),
                       'inv'          (settings of the local chain, incl. 'n_blocks' and 'theta_dim'),
                       'inv_global'   (settings of the global chain, incl. 'n_blocks' and 'theta_dim').
        """

        super(HierarchicalBayesFlow, self).__init__()

        self.global_summary = InvariantNetwork(meta['summary_meta'])
        self.summary_net = InvariantNetwork(meta['summary_meta'])

        # Invertible level 1
        self.cINNs = [ConditionalInvertibleBlock(meta['inv'])
                      for _ in range(meta['inv']['n_blocks'])]

        # Invertible level 2
        self.cINNs_global = [ConditionalInvertibleBlock(meta['inv_global'])
                             for _ in range(meta['inv_global']['n_blocks'])]

        # Latent sizes, needed to draw z with the right shape in inference()
        self.theta_dim_local = meta['inv']['theta_dim']
        self.theta_dim_global = meta['inv_global']['theta_dim']

    def _summarize(self, x):
        """
        Computes the conditioning tensors for both chains.

        Returns (x_p, x_global): x_global is the summary of all groups, and x_p holds
        each group's own summary concatenated with a copy of x_global.
        """

        x_local = tf.expand_dims(self.summary_net(x), axis=0) # 1 x Np x Ns
        x_global = self.global_summary(x_local)
        # Repeat the global summary once per group
        x_global_r = tf.stack([x_global] * int(x_local.shape[1]), axis=1)
        x_p = tf.concat([x_local, x_global_r], axis=-1)
        return x_p, x_global

    def _forward_chain(self, blocks, theta, x):
        """Runs theta forward through `blocks` and sums their log-determinants."""

        z = theta
        log_det_Js = []
        for cINN in blocks:
            z, log_det_J = cINN(z, x)
            log_det_Js.append(log_det_J)
        # Sum Jacobian determinants for all blocks to obtain total Jacobian.
        log_det_J = tf.add_n(log_det_Js)
        return {'z': z, 'log_det_J': log_det_J}

    def call(self, x, theta_g, theta_p):
        """
        Forward pass of both chains.
        ----------

        Arguments:
        x       : tf.Tensor of shape (Np, Nd, Nm) -- Nd observations for each of the Np groups
        theta_g : tf.Tensor of shape (1, theta_dim_global) -- the global parameters
        theta_p : tf.Tensor of shape (1, Np, theta_dim_local) -- the parameters of each group
        ----------

        Returns:
        (z_J_p, z_J_g) : two dicts with keys 'z' and 'log_det_J', for the local and the global chain
        """

        # Compute summaries
        x_p, x_global = self._summarize(x)

        z_J_p = self.forward_inv(theta_p, x_p)
        z_J_g = self.forward_inv_global(theta_g, x_global)

        return z_J_p, z_J_g

    def forward_inv(self, theta, x):
        """Performs a forward pass through the local chain."""

        return self._forward_chain(self.cINNs, theta, x)

    def forward_inv_global(self, theta, x):
        """Performs a forward pass through the global chain."""

        return self._forward_chain(self.cINNs_global, theta, x)

    def inference(self, x):
        """
        Draws one sample of the global and the local parameters given data x
        by pushing standard-normal noise backwards through both chains.
        ----------

        Arguments:
        x : tf.Tensor of shape (Np, Nd, Nm) -- the observed data
        ----------

        Returns:
        (theta_g, theta_p) : tensors of shape (1, theta_dim_global) and (1, Np, theta_dim_local)
        """

        # Compute summaries
        x_p, x_global = self._summarize(x)
        n_groups = int(x_p.shape[1])

        # Infer hyperparams
        theta_g = tf.random_normal(shape=(1, self.theta_dim_global))
        for cINN in reversed(self.cINNs_global):
            theta_g = cINN(theta_g, x_global, inverse=True)

        # Infer local: one latent vector per group, so the shape follows the data
        theta_p = tf.random_normal(shape=(1, n_groups, self.theta_dim_local))
        for cINN in reversed(self.cINNs):
            theta_p = cINN(theta_p, x_p, inverse=True)
        return theta_g, theta_p

In [185]:
def maximum_likelihood_loss(z, log_det_J, **args):
    """
    Computes the ML loss as described by Ardizzone et al. (in press).
    ----------
    Arguments:
    z         : tf.Tensor whose last axis is the latent dimension -- the output of the final CC block f(x; c, W)
    log_det_J : tf.Tensor shaped like z without its last axis -- the log determinant of the jacobian
                computed by the CC blocks.
    **args    : accepted and ignored

    Output:
    loss : tf.Tensor of shape (,)  -- a single scalar Monte-Carlo approximation of E[ ||z||^2 / 2 - log|det(J)| ]
    """

    # Sum the squares directly instead of squaring tf.norm(z): the value is the same,
    # but this avoids the square root, whose gradient is not finite when z is all zeros.
    squared_norm = tf.reduce_sum(tf.square(z), axis=-1)
    return tf.reduce_mean(0.5 * squared_norm - log_det_J)

In [186]:
# Hyperparameters for HierarchicalBayesFlow.
meta = {
    # Settings shared by both summary networks (InvariantNetwork and its sub-modules)
    'summary_meta': {
        'dense_inv_args'   :  dict(units=32, activation='elu', kernel_initializer='glorot_normal'),
        'dense_equiv_args' :  dict(units=16, activation='elu', kernel_initializer='glorot_normal'),
        'n_dense_inv'      :  2,
        'n_dense_equiv'    :  2,
        'n_equiv'          :  2
    },
    # Invertible chain for the per-group parameters (2 values per group)
    'inv': {
        'activation': 'elu',
        'initializer': 'glorot_uniform',
        'alpha': 1.9,
        'theta_dim': 2,
        # None is falsy, so the blocks are built without a permutation layer
        'permute': None,
        'n_blocks': 3,
        'n_units': [64, 64]
    },
    # Invertible chain for the global hyperparameters (4 values)
    'inv_global': {
        'activation': 'elu',
        'initializer': 'glorot_uniform',
        'alpha': 1.9,
        'theta_dim': 4,
        # None is falsy, so the blocks are built without a permutation layer
        'permute': None,
        'n_blocks': 3,
        'n_units': [64, 64]
    }
}


In [187]:
def generate_data(n_p, n_d):
    """
    Simulates one hierarchical data set.

    Draws four hyperparameters (mu_g, sigma_g, a, b), then a mean and a standard
    deviation for each of the n_p groups, then n_d normal observations per group.
    ----------

    Arguments:
    n_p : int -- number of groups
    n_d : int -- number of observations per group
    ----------

    Returns:
    (x, theta_g, theta_p) : float32 tensors of shape (n_p, n_d, 1), (1, 4) and (1, n_p, 2);
                            theta_g holds [mu_g, sigma_g, a, b], theta_p holds [mean, sd] per group
    """

    # Hyperparameters (the order of the draws is kept so the RNG stream is unchanged)
    mu_g = np.random.normal()
    sigma_g = np.random.gamma(1)
    a = np.random.exponential(1)
    b = np.random.exponential(1)

    # Group-level parameters
    group_means = np.random.normal(mu_g, sigma_g, size=n_p)
    group_sds = np.random.gamma(a, b, size=n_p)

    # Observations: drawn as (n_d, n_p), then rearranged to (n_p, n_d, 1)
    observations = np.random.normal(group_means, group_sds, size=(n_d, n_p))
    observations = observations.T[..., np.newaxis]

    hyper_params = np.array([[mu_g, sigma_g, a, b]])
    group_params = np.stack([group_means, group_sds], axis=-1)[np.newaxis]

    x = tf.convert_to_tensor(observations, dtype=tf.float32)
    theta_global = tf.convert_to_tensor(hyper_params, dtype=tf.float32)
    theta_local = tf.convert_to_tensor(group_params, dtype=tf.float32)
    return (x, theta_global, theta_local)

In [188]:
# Zero-argument simulator with fixed sizes: 10 groups with 50 observations each.
data_gen = partial(generate_data, n_p=10, n_d=50)

In [189]:
def train_online(model, optimizer, data_gen, iterations, p_bar=None,
                 clip_value=5., global_step=None, n_smooth=100):
    """
    Performs a number of training iterations with a given tensorflow model and optimizer.
    Every iteration simulates a fresh data set and minimizes the sum of the
    maximum likelihood losses of the local and the global chain.

    ----------

    Arguments:
    model           : tf.keras.Model -- called as model(x, theta_g, theta_p); must return two dicts
                                        with keys 'z' and 'log_det_J'
    optimizer       : tf.train.Optimizer -- the optimizer used for backprop
    data_gen        : callable -- takes no arguments and returns (x, theta_g, theta_p)
    iterations      : int -- the number of training loops to perform
    ----------

    Keyword Arguments:
    p_bar           : ProgressBar or None -- an instance for tracking the training progress
    clip_value      : float or None -- global-norm threshold for gradient clipping (None disables clipping)
    global_step     : tf.Variable -- a scalar tensor tracking the number of steps and used for learning rate decay
    n_smooth        : int -- number of most recent losses averaged for the running loss shown in p_bar
    ----------

    Returns:
    losses : a dictionary whose 'loss' entry lists the total loss of each training iteration
    """

    # Prepare a dict for storing losses
    losses = {
        'loss': [],
    }

    # Run training loop
    for it in range(1, iterations+1):

        with tf.GradientTape() as tape:

            # Generate inputs for the network
            x, theta_g, theta_p = data_gen()

            # Forward pass
            z_J_p, z_J_g = model(x, theta_g, theta_p)
            ml_p = maximum_likelihood_loss(z_J_p['z'], z_J_p['log_det_J'])
            ml_g = maximum_likelihood_loss(z_J_g['z'], z_J_g['log_det_J'])

            # Loss computation
            total_loss = ml_p + ml_g

        # One step backprop. The gradients are applied to the very same list they were
        # computed for; pairing them with model.variables would shift the pairs as soon
        # as the model holds a non-trainable variable.
        trainable_vars = model.trainable_variables
        gradients = tape.gradient(total_loss, trainable_vars)
        if clip_value is not None:
            gradients, _ = tf.clip_by_global_norm(gradients, clip_value)
        optimizer.apply_gradients(zip(gradients, trainable_vars), global_step=global_step)

        # Store losses
        losses['loss'].append(total_loss)
        running_loss = total_loss if it < n_smooth else np.mean(losses['loss'][-n_smooth:])

        # Update progress bar
        if p_bar is not None:
            p_bar.set_postfix_str("Iteration: {0},Loss: {1:.3f},Running Loss: {2:.3f}"
            .format(it, total_loss, running_loss))
            p_bar.update(1)
    return losses

In [190]:
# Optimizer setup: Adam whose learning rate decays exponentially with global_step
# (starting at starter_learning_rate, scaled by decay_rate per decay_steps steps).
# `epochs` and `iterations_per_epoch` are only referenced by the commented-out training cell.
starter_learning_rate = 0.001
epochs = 50
global_step = tfe.Variable(0, dtype=tf.int32)
decay_steps = 1000
iterations_per_epoch = 1000
decay_rate = .95
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_steps, decay_rate)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Build the two-level model from the `meta` settings.
model = HierarchicalBayesFlow(meta)

In [191]:
# # %%time
# for ep in range(1, epochs+1):
#     with tqdm(total=iterations_per_epoch, desc='Training epoch {}'.format(ep)) as p_bar:
#         losses = train_online(model=model, 
#                               optimizer=optimizer, 
#                               data_gen=data_gen, 
#                               iterations=1000,
#                               p_bar=p_bar,
#                               global_step=global_step)

In [192]:
# Bundle step counter, optimizer and model into one checkpoint object and try to
# restore the most recent checkpoint found under ./checkpoints/hierarchical.
checkpoint = tf.train.Checkpoint(step=global_step, optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(checkpoint, './checkpoints/{}'.format('hierarchical'), max_to_keep=3)
checkpoint.restore(manager.latest_checkpoint)
# latest_checkpoint is falsy when nothing was found; report which case applies.
if manager.latest_checkpoint:
    print("Restored from {}".format(manager.latest_checkpoint))
else:
    print("Initializing from scratch.")

Restored from ./checkpoints/hierarchical\ckpt-1


In [193]:
# Simulate one data set together with the ground-truth parameters that generated it.
test, theta_g, theta_p = data_gen()

In [195]:
# Draw 50 samples of the global and the local parameters for the simulated data set.
# Each call to model.inference yields a single draw, returned as (theta_g, theta_p).
g_samples = []
p_samples = []
for _ in range(50):
    s = model.inference(test)
    g_samples.append(s[0].numpy())
    p_samples.append(s[1].numpy())

In [199]:
# Mean of the sampled global parameters over the 50 draws.
np.array(g_samples).mean(axis=0)

array([[-0.3671413 ,  2.7803597 ,  1.2153327 ,  0.53016835]],
      dtype=float32)

In [200]:
# Ground-truth global parameters of the simulated data set, for comparison.
theta_g

<tf.Tensor: id=8927806, shape=(1, 4), dtype=float32, numpy=
array([[-0.67196506,  2.9674332 ,  1.4488682 ,  0.35598543]],
      dtype=float32)>

In [204]:
# Mean of the sampled local parameters over the 50 draws (and the singleton axis): one row per group.
np.array(p_samples).mean(axis=(0, 1))

array([[-2.7528837 ,  1.3863918 ],
       [-0.8163389 ,  0.02282362],
       [ 4.1098576 ,  0.26752824],
       [ 3.0472753 ,  0.3132162 ],
       [ 0.5897017 ,  0.07296287],
       [-1.8944311 ,  0.2140283 ],
       [-1.7764124 ,  0.9352921 ],
       [ 3.6297038 ,  0.03652804],
       [-1.5804875 ,  0.5832627 ],
       [ 0.29272157,  0.89911896]], dtype=float32)

In [205]:
# Ground-truth local parameters of the simulated data set, for comparison.
theta_p

<tf.Tensor: id=8927807, shape=(1, 10, 2), dtype=float32, numpy=
array([[[-3.236438  ,  0.9064149 ],
        [-0.9176344 ,  0.01489944],
        [ 4.8029294 ,  0.34953535],
        [ 3.7479672 ,  0.37762964],
        [ 0.6207832 ,  0.07409462],
        [-2.1444337 ,  0.23153763],
        [-2.0648587 ,  0.7919902 ],
        [ 4.1491613 ,  0.02888046],
        [-1.8619667 ,  0.44587857],
        [ 0.1768958 ,  0.79432696]]], dtype=float32)>