In [1]:
class MLP:
    """Fully connected network: tanh hidden layers followed by a linear output layer.

    The parameters are kept in two parallel lists, ``self.weights`` and
    ``self.biases``, with one ``tf.Variable`` pair per layer (hidden layers
    first, output layer last).
    """

    def __init__(self, dim_in, dim_out, dims_hidden=(128, 128)):
        """Create the weight and bias variables of every layer.

        Args:
            dim_in: Size of the last axis of the inputs.
            dim_out: Size of the last axis of the outputs.
            dims_hidden: Sizes of the hidden layers, in order. Any iterable
                of ints is accepted; an empty one yields a single linear layer.
        """
        # The default is an immutable tuple (a list default is one object shared
        # by every call). Copy first so that one-shot iterables work as well.
        dims_hidden = list(dims_hidden)
        self.num_layers = len(dims_hidden) + 1  # + 1 for the linear output layer

        dims_in = [dim_in] + dims_hidden    # e.g. [dim_in, hidden_1, hidden_2]
        dims_out = dims_hidden + [dim_out]  # e.g. [hidden_1, hidden_2, dim_out]

        self.weights, self.biases = [], []
        for idx_layer, (d_in, d_out) in enumerate(zip(dims_in, dims_out)):
            # Weights are drawn i.i.d. from a zero-mean Normal with std = 0.1 and
            # biases start at zero. Better initialisation heuristics exist; this
            # one is deliberately simple.
            weight = tf.Variable(initial_value=tf.random_normal([d_in, d_out], stddev=0.1),
                                 name="weights_{}".format(idx_layer))
            bias = tf.Variable(initial_value=tf.zeros([d_out]),
                               name="biases_{}".format(idx_layer))
            self.weights.append(weight)
            self.biases.append(bias)

    def __call__(self, inputs):
        """Apply the network to ``inputs`` and return the output-layer pre-activations.

        Args:
            inputs: Tensor that can be matrix-multiplied with the first weight
                matrix (assumes shape ``(batch, dim_in)`` -- TODO confirm with callers).

        Returns:
            The result of the final affine layer; no activation is applied to it.
        """
        idx_last = self.num_layers - 1
        x = inputs
        for idx_layer in range(self.num_layers):
            x = tf.add(self.biases[idx_layer], tf.matmul(x, self.weights[idx_layer]))
            if idx_layer != idx_last:  # tanh on every layer except the (linear) last one
                x = tf.tanh(x)
        return x

# Bind the MLP class to the generic name `MyModel` (presumably so that later
# cells can switch model classes by changing only this line -- confirm against usage).
MyModel = MLP

In [None]:
def get_categorical_log_prob(logits, labels):
    """Return the summed log-probability of ``labels`` under softmax(``logits``).

    Args:
        logits: Unnormalised class scores; the last axis indexes the classes.
        labels: Per-class weights with the same layout as ``logits``
            (presumably one-hot targets -- confirm with callers). They are
            cast to float32 before use.

    Returns:
        The label-weighted log-probabilities, summed over the last axis and
        then over axis 0 (a scalar when the inputs are ``(batch, classes)``).
    """
    labels = tf.cast(labels, tf.float32)
    # log_softmax evaluates log(softmax(x)) in a numerically stable way. The
    # previous `tf.log(softmax + 1e-8)` floored every log-probability at
    # log(1e-8) ~= -18.4 once the softmax underflowed and could even return
    # slightly positive "log-probabilities".
    log_probs = tf.nn.log_softmax(logits)
    log_probs_per_sample = tf.reduce_sum(labels * log_probs, axis=-1)
    # Equivalent to a single reduce_sum over both axes; kept in two steps so
    # the per-sample quantity stays visible.
    log_prob_batch = tf.reduce_sum(log_probs_per_sample, axis=0)
    return log_prob_batch

def get_accuracy(logits, labels):
    """Return the fraction of samples whose arg-max class agrees with the labels.

    Both ``logits`` and ``labels`` are reduced with an arg-max over their last
    axis; the element-wise matches are cast to float32 and averaged.
    """
    predicted_class = tf.argmax(logits, axis=-1)
    target_class = tf.argmax(labels, axis=-1)
    hits = tf.cast(tf.equal(predicted_class, target_class), tf.float32)
    return tf.reduce_mean(hits)