In [1]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import sys


In [2]:
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X = np.concatenate((X_train, X_test), axis=0)
n_all_cases = X.shape[0]
input_dim = X.shape[1]
X = np.reshape(X, (n_all_cases, -1))
y = np.concatenate((y_train, y_test)).astype('float32')
label_reshaped = y.reshape(len(y), 1)
onehot_encoder = OneHotEncoder(sparse=False, categories='auto')
onehot_encoded = onehot_encoder.fit_transform(label_reshaped)
X_train, X_test, y_train, y_test = train_test_split(X, onehot_encoded, test_size=0.1, random_state=40)

In [3]:
EPOCHS = 10
MINIBATCH_SIZE = 100
N_BATCHES = len(X_train) // MINIBATCH_SIZE

In [4]:
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))

y_true = tf.placeholder(tf.float32, [None, 10])
y_pred = tf.matmul(x, W)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred, labels=y_true))

gd_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
correct_mask = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_mask, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(EPOCHS):
        for j in range(N_BATCHES):
            batch_x = X_train[j * MINIBATCH_SIZE:(j + 1) * MINIBATCH_SIZE]
            batch_y = y_train[j * MINIBATCH_SIZE:(j + 1) * MINIBATCH_SIZE]
            sess.run(gd_step, feed_dict={x: batch_x, y_true: batch_y})

    ans = sess.run(accuracy, feed_dict={x: X_test, y_true: y_test})


print("Accuracy: {:.4}%".format(ans*100))

Accuracy: 84.01%


In [84]:
def f1(A,x):
    y=tf.matmul(A,x,name='y')
    return y

#for y= Ax, the derivative is: dy/dx= transpose(A)
@tf.custom_gradient
def f2(A,x):
    y=f1(A,x)
    def grad(dzByDy): # dz/dy = 2y reaches here correctly.
        tf.print("tensors:", dzByDy, {2: dzByDy * 2}, output_stream=sys.stderr)
        
        #print("dzByDy={}".format(dzByDy))
        dzByDx=tf.matmul(A,dzByDy,transpose_a=True) 
        return None, dzByDx
    return y,grad

x= tf.constant([[1.],[0.]],name='x')
A= tf.constant([ [1., 2.], [3., 4.]],name='A')

#y=f1(A,x) # This works as desired
y=f2(A,x) #This line gives Error

z=tf.reduce_sum(y*y,name='z')

g=tf.gradients(ys=z,xs=x)

with tf.Session() as sess:
    print(sess.run( g ))

[array([[20.],
       [28.]], dtype=float32)]


In [24]:
mean = 20.0
means1, stdevs1 = tf.constant([0.,mean]), tf.constant([0.7,7.])
means2, stdevs2 = tf.constant([mean,0.]), tf.constant([7.,0.7])
means3, stdevs3 = tf.constant([0.,-mean]), tf.constant([0.7,7.])
means4, stdevs4 = tf.constant([-mean,0.]), tf.constant([7.,0.7])
beta1 = 0.5
beta2 = 0.5
beta3 = 0.5
beta4 = 0.5
mult_dist_1 = tf.contrib.distributions.MultivariateNormalDiag(loc=means1, scale_diag=stdevs1)
mult_dist_2 = tf.contrib.distributions.MultivariateNormalDiag(loc=means2, scale_diag=stdevs2)
mult_dist_3 = tf.contrib.distributions.MultivariateNormalDiag(loc=means3, scale_diag=stdevs3)
mult_dist_4 = tf.contrib.distributions.MultivariateNormalDiag(loc=means4, scale_diag=stdevs4)
stdevs1_inv = tf.diag(tf.ones(2)/stdevs1)
stdevs2_inv = tf.diag(tf.ones(2)/stdevs2)
stdevs3_inv = tf.diag(tf.ones(2)/stdevs3)
stdevs4_inv = tf.diag(tf.ones(2)/stdevs4)

In [75]:
@tf.custom_gradient
def penalty_term_with_gd(z):


    mix_dist = beta1 * mult_dist_1.prob(z) + beta2 * mult_dist_2.prob(z) + beta3 * mult_dist_3.prob(z) + beta4 * mult_dist_4.prob(z) + 1e-6
    y=-tf.reduce_sum(tf.log(mix_dist))
        
    def grad(dz): 
        c1 = beta1 * mult_dist_1.prob(z)*stdevs1_inv * (z - means1)
        c2 = beta2 * mult_dist_2.prob(z)*stdevs2_inv * (z - means2)
            
        c3 = beta3 * mult_dist_3.prob(z)*stdevs3_inv * (z - means3)
        c4 = beta4 * mult_dist_4.prob(z)*stdevs4_inv * (z - means4)
            
        denom = beta1 * mult_dist_1.prob(z) + beta2 * mult_dist_2.prob(z) + beta3 * mult_dist_3.prob(z) + beta4 * mult_dist_4.prob(z) + 1e-6
            
        gradient = tf.matmul(((c1 + c2 + c3 + c4) / denom), dz)
        return gradient
    return y,grad

x = tf.placeholder(dtype=tf.float32)
prob = penalty_term_with_gd(x)


In [76]:
z = tf.constant([[1., 4.],
                 [3.,1.],
                 [2.,3.],
                 [0.5,0.5],
                 [0.2,2.1]
                ],name='z')

In [77]:
z  = [[1., 4.],
                 [3.,1.],
                 [2.,3.],
                 [0.5,0.5],
                 [0.2,2.1]
                ]


In [79]:
with tf.Session() as sess:
    r = sess.run(prob, feed_dict={x: z})
    print(r)

41.0246


In [61]:
mix_dist = beta1 * mult_dist_1.prob(z) + beta2 * mult_dist_2.prob(z) + beta3 * mult_dist_3.prob(z) + beta4 * mult_dist_4.prob(z) + 1e-6
tf.log(mix_dist)

ValueError: Dimensions must be equal, but are 3 and 2 for 'MultivariateNormalDiag_4/prob_25/affine_linear_operator/inverse/sub' (op: 'Sub') with input shapes: [2,3], [2].

In [59]:
y = penalty_term_with_gd(z)

ValueError: Dimensions must be equal, but are 3 and 2 for 'MultivariateNormalDiag_4/prob_24/affine_linear_operator/inverse/sub' (op: 'Sub') with input shapes: [2,3], [2].

In [60]:
g=tf.gradients(ys=y,xs=z)

In [49]:
with tf.Session() as sess:
    print(sess.run( g ))

[array([[0.23243468],
       [0.        ]], dtype=float32)]


In [140]:
tf.reset_default_graph() 
# init_betas = (0.5 * np.ones(10,dtype="float32")).reshape((10,1))
# betas = tf.get_variable(initializer=init_betas, name="betas")
def dists_from_angles(thetas, init_cov,r):
    cx, cy=0,0
#     r=10
    def dist_from_angle(theta, init_cov):
        mean_1 = cx + r * tf.math.cos(theta)
        mean_2 = cx + r * tf.math.sin(theta)
        tf_mean = tf.convert_to_tensor([mean_1, mean_2], dtype=tf.float64)
        u = tf.convert_to_tensor([
            [tf.math.cos(theta),-tf.math.sin(theta)],
            [tf.math.sin(theta),tf.math.cos(theta)]
        ], dtype=tf.float64)

        tf_stdev = tf.matmul(tf.matmul(u,init_cov),tf.transpose(u))
        tf_dist = tf.contrib.distributions.MultivariateNormalFullCovariance(loc=tf_mean, covariance_matrix=tf_stdev)
        return tf_dist   

#     dists = tf.map_fn(lambda theta: dist_from_angle(theta, init_cov), thetas)
    dists = [dist_from_angle(theta, init_cov) for theta in thetas]
    return dists

In [141]:
def penalty_term_with_gd_only_tf(z, angles, init_cov,r):
    tf_dists = dists_from_angles(angles, init_cov,r)
    probs = tf.stack([tf.cast(dist.prob(tf.cast(z, tf.float64)), tf.float32) for dist in tf_dists])
    betas = tf.convert_to_tensor((np.ones(10)/10).reshape((10,1)), dtype=tf.float32)
    denom = tf.reduce_sum(betas * probs, axis=0)
    denom = tf.reshape(denom, (-1, 1))

    y=-tf.reduce_sum(tf.log(1e-30 + denom))

    return y

In [142]:
init_cov = np.array([
    [7.0,0.0],
    [0.0,0.7]
], dtype="float64")

In [143]:
z = tf.constant([[1., 4.],
                 [3.,1.],
                 [2.,3.],
                 [0.5,0.5],
                 [0.2,2.1]
                ],name='z')

In [144]:
# thetas = tf.constant([(2*i*np.pi)/10 for i in range(10)])
# cost = penalty_term_with_gd_only_tf(z,thetas,init_cov)
# thetas = tf.constant([(2*i*np.pi)/10 for i in range(10)])
# penalty_term_with_gd_only_tf(z,thetas,init_cov,r=20)


init_angles = [(2*i*np.pi)/10 for i in range(10)]
thetas = tf.get_variable(initializer=init_angles, name="thetas")
cost = penalty_term_with_gd_only_tf(z,tf.unstack(thetas),init_cov,r=10)

In [145]:

init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    print(sess.run( cost ))

42.786587


In [114]:
z

<tf.Tensor 'z:0' shape=(5, 2) dtype=float32>

NameError: name 'DynamicGmmOnCircle' is not defined

In [151]:
0.0005*500

0.25

In [10]:
sess = tf.InteractiveSession()

In [7]:
init_cov = np.array([
    [0.007,0.0],
    [0.0,0.0007]
], dtype="float64")
ex_mean = tf.convert_to_tensor([1.0,0.0], dtype=tf.float64)

z = tf.convert_to_tensor([[1., 4.],
                 [3., 1.],
                 [2., 3.],
                 [0.5, 0.5],
                 [0.2, 2.1]
                 ], dtype=tf.float64)
tf_dist_1 = tf.contrib.distributions.MultivariateNormalFullCovariance(loc=ex_mean, covariance_matrix=init_cov)
tf_dist_2 = tf.contrib.distributions.MultivariateNormalFullCovariance(loc=tf.convert_to_tensor([-1.0,0.0], dtype=tf.float64), covariance_matrix=init_cov)


In [11]:
grad = tf.gradients(tf_dist_1.log_prob(z), ex_mean)

In [12]:
sess.run(tf.global_variables_initializer())
sess.run(grad)

[array([  242.85714286, 15142.85714286])]

In [14]:
from vae_lib.utils.static_gmm_on_circle import StaticGmmOnCircle

init_angles = [(2 * i * np.pi) / 10 for i in range(10)]
thetas = tf.convert_to_tensor(init_angles, name="thetas")

s_gmm = StaticGmmOnCircle(tf.unstack(thetas), init_cov, r=10, K=10)
cost = s_gmm.penalty_term_with_gd_only_tf(z)

In [24]:
sess.run(tf.global_variables_initializer())
sess.run(grad)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

In [55]:
import numpy as np
import tensorflow as tf


class StaticGmmOnCircle:

    def __init__(self, thetas, init_cov, r, K):
        self.thetas = thetas
        self.init_cov = init_cov
        self.r = r
        self.cx, self.cy = 0, 0
        self.betas = tf.convert_to_tensor((np.ones(K) / K).reshape((1, K)), dtype=tf.float32)
        tf_dists_means_stdevs = self.dists_from_angles()
        self.tf_dists = [d_m_s[0] for d_m_s in tf_dists_means_stdevs]
        self.means = [d_m_s[1] for d_m_s in tf_dists_means_stdevs]
        self.stdevs = [d_m_s[2] for d_m_s in tf_dists_means_stdevs]


    def dist_from_angle(self, theta):
        mean_1 = self.cx + self.r * tf.math.cos(theta)
        mean_2 = self.cx + self.r * tf.math.sin(theta)
        tf_mean = tf.convert_to_tensor([mean_1, mean_2], dtype=tf.float64)
        u = tf.convert_to_tensor([
            [tf.math.cos(theta), -tf.math.sin(theta)],
            [tf.math.sin(theta), tf.math.cos(theta)]
        ], dtype=tf.float64)

        tf_stdev = tf.matmul(tf.matmul(u, self.init_cov), tf.transpose(u))
        tf_dist = tf.contrib.distributions.MultivariateNormalFullCovariance(loc=tf_mean, covariance_matrix=tf_stdev)
        return tf_dist, tf_mean, tf_stdev

    def dists_from_angles(self):

        dists = [self.dist_from_angle(theta) for theta in self.thetas]
        return dists

    def generate_samples(self, n):
        return tf.concat([np.c_[dist.sample(sample_shape=[n]),i*np.ones(n)] for i, dist in enumerate(self.tf_dists)], axis=0)

    def penalty_term_with_gd_only_tf(self, z):
        probs = tf.stack([tf.cast(dist.prob(tf.cast(z, tf.float64)), tf.float32) for dist in self.tf_dists], axis=1)
        denom = tf.reshape(tf.reduce_sum(self.betas * probs, axis=1), shape=(-1, 1))
        cost_vec = -tf.reduce_sum(tf.log(1e-30 + denom), axis=0)
        return tf.reshape(cost_vec, [])

    def penalty_term_supervised(self, z, labels):
        probs = tf.stack([tf.cast(dist.prob(tf.cast(z, tf.float64)), tf.float32) for dist in self.tf_dists], axis=1)

        log_probs = tf.log(1e-30 + probs)
        log_betas = tf.log(1e-30 + self.betas)
        prob_for_single_gauss = tf.transpose(log_betas + log_probs)
        log_prob_for_cases = tf.matmul(labels, prob_for_single_gauss)
        log_prob = tf.reduce_sum(log_prob_for_cases)
        return -log_prob

In [57]:
s_gmm.tf_dists

[<tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_84/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_85/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_86/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_87/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_88/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_89/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_90/' batch_shape=() event_shape=(2,) dtype=float64>,
 <tfp.distributions.

In [58]:
grad = tf.gradients(s_gmm.penalty_term_with_gd_only_tf(z), s_gmm.means)

In [59]:
sess.run(tf.global_variables_initializer())
sess.run(grad)

[array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.]),
 array([-0., -0.])]

In [194]:
init_angles = [(2 * i * np.pi) / 10 for i in range(10)]
thetas = tf.convert_to_tensor(init_angles, name="thetas")
init_cov = np.array([
    [0.07,0.0],
    [0.0,0.007]
], dtype="float64")

s_gmm = StaticGmmOnCircle(tf.unstack(thetas), init_cov, r=5, K=10)

In [195]:
z = tf.convert_to_tensor([[4., 0.], [-4., 0.]], dtype=tf.float64)

In [212]:
labels = tf.convert_to_tensor([
    [1.,0.,0.,0.,0.,0.,0.,0.,0.,0.],
    [0.,0.,0.,0.,0.,1.,0.,0.,0.,0.]
])

In [213]:
# labels = tf.convert_to_tensor([
#     [1.,1.,1.,1.,1.,1.,1.,1.,1.,1.],
#     [1.,1.,1.,1.,1.,1.,1.,1.,1.,1.]
# ])

In [224]:
at_thi_point = s_gmm.means-tf.convert_to_tensor(1.0)
grad = tf.gradients(s_gmm.penalty_term_supervised(at_thi_point, labels), s_gmm.means)
sess.run(tf.global_variables_initializer())
sess.run(grad)

[array([ 5.35780014e-11, -2.25818768e-09]),
 array([0., 0.]),
 array([0., 0.]),
 array([0., 0.]),
 array([0., 0.]),
 array([2.10437417e-10, 3.18208668e-10]),
 array([0., 0.]),
 array([0., 0.]),
 array([0., 0.]),
 array([0., 0.])]

In [226]:
s_gmm.tf_dists[0]

<tfp.distributions.MultivariateNormalFullCovariance 'MultivariateNormalFullCovariance_274/' batch_shape=() event_shape=(2,) dtype=float64>

In [227]:
at_thi_point = s_gmm.means-tf.convert_to_tensor(1.0)
grad = tf.gradients(s_gmm.tf_dists[0].log_prob(z), s_gmm.means[0])
sess.run(tf.global_variables_initializer())
sess.run(grad)

[array([-142.85714286,   -0.        ])]

In [228]:
z


<tf.Tensor 'Const_60:0' shape=(2, 2) dtype=float64>