In [1]:
import theano

Using cuDNN version 5105 on context None
Mapped name None to device cuda: GeForce GT 740M (0000:01:00.0)


In [4]:
from theano import gradient,function
import numpy as np
import theano.tensor as T
from sklearn import mixture
from theano_utils import calc_log_prob_gauss_vector

class GMMGradOp(theano.Op):
    def __init__(self,gm_num, ndim):
        super(GMMGradOp,self).__init__()  
        self.otypes = [T.fmatrix]
        self.itypes = [T.fvector]   
        self.ndim = ndim
        self.gm_num = gm_num
        self.reg_coef = 1e-5
        Yvec = T.dvector('Y')
        meansvec = T.dvector('means')
        covarsvec = T.dvector('covars')
        weights = T.dvector('weights')
        lam = T.dscalar('lambda')
        gm_num = weights.shape[0]
        ndim = meansvec.shape[0] // gm_num
        Y = T.reshape(Yvec, (Yvec.shape[0] // ndim, ndim))
        means = T.reshape(meansvec, (gm_num, meansvec.shape[0] // gm_num))
        covars = T.reshape(covarsvec, (gm_num, meansvec.shape[0] // gm_num))
        LL = calc_log_prob_gauss_vector(Y, means,covars,weights)
        
        LL_lag = T.sum(LL) + lam * (T.sum(weights) - 1)
        
        LLg = gradient.jacobian(LL_lag, [Yvec, meansvec, covarsvec, weights, lam])
               
        llhm = gradient.jacobian(LLg[1], [Yvec, meansvec, covarsvec, weights])
        llhc = gradient.jacobian(LLg[2], [Yvec, meansvec, covarsvec, weights])
        llhw = gradient.jacobian(LLg[3], [Yvec, meansvec, covarsvec, weights, lam])
        
        self.gmm_hm = function([Yvec, meansvec, covarsvec, weights, lam], llhm, allow_input_downcast=True)
        self.gmm_hc = function([Yvec, meansvec, covarsvec, weights, lam], llhc, allow_input_downcast=True)
        self.gmm_hw = function([Yvec, meansvec, covarsvec, weights, lam], llhw, allow_input_downcast=True)
        
    def solve_lin_sys_for_gmm(self, Xvec, meansvec, covarsvec, weights):
        gm_num = len(weights)
        n_dim = self.ndim
        n_samples = len(Xvec)//n_dim
        lam = n_samples
        hm = self.gmm_hm(Xvec, meansvec, covarsvec, weights, lam)
        hc = self.gmm_hc(Xvec, meansvec, covarsvec, weights, lam)
        hw = self.gmm_hw(Xvec, meansvec, covarsvec, weights, lam)
        mean_row = np.concatenate((hm[1], hm[2], hm[3], np.zeros((len(meansvec), 1))), axis=1)
        cov_row = np.concatenate((hc[1], hc[2], hc[3], np.zeros((len(meansvec), 1))), axis=1)
        weight_row = np.concatenate((hw[1], hw[2], hw[3], np.reshape(hw[4], (gm_num, 1))), axis=1)
        lambda_row = np.concatenate(
            (np.zeros((1, len(meansvec))),
             np.zeros((1, len(meansvec))),
             np.reshape(hw[4], (1, gm_num)),
             np.zeros((1, 1))), axis=1)
        M = np.concatenate((mean_row, cov_row, weight_row, lambda_row))
        N = np.concatenate((-hm[0], -hc[0], -hw[0], np.zeros((1, hw[0].shape[1]))))
        print M
        par_dim = gm_num * n_dim
        a = np.diag(M[0:par_dim, 0:par_dim])
        A = np.diag(a)
        b = np.diag(M[0:par_dim, par_dim:2 * par_dim])
        B = np.diag(b)
        c = np.diag(M[par_dim:2 * par_dim, par_dim:2 * par_dim])
        C = np.diag(c)
        if (np.linalg.norm(A - M[0:par_dim, 0:par_dim]) < 1e-15 and
            np.linalg.norm(B - M[0:par_dim, par_dim:2*par_dim]) < 1e-15 and
            np.linalg.norm(C - M[par_dim:2*par_dim, par_dim:2 * par_dim]) < 1e-15):
            D = M[2 * par_dim:, 2 * par_dim:]
            if (np.linalg.matrix_rank(A) < A.shape[0]):
                a = a + np.ones(a.shape[0])*self.reg_coef
            if (np.linalg.matrix_rank(C) < C.shape[0]):
                c = c + np.ones(a.shape[0]) * self.reg_coef
            if (np.linalg.matrix_rank(D) < D.shape[0]):
                D = D + np.eye(D.shape[0]) * self.reg_coef
            Di = np.linalg.inv(D)
            e = 1. / (a - b / c * b)
            f = -e * b / c
            h = (np.ones(a.shape[0]) - f * b) / c
            dX = np.zeros(N.shape)
            for i in range(0, n_samples):
                N1 = N[:, i * n_dim:(i + 1) * n_dim]
                for gi in range(0, gm_num):
                    n_mu_gi = np.diag(N1[gi * n_dim:(gi + 1) * n_dim, 0:n_dim])
                    e_gi = e[gi * n_dim:(gi + 1) * n_dim]
                    n_cov_gi = np.diag(
                        N1[n_dim * gm_num + gi * n_dim:n_dim * gm_num + (gi + 1) * n_dim, 0:n_dim])
                    f_gi = f[gi * n_dim:(gi + 1) * n_dim]
                    h_gi = h[gi * n_dim:(gi + 1) * n_dim]
                    dX[gi * n_dim: (gi + 1) * n_dim, i * n_dim:(i + 1) * n_dim] = np.diag(
                        e_gi * n_mu_gi + f_gi * n_cov_gi)
                    dX[n_dim * gm_num + gi * n_dim: n_dim * gm_num + (gi + 1) * n_dim,
                    i * n_dim: (i + 1) * n_dim] = np.diag(f_gi * n_mu_gi + h_gi * n_cov_gi)
            dX[n_dim * 2 * gm_num:, :] = Di.dot(N[n_dim * 2 * gm_num:, :])
        else:
            M = M + self.reg_coef * np.eye(M.shape[0])
            dX = np.linalg.solve(M, N)
        return dX
    

    def perform(self, node, (X,), output_storage):
        gmm = mixture.GaussianMixture(covariance_type='diag',n_components=self.gm_num, max_iter=2000)
        gmm.fit(X.reshape((-1,self.ndim)))
        means = np.copy(gmm.means_).astype(np.float32)
        covars = np.copy(gmm.covariances_).astype(np.float32)
        weights = np.copy(gmm.weights_).astype(np.float32)
        dX = self.solve_lin_sys_for_gmm(X.flatten(),means.flatten(),covars.flatten(),weights.flatten())
        output_storage[0][0]=dX[0:dX.shape[0]-1, :].astype(np.float32)

class GMMOp(theano.Op):
    def __init__(self,gm_num,ndim):
        """
        fit gmm with diagonal covariances to input vectors
        input: vector[n_samples*n_dim] flattened
        output: vector means[gm_num*n_dim],covars[gm_num*n_dim],weights[gm_num], flattened
        """
        super(GMMOp, self).__init__()
        self.otypes = [T.fvector]
        self.itypes = [T.fvector]
        self.gm_num = gm_num
        self.ndim = ndim
        self.gmm = mixture.GaussianMixture(covariance_type='diag',n_components=self.gm_num, max_iter=2000)

    def perform(self, node, (X,), output_storage):       
        self.gmm.fit(X.reshape((-1,self.ndim)))
        means = self.gmm.means_.flatten()
        covars = self.gmm.covariances_.flatten()
        weights = self.gmm.weights_.flatten()
        output_storage[0][0] = np.concatenate((means,covars,weights)).astype(np.float32)
        
    def grad(self, (X,), output_grads):
        return [output_grads[0].dot(GMMGradOp(self.gm_num,self.ndim)(X))]
    
def get_gmm(X,gm_num,ndims):
    f = GMMOp(gm_num,ndims)(X.flatten())
    means = f[:gm_num*ndims].reshape((gm_num,ndims))
    covars = f[gm_num*ndims:2*gm_num*ndims].reshape((gm_num,ndims))
    weights = f[2*gm_num*ndims:]
    return means,covars,weights

In [5]:
def test_1_gmm(n_samp,dim):
    X = T.fmatrix("X")
    m1,c1,w1 =  get_gmm(X,1,dim)
    
    m2 = T.mean(X,axis=0).reshape((1,-1))
    c2 = (T.std(X,axis=0)+0.0001).reshape((1,-1))
    
    f = function([X],[m1,c1,w1,m2,c2])
    res = f(np.random.randn(n_samp,dim).astype(np.float32))
    
    print'dif mean,dif cov, w', np.abs(res[0]-res[3]).mean(),np.abs(res[1]-res[4]).mean(),np.abs(res[2]).sum()
    
    dm1 = gradient.jacobian(m1.flatten(),[X])
    dc1 = gradient.jacobian(c1.flatten(),[X])
    dw1 = gradient.jacobian(w1.flatten(),[X])
    dm2 = gradient.jacobian(m2.flatten(),[X])
    dc2 = gradient.jacobian(m2.flatten(),[X])
    
    df = function([X],dm1+dc1+dw1+dm2+dc2)
    res = df(np.random.randn(n_samp,dim).astype(np.float32))
    print'dif mean,dif cov, w', np.abs(res[0]-res[3]).mean(),np.abs(res[1]-res[4]).mean(),np.abs(res[2]).sum()

test_1_gmm(10,2)

dif mean,dif cov, w 0.0 0.218542 1.0
[[ -7.63563650e+00   0.00000000e+00   4.34390790e-09   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00  -1.09307430e+01   0.00000000e+00  -2.22550972e-08
    0.00000000e+00   0.00000000e+00]
 [  4.34390760e-09   0.00000000e+00  -2.91514272e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00  -2.22550972e-08   0.00000000e+00  -5.97404388e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   -1.00000000e+01   1.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    1.00000000e+00   0.00000000e+00]]
[[ -7.63563650e+00   0.00000000e+00   4.34390790e-09   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00  -1.09307430e+01   0.00000000e+00  -2.22550972e-08
    0.00000000e+00   0.00000000e+00]
 [  4.34390760e-09   0.00000000e+00  -2.91514272e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00]

In [10]:
np.set_printoptions(1)
X = T.fmatrix("X")
m1,c1,w1 =  get_gmm(X,2,2)
dm1 = gradient.jacobian(m1.flatten(),[X])

df = function([X],dm1)
res = df(np.random.rand(100,2).astype(np.float32))

[[ -3.3e+03   2.1e+01   3.6e+02  -4.8e+00   3.0e+03   1.1e+01  -1.9e+03
   -2.6e+00   3.7e+01  -7.8e+01   0.0e+00]
 [  2.1e+01  -8.3e+02   1.9e+01  -2.0e+01   1.4e+02   2.4e+01  -1.1e+02
   -1.4e+01   1.8e+00  -3.8e+00   0.0e+00]
 [  3.6e+02   1.9e+01  -1.7e+03  -4.1e+00   2.3e+03   1.5e+01  -2.8e+03
   -7.6e+00   3.5e+01  -7.2e+01   0.0e+00]
 [ -4.8e+00  -2.0e+01  -4.1e+00  -3.9e+02  -3.3e+01  -2.6e+01   1.9e+01
    1.4e+01  -4.2e-01   8.7e-01   0.0e+00]
 [  3.0e+03   1.4e+02   2.3e+03  -3.3e+01  -7.9e+04   5.5e+01  -1.1e+04
    2.3e-01   2.6e+02  -5.4e+02   0.0e+00]
 [  1.1e+01   2.4e+01   1.5e+01  -2.6e+01   5.5e+01  -5.3e+03  -1.2e+02
   -5.4e+01   1.2e+00  -2.4e+00   0.0e+00]
 [ -1.9e+03  -1.1e+02  -2.8e+03   1.9e+01  -1.1e+04  -1.2e+02  -4.6e+04
    8.0e+01  -2.1e+02   4.3e+02   0.0e+00]
 [ -2.6e+00  -1.4e+01  -7.6e+00   1.4e+01   2.3e-01  -5.4e+01   8.0e+01
   -2.5e+03  -4.6e-01   9.5e-01   0.0e+00]
 [  3.7e+01   1.8e+00   3.5e+01  -4.2e-01   2.6e+02   1.2e+00  -2.1e+02
   -4.6e