In [1]:
import theano

Using cuDNN version 5110 on context None
Mapped name None to device cuda0: GeForce GTX 660 Ti (0000:01:00.0)


In [61]:
from theano import gradient,function
import numpy as np
import theano.tensor as T
from sklearn import mixture
from theano_utils import calc_log_prob_gauss_vector

class GMM():
    def __init__(self,gm_num):
        self.X = None
        self.gmm = mixture.GaussianMixture(covariance_type='diag',n_components=gm_num, max_iter=2000)
        
    def fit(self,X):
        if(not self.X is None):
            if(self.X.shape == X.shape):
                if(np.abs(self.X-X).mean()<1e-10):
                    return ;
        self.X = X
        self.gmm.fit(X)
        
    @property
    def weights_(self):
        return self.gmm.weights_
    
    @property
    def covariances_(self):
        return self.gmm.covariances_
    
    @property
    def means_(self):
        return self.gmm.means_
        
        
class GMMOp(theano.Op):
    def __init__(self,gm_num,ndim,gmm=None):
        """
        fit gmm with diagonal covariances to input vectors
        input: vector[n_samples*n_dim] flattened
        output: vector means[gm_num*n_dim],covars[gm_num*n_dim],weights[gm_num], flattened
        """
        super(GMMOp, self).__init__()
        self.otypes = [T.fvector]
        self.itypes = [T.fvector]
        self.gm_num = gm_num
        self.ndim = ndim
        if(gmm is None):
            self.gmm =GMM(self.gm_num)
        else:
            self.gmm = gmm
        self.reg_coef = 1e-5

    def perform(self, node, (X,), output_storage):       
        self.gmm.fit(X.reshape((-1,self.ndim)))
        means = self.gmm.means_.flatten()
        covars = self.gmm.covariances_.flatten()
        weights = self.gmm.weights_.flatten()
        output_storage[0][0] = np.concatenate((means,covars,weights)).astype(np.float32)
            
    def build_lagrangian(self,Yvec,meansvec,covarsvec,weights,lam):
        n_dim = self.ndim
        gm_num = self.gm_num
        Y = Yvec.reshape((-1,n_dim))
        means = T.reshape(meansvec, (gm_num, n_dim))
        covars = T.reshape(covarsvec, (gm_num,n_dim))
        log_prob = calc_log_prob_gauss_vector(Y, means,covars,weights)        
        return T.sum(log_prob) + lam * (T.sum(weights) - 1) 
        
    def build_linear_system(self,Yvec,meansvec,covarsvec,weights,lam):
        n_dim = self.ndim
        gm_num = self.gm_num
        lagrangian = self.build_lagrangian(Yvec,meansvec,covarsvec,weights,lam)
        d_lagrangian = gradient.jacobian(lagrangian, [meansvec, covarsvec, weights],\
                              consider_constant=[Yvec, meansvec, covarsvec, weights])
        hm = gradient.jacobian(d_lagrangian[0], [Yvec, meansvec, covarsvec, weights],\
                              consider_constant=[Yvec, meansvec, covarsvec, weights])
        hc = gradient.jacobian(d_lagrangian[1], [Yvec, meansvec, covarsvec, weights],\
                              consider_constant=[Yvec, meansvec, covarsvec, weights])
        hw = gradient.jacobian(d_lagrangian[2], [Yvec, meansvec, covarsvec, weights, lam],\
                              consider_constant=[Yvec, meansvec, covarsvec, weights,lam])
        
        mean_row = T.concatenate((hm[1], hm[2], hm[3], T.zeros((n_dim*gm_num, 1))), axis=1)
        cov_row = T.concatenate((hc[1], hc[2], hc[3], T.zeros((n_dim*gm_num, 1))), axis=1)
        
        weight_row = T.concatenate((hw[1], hw[2], hw[3], T.reshape(hw[4], (gm_num, 1))), axis=1)
        lambda_row = T.concatenate(
            (T.zeros((1, 2*n_dim*gm_num)),
             T.reshape(hw[4], (1, gm_num)),
             T.zeros((1, 1))), axis=1)
        M = T.concatenate((mean_row, cov_row, weight_row, lambda_row))
        N = T.concatenate((-hm[0], -hc[0], -hw[0], T.zeros((1, hw[0].shape[1]))))
        return N,M#MX = N
    
    def solve_diag_linear(self,N,a,b,c,D):#MX=N
        '''
          |A B _|
        M=|_ C _|
          |_ _ D|
        '''
        n_dim = self.ndim
        gm_num = self.gm_num
        n_samples = N.shape[1]//n_dim
        a = a + T.ones_like(a)*self.reg_coef
        c = c + T.ones_like(c) * self.reg_coef
        D = D + T.eye(D.shape[0]) * self.reg_coef
        e = 1. / (a - b / c * b)
        f = -e * b / c
        h = (T.ones_like(a) - f * b) / c
        
        e = e.reshape((gm_num,n_dim))
        h = h.reshape((gm_num,n_dim))
        f = f.reshape((gm_num,n_dim))
        
        eye = T.eye(n_dim)            
        mu = N[:gm_num*n_dim].reshape((gm_num,n_dim,n_samples,n_dim))*eye[None,:,None,:]
        cov = N[gm_num*n_dim:2*gm_num*n_dim].reshape((gm_num,n_dim,n_samples,n_dim))*eye[None,:,None,:]
        dX1 = e[:,None,None,:] * mu + f[:,None,None,:] * cov
        dX2 = f[:,None,None,:] * mu + h[:,None,None,:] * cov
        
        
        dX1 = dX1.reshape((-1,n_samples*n_dim))
        dX2 = dX2.reshape((-1,n_samples*n_dim))        
        Di = T.nlinalg.matrix_inverse(D)
        dX3 = Di.dot(N[n_dim * 2 * gm_num:, :])
        
        dX = T.concatenate((dX1,dX2,dX3),axis=0)
        return dX
        
    def solve_general_linear(self,N,M):#MX=N
        M = M + self.reg_coef * T.eye(M.shape[0])
        Mi = T.nlinalg.matrix_inverse(M)
        return M.dot(N)
    
    def solve_linear_system(self,N,M):
        par_dim = self.ndim*self.gm_num
        def diag(M):            
            a = T.diag(M)
            A = T.diag(a)
            return a,abs(A - M).sum()
        a,na = diag(M[0:par_dim, 0:par_dim])
        b,nb = diag(M[0:par_dim, par_dim:2 * par_dim])
        c,nc = diag(M[par_dim:2 * par_dim, par_dim:2 * par_dim])
        D = M[2 * par_dim:, 2 * par_dim:]
        return theano.ifelse.ifelse(T.le(na+nb+nc,1e-6),\
                             self.solve_diag_linear(N,a,b,c,D),\
                             self.solve_general_linear(N,M))
        return dX
    
    def grad(self, (Yvec,), output_grads):
        gm_num,ndim = self.gm_num,self.ndim
        gmm_res = GMMOp(gm_num,ndim,self.gmm)(Yvec)
        meansvec = gmm_res[:gm_num*ndim]
        covarsvec = gmm_res[gm_num*ndim:2*gm_num*ndim]
        weights = gmm_res[gm_num*ndim*2:]
        n_samples = Yvec.shape[0]//self.ndim
        N,M = self.build_linear_system(Yvec,meansvec, covarsvec, weights, n_samples)
        dX = self.solve_linear_system(N,M)
        return [output_grads[0].dot(dX[0:dX.shape[0]-1, :])]


def get_gmm(X,gm_num,ndims):
    f = GMMOp(gm_num,ndims)(X.flatten())
    means = f[:gm_num*ndims].reshape((gm_num,ndims))
    covars = f[gm_num*ndims:2*gm_num*ndims].reshape((gm_num,ndims))
    weights = f[2*gm_num*ndims:]
    return means,covars,weights

In [62]:
def test_1_gmm(n_samp,dim):
    X = T.fmatrix("X")
    m1,c1,w1 =  get_gmm(X,1,dim)
    
    m2 = T.mean(X,axis=0).reshape((1,-1))
    c2 = (T.std(X,axis=0)+0.0001).reshape((1,-1))
    
    f = function([X],[m1,c1,w1,m2,c2])
    res = f(np.random.randn(n_samp,dim).astype(np.float32))
    
    print'dif mean,dif cov, w', np.abs(res[0]-res[3]).mean(),np.abs(res[1]-res[4]).mean(),np.abs(res[2]).sum()
    
    dm1 = gradient.jacobian(m1.flatten(),[X])
    dc1 = gradient.jacobian(c1.flatten(),[X])
    dw1 = gradient.jacobian(w1.flatten(),[X])
    dm2 = gradient.jacobian(m2.flatten(),[X])
    dc2 = gradient.jacobian(m2.flatten(),[X])
    
    df = function([X],dm1+dc1+dw1+dm2+dc2)
    res = df((np.random.randn(n_samp,dim)*0.1+3.).astype(np.float32))
    print'dif mean,dif cov, w', np.abs(res[0]-res[3]).mean(),np.abs(res[1]-res[4]).mean(),np.abs(res[2]).sum()

    
test_1_gmm(1000,10)

dif mean,dif cov, w 1.25845e-08 0.0146019 1.0
dif mean,dif cov, w 1.37446e-10 0.0001 0.0


In [4]:
X = T.fmatrix("X")
m1,c1,w1 =  get_gmm(X,2,10)

m2 = T.mean(X,axis=0).reshape((1,-1))
c2 = (T.std(X,axis=0)+0.0001).reshape((1,-1))
dm1 = gradient.jacobian(m1.flatten(),[X])
dc1 = gradient.jacobian(c1.flatten(),[X])
dw1 = gradient.jacobian(w1.flatten(),[X])

df = function([X],dm1+dc1+dw1)

In [5]:
data = (np.random.randn(1000,10)*0.1+3.).astype(np.float32)
%timeit df(data)

1 loop, best of 3: 211 ms per loop


In [None]:
gradient.numeric_grad()