In [1]:
import theano
from theano import gradient,function
import numpy as np
import theano.tensor as T
from sklearn import mixture
from gmm_op import GMMOp,calc_log_prob_gauss_vector

Using cuDNN version 5105 on context None
Mapped name None to device cuda: GeForce GT 740M (0000:01:00.0)


# Tests

In [2]:

def gen_mixture(ns,dim,sigm=.3):
    """Draw a synthetic data set from a mixture of axis-aligned Gaussians.

    Parameters
    ----------
    ns : sequence of int
        Number of samples to draw for each component.
    dim : int
        Dimensionality of every sample.
    sigm : float
        Base scale; each component's per-axis standard deviation is drawn
        uniformly from [sigm, 2*sigm).

    Returns
    -------
    samples : float32 array, shape (sum(ns), dim)
        Samples of all components stacked in component order.
    means : array, shape (len(ns), dim)
        Empirical per-axis mean of each component's stored samples.
    covs : array, shape (len(ns), dim)
        Empirical per-axis variance (squared np.std) of each component.
    weights : array, shape (len(ns),)
        Fraction of the samples that belongs to each component.
    """
    counts = np.array(ns)
    weights = counts.astype(np.float32)/counts.sum()
    samples = np.zeros((int(sum(counts)),dim),dtype=np.float32)
    means,covs = [],[]
    start = 0
    for count in counts:
        stop = start+count
        # Random draws happen in this fixed order: scale, centre, noise.
        scale = sigm*(np.random.rand(dim)+1.)
        centre = 10*(np.random.rand(dim))
        samples[start:stop] = np.random.randn(count,dim)*scale[None,:]+centre[None,:]
        # Report the statistics of what was actually stored (float32),
        # not the parameters the samples were drawn from.
        block = samples[start:stop]
        covs.append(np.std(block,0)**2)
        means.append(np.mean(block,0))
        start = stop
    return samples,np.array(means),np.array(covs),weights

In [3]:
def test_calc_log_prob_gauss_vector(gm_num=2,ndim=10):
    print 'test calc_log_prob_gauss_vector'
    gmm = mixture.GaussianMixture(covariance_type='diag',
                                   n_components=gm_num,
                                   max_iter=2000,
                                   warm_start=False)
    Y,m,c,w = T.fmatrix(),T.fmatrix(),T.fmatrix(),T.fvector()
    f = function([Y,m,c,w],calc_log_prob_gauss_vector(Y,m,c,w),allow_input_downcast=True)
    print '|loglikelihood_pred-loglikelihood_true|/|loglikelihood_true|:'
    res = []
    for i in range(10):
        data,_,_,_=gen_mixture(np.random.randint(100,1000,gm_num),ndim)
        gmm.fit(data)
        p = f(data,gmm.means_,gmm.covariances_,gmm.weights_)
        p1 = gmm.score_samples(data)
        res.append(np.linalg.norm(p1-p)/np.linalg.norm(p1))
    res = np.array(res)
    print 'mean:',res.mean()
    print 'max:',res.max()
            
test_calc_log_prob_gauss_vector()

test calc_log_prob_gauss_vector
|loglikelihood_pred-loglikelihood_true|/|loglikelihood_true|:
mean: 1.47698129864e-06
max: 1.71692864846e-06


In [4]:
def test_max_likelihood(verbose=True,ndim=10,sigm=0.1):  
    print 'test d(calc_log_prob_gauss_vector)/d(m,c,w) = 0 for 1 component'
    Y,m,c = T.fmatrix(),T.fmatrix(),T.fmatrix()
    lag = T.sum(calc_log_prob_gauss_vector(Y,m,c))
    jac = T.grad(lag,[m,c])
    f = function([Y,m,c],jac+[lag],allow_input_downcast=True)
    print '|dm|/|m|,|dc|/|c|:'
    res = []
    for i in range(10):
        data,m,c,w = gen_mixture([100],ndim,sigm=sigm)
        d = f(data,m,c)
        n = [np.linalg.norm(d[0])/np.linalg.norm(m),\
            np.linalg.norm(d[1])/np.linalg.norm(c)]
        res.append(n)
    res = np.array(res)
    print 'mean:',res.mean(0)
    print 'max:',res.max(0)
    
test_max_likelihood(ndim=2)

test d(calc_log_prob_gauss_vector)/d(m,c,w) = 0 for 1 component
|dm|/|m|,|dc|/|c|:
mean: [ 0.00078738  0.01692911]
max: [ 0.00191541  0.04728827]


In [5]:
def test_lagrangian(gm_num=2,ndim=10,sigm=0.3):    
    print 'test d_lagrangian / d(mcwl) = 0'
    Y,m,c,w,l = T.fmatrix(),T.fmatrix(),T.fmatrix(),T.fvector(),T.fscalar()
    lag = GMMOp.build_lagrangian(Y,m,c,w,l)
    jac = T.grad(lag,[m,c,w,l])
    f = function([Y,m,c,w,l],jac+[lag],allow_input_downcast=True)
    print '|dm|/|m|,|dc|/|c|,|dw|/|w|:'
    res = []
    for i in range(10):
        data,m,c,w = gen_mixture(np.random.randint(100,1000,gm_num),ndim,sigm=sigm)
        l = np.float32(len(data))
        d = f(data,m,c,w,l)
        n = [np.linalg.norm(d[0])/np.linalg.norm(m),\
            np.linalg.norm(d[1])/np.linalg.norm(c),\
            np.linalg.norm(d[1])/np.linalg.norm(w),\
            float(d[-2])]
        res.append(n)
        break
    res = np.array(res)
    print 'mean:',res.mean(0)
    print 'max:',res.max(0)
test_lagrangian(gm_num=2,ndim=10,sigm=0.1)

test d_lagrangian / d(mcwl) = 0
|dm|/|m|,|dc|/|c|,|dw|/|w|:
mean: [ 0.02143326  0.49187019  0.06646995  0.        ]
max: [ 0.02143326  0.49187019  0.06646995  0.        ]


In [6]:
def test_build_lin_system(ndim=10,sigm=0.1):    
    print 'test build_linear_system (solution=true_solution) for 1 component'
    Y = T.fvector()
    m = T.mean(Y.reshape((-1,ndim)),axis=0).flatten()
    c = T.std(Y.reshape((-1,ndim)),axis=0).flatten()**2
    w = T.ones(1)
    l = T.reshape(Y.shape[0]//ndim,(1,))
    mcwl = T.concatenate((m,c,w,l))    
    N,M = GMMOp(1,ndim).build_linear_system(Y,mcwl)
    dmcwl = function([Y],gradient.jacobian(mcwl,[Y])+[N,M])
    print '|dm|/|m|,|dc|/|c|:'
    rres = []
    for i in range(10):
        data,_,_,_=gen_mixture([1000],ndim,sigm=sigm)
        res = dmcwl(data.flatten().astype(np.float32))
        sol = np.linalg.solve(res[2],res[1])
        n = [(np.linalg.norm(sol[:ndim]-res[0][:ndim])/np.linalg.norm(res[0][ndim])),\
            (np.linalg.norm(sol[ndim:2*ndim]-res[0][ndim:2*ndim])/np.linalg.norm(res[0][ndim:2*ndim]))]
        rres.append(n)
    rres = np.array(rres)
    print 'mean:',rres.mean(0)
    print 'max:',rres.max(0)
test_build_lin_system(ndim=2)

test build_linear_system (solution=true_solution) for 1 component
|dm|/|m|,|dc|/|c|:
mean: [  6.33881461e-06   1.53918478e-07]
max: [  1.24029597e-05   2.25462702e-07]


In [7]:
def test_solve_linear_system(gm_num = 2,ndim=10,sigm=0.01):    
    print 'test solve_linear_system (solution=true_solution)'
    Y = T.fmatrix()
    m = T.fmatrix()
    c = T.fmatrix()
    w = T.fvector()
    l = T.reshape(Y.shape[0]//ndim,(1,))
    mcwl = T.concatenate((m.flatten(),c.flatten(),w,l)) 
    gmm_op = GMMOp(gm_num,ndim)
    N,M = gmm_op.build_linear_system(Y.flatten(),mcwl)
    sol = gmm_op.solve_linear_system(N,M)
    f = function([Y,m,c,w],[sol,N,M],allow_input_downcast=True)
    rres = []
    print '|MX-N|/|N|:'
    for i in range(10):
        data,m,c,w=gen_mixture(np.random.randint(100,1000,gm_num),ndim,sigm=sigm)
        res = f(data,m,c,w)
        sol = res[0]
        M = res[2]
        N = res[1]
        n = np.linalg.norm(M.dot(sol)-N)/np.linalg.norm(N)
        rres.append(n)
    rres = np.array(rres)
    print 'mean:',rres.mean(0)
    print 'max:',rres.max(0)
test_solve_linear_system(ndim=1,sigm=1.)
test_solve_linear_system(ndim=3,sigm=.001)
test_solve_linear_system(ndim=3,sigm=1.)

test solve_linear_system (solution=true_solution)
|MX-N|/|N|:
mean: 6.13701605898e-15
max: 2.42491727137e-14
test solve_linear_system (solution=true_solution)
|MX-N|/|N|:
mean: 1.25172743456e-16
max: 1.61621995936e-16
test solve_linear_system (solution=true_solution)
|MX-N|/|N|:
mean: 3.57353042104e-16
max: 7.55003776977e-16


In [8]:
def test_gmm_1(ndim=10,sigm=0.3):   
    print 'test gradient for 1 component'
    X = T.fmatrix("X")
    mcw = GMMOp(1,ndim)(X.flatten())    
    m2 = T.mean(X,axis=0).flatten()
    c2 = (T.std(X,axis=0)**2).flatten()
    mcw1 = T.concatenate((m2,c2,T.ones(1)))
    d_mcw = gradient.jacobian(mcw,[X])[0]
    d_mcw1 = gradient.jacobian(mcw1,[X])[0]
    f = function([X],[d_mcw,d_mcw1],allow_input_downcast=True)
    print'|d_true-d_predicted|'
    rres = []
    for i in range(10):
        data,_,_,_=gen_mixture([1000],ndim,sigm=sigm)
        res = f(data.astype(np.float32))
        n = [np.abs(res[0][:ndim]-res[1][:ndim]).mean()/np.abs(res[1][:ndim]).mean(),\
            np.abs(res[0][ndim:2*ndim]-res[1][ndim:2*ndim]).mean()/np.abs(res[1][ndim:2*ndim]).mean()]
        rres.append(n)
    rres = np.array(rres)
    print 'mean:',rres.mean(0)
    print 'max:',rres.max(0)

test_gmm_1()

test gradient for 1 component
|d_true-d_predicted|
mean: [  3.82526736e-07   1.01172018e-05]
max: [  5.66779590e-07   1.23075915e-05]


In [14]:
from gmm_op import GMM

def test_numeric(gm_num=2,ndim=5,sigm=0.001):    
    print 'get numerical gradient and compare it with symbolic'
    X = T.fvector()
    gmm = GMM(gm_num)
    f = function([X],T.sum(GMMOp(gm_num,ndim,gmm)(X)),allow_input_downcast=True)
    gmm_op = T.sum(GMMOp(gm_num,ndim,gmm)(X))
    fg = function([X],[gmm_op]+T.grad(gmm_op,[X]),allow_input_downcast=True)
    
    def calc_num_grad(X,eps):
        f0,sym_grad = tuple(fg(X))
        num_grad = np.zeros_like(sym_grad)
        for i in range(len(X)):
            dX = np.zeros_like(X)
            dX[i] = eps
            num_grad[i] = f(X+dX)
        num_grad = (num_grad-f0)/eps
        return sym_grad,num_grad
    res = []
    print 'sigma,|sym-num|,|sym-num|/|num|'
    for i in range(10):
        data,_,_,_ = gen_mixture(np.random.randint(10,100,gm_num),ndim,sigm=sigm)
        sym_grad,num_grad = calc_num_grad(data.flatten(),1e-2)
        n = [sigm,np.linalg.norm(sym_grad-num_grad),np.linalg.norm(sym_grad-num_grad)/np.linalg.norm(num_grad)]
        sigm = sigm*3.
        res.append(n)
    res = np.array(res)
    np.set_printoptions(4)
    print res
    print 'mean:',res.mean(0)
    print 'max:',res.max(0)
    
test_numeric()

get numerical gradient and compare it with symbolic
sigma,|sym-num|,|sym-num|/|num|
[[  1.0000e-03   5.8509e-03   1.4531e-02]
 [  3.0000e-03   3.8714e-03   4.3418e-03]
 [  9.0000e-03   3.3928e-03   3.9363e-03]
 [  2.7000e-02   2.6152e-03   3.0358e-03]
 [  8.1000e-02   4.3255e-03   4.7403e-03]
 [  2.4300e-01   3.7373e-03   4.0104e-03]
 [  7.2900e-01   5.2553e-03   6.3273e-03]
 [  2.1870e+00   6.2033e-03   4.1314e-03]
 [  6.5610e+00   9.2817e-02   2.1186e-02]
 [  1.9683e+01   5.8645e-01   4.4235e-02]]
mean: [ 2.9524  0.0715  0.011 ]
max: [ 19.683    0.5865   0.0442]


In [13]:
# Benchmark: time one evaluation of sum(GMMOp(X)) together with its gradient
# with respect to X, for a 2-component mixture in 10 dimensions.
gm_num=2
ndim = 10
X = T.fvector()
gmm = GMM(gm_num)
# Scalar objective: sum of everything the op returns for the flat input X.
gmm_op = T.sum(GMMOp(gm_num,ndim,gmm)(X))
# Compiled function returning [objective, d(objective)/dX].
fg = function([X],[gmm_op]+T.grad(gmm_op,[X]),allow_input_downcast=True)
# NOTE(review): a fresh random data set (100-999 samples per component) is
# generated inside the timed expression, so the reported time includes
# gen_mixture as well as fg, and the input size varies between runs.
%timeit fg(gen_mixture(np.random.randint(100,1000,gm_num),ndim,sigm=0.1)[0].flatten())

10 loops, best of 3: 94.8 ms per loop
