In [1]:
import theano
from theano import gradient,function
import numpy as np
import theano.tensor as T
from sklearn import mixture
from gmm_op import GMMOp,calc_log_prob_gmm,get_gmm

Using cuDNN version 5105 on context None
Mapped name None to device cuda: GeForce GT 740M (0000:01:00.0)


# Tests

In [2]:
def gen_mixture(ns,dim,sigm=.3,rng=None):
    """Draw samples from a random Gaussian mixture with axis-aligned components.

    Parameters
    ----------
    ns : sequence of int
        Number of samples to draw for each component.
    dim : int
        Dimensionality of every sample.
    sigm : float
        Scale of the per-axis standard deviation; each axis gets a value
        drawn uniformly from [sigm, 2*sigm).
    rng : object with ``rand`` and ``randn`` methods, optional
        Random source, e.g. ``np.random.RandomState(seed)``. When omitted the
        global ``np.random`` state is used, exactly as before this parameter
        existed; pass a seeded instance to make a run reproducible.

    Returns
    -------
    res : float32 array of shape (sum(ns), dim)
        The samples, stored component after component.
    means : array of shape (len(ns), dim)
        Sample mean of each component's block of ``res``.
    covs : array of shape (len(ns), dim)
        Squared sample standard deviation of each component's block.
    weights : array of shape (len(ns),)
        ``ns / ns.sum()``.
    """
    if rng is None:
        rng = np.random
    ns = np.array(ns)
    weights = ns.astype(np.float32)/ns.sum()
    res = np.zeros((int(sum(ns)),dim),dtype=np.float32)
    means = []
    covs = []
    start = 0
    for n in ns:
        # Draw order (scale, centre, noise) is kept so that a seeded global
        # state yields the same samples as the previous implementation.
        scale = sigm*(rng.rand(dim)+1.)
        center = 10*rng.rand(dim)
        block = res[start:start+n]  # view into res; writing fills res in place
        block[:] = rng.randn(n,dim)*scale[None,:]+center[None,:]
        # Report the moments of the points actually drawn (after the float32
        # cast), not the parameters they were drawn from.
        covs.append(np.std(block,0)**2)
        means.append(np.mean(block,0))
        start = start+n
    return res,np.array(means),np.array(covs),weights

In [4]:
def test_calc_log_prob_gmm(gm_num=2,ndim=10):
    print 'test calc_log_prob_gauss_vector'
    gmm = mixture.GaussianMixture(covariance_type='diag',
                                   n_components=gm_num,
                                   max_iter=2000,
                                   warm_start=False)
    Y,m,c,w = T.fmatrix(),T.fmatrix(),T.fmatrix(),T.fvector()
    f = function([Y,m,c,w],calc_log_prob_gmm(Y,m,c,w),allow_input_downcast=True)
    print '|loglikelihood_pred-loglikelihood_true|/|loglikelihood_true|:'
    res = []
    for i in range(10):
        data,_,_,_=gen_mixture(np.random.randint(100,1000,gm_num),ndim)
        gmm.fit(data)
        p = f(data,gmm.means_,gmm.covariances_,gmm.weights_)
        p1 = gmm.score_samples(data)
        res.append(np.linalg.norm(p1-p)/np.linalg.norm(p1))
    res = np.array(res)
    print 'mean:',res.mean()
    print 'max:',res.max()
            
# Run the comparison with the defaults (2 components, 10 dimensions).
test_calc_log_prob_gmm()

test calc_log_prob_gauss_vector
|loglikelihood_pred-loglikelihood_true|/|loglikelihood_true|:
mean: 1.49185173912e-06
max: 1.94841947864e-06


In [4]:
def test_max_likelihood(verbose=True,ndim=10,sigm=0.1):  
    print 'test d(calc_log_prob_gauss_vector)/d(m,c,w) = 0 for 1 component'
    Y,m,c = T.fmatrix(),T.fmatrix(),T.fmatrix()
    lag = T.sum(calc_log_prob_gauss_vector(Y,m,c))
    jac = T.grad(lag,[m,c])
    f = function([Y,m,c],jac+[lag],allow_input_downcast=True)
    print '|dm|/|m|,|dc|/|c|:'
    res = []
    for i in range(10):
        data,m,c,w = gen_mixture([100],ndim,sigm=sigm)
        d = f(data,m,c)
        n = [np.linalg.norm(d[0])/np.linalg.norm(m),\
            np.linalg.norm(d[1])/np.linalg.norm(c)]
        res.append(n)
    res = np.array(res)
    print 'mean:',res.mean(0)
    print 'max:',res.max(0)
    
# Run the stationarity check on 2-dimensional data (sigm keeps its default 0.1).
test_max_likelihood(ndim=2)

test d(calc_log_prob_gauss_vector)/d(m,c,w) = 0 for 1 component
|dm|/|m|,|dc|/|c|:
mean: [ 0.00053602  0.02163022]
max: [ 0.00157368  0.0420338 ]


In [7]:
def test_lagrangian(gm_num=2,ndim=10,sigm=0.3):    
    print 'test d_lagrangian / d(mcwl) = 0'
    Y,m,c,w,l = T.fmatrix(),T.fmatrix(),T.fmatrix(),T.fvector(),T.fscalar()
    lag = GMMOp.build_lagrangian(Y,m,c,w,l)
    jac = T.grad(lag,[m,c,w,l])
    f = function([Y,m,c,w,l],jac+[lag],allow_input_downcast=True)
    print '|dm|/|m|,|dc|/|c|,|dw|/|w|:'
    res = []
    for i in range(10):
        data,m,c,w = gen_mixture(np.random.randint(100,1000,gm_num),ndim,sigm=sigm)
        l = np.float32(len(data))
        d = f(data,m,c,w,l)
        n = [np.linalg.norm(d[0])/np.linalg.norm(m),\
            np.linalg.norm(d[1])/np.linalg.norm(c),\
            np.linalg.norm(d[2])/np.linalg.norm(w),\
            float(d[-2])]
        res.append(n)
    res = np.array(res)
    print 'mean:',res.mean(0)
    print 'max:',res.max(0)
# Run with a smaller noise scale (0.1) than the function default of 0.3.
test_lagrangian(gm_num=2,ndim=10,sigm=0.1)

test d_lagrangian / d(mcwl) = 0
|dm|/|m|,|dc|/|c|,|dw|/|w|:
[[  1.8076e-02   2.7011e-01   5.2894e+03   0.0000e+00]
 [  1.3794e-02   2.8369e-01   3.2106e+03   0.0000e+00]
 [  1.1687e-02   3.5540e-01   5.4209e+03   0.0000e+00]
 [  7.5737e-03   7.1341e-02   3.2941e+03   0.0000e+00]
 [  6.3758e-03   1.1993e-01   3.0442e+03   0.0000e+00]
 [  1.3199e-02   2.1930e-01   5.1110e+03   0.0000e+00]
 [  5.7152e-03   6.8865e-02   1.9481e+03   0.0000e+00]
 [  1.1502e-02   2.2135e-01   4.0975e+03   0.0000e+00]
 [  1.1639e-03   2.6775e-02   1.1708e+03   0.0000e+00]
 [  4.9045e-03   8.7987e-02   2.4895e+03   0.0000e+00]]
mean: [  9.3991e-03   1.7248e-01   3.5076e+03   0.0000e+00]
max: [  1.8076e-02   3.5540e-01   5.4209e+03   0.0000e+00]


In [6]:
def test_build_lin_system(ndim=10,sigm=0.1):    
    print 'test build_linear_system (solution=true_solution) for 1 component'
    Y = T.fvector()
    m = T.mean(Y.reshape((-1,ndim)),axis=0).flatten()
    c = T.std(Y.reshape((-1,ndim)),axis=0).flatten()**2
    w = T.ones(1)
    l = T.reshape(Y.shape[0]//ndim,(1,))
    mcwl = T.concatenate((m,c,w,l))    
    N,M = GMMOp(1,ndim).build_linear_system(Y,mcwl)
    dmcwl = function([Y],gradient.jacobian(mcwl,[Y])+[N,M])
    print '|dm|/|m|,|dc|/|c|:'
    rres = []
    for i in range(10):
        data,_,_,_=gen_mixture([1000],ndim,sigm=sigm)
        res = dmcwl(data.flatten().astype(np.float32))
        sol = np.linalg.solve(res[2],res[1])
        n = [(np.linalg.norm(sol[:ndim]-res[0][:ndim])/np.linalg.norm(res[0][ndim])),\
            (np.linalg.norm(sol[ndim:2*ndim]-res[0][ndim:2*ndim])/np.linalg.norm(res[0][ndim:2*ndim]))]
        rres.append(n)
    rres = np.array(rres)
    print 'mean:',rres.mean(0)
    print 'max:',rres.max(0)
# Run the single-component check on 2-dimensional data.
test_build_lin_system(ndim=2)

test build_linear_system (solution=true_solution) for 1 component
|dm|/|m|,|dc|/|c|:
mean: [  1.53074482e-05   1.03449397e-07]
max: [  4.45331832e-05   2.21478088e-07]


In [7]:
def test_solve_linear_system(gm_num = 2,ndim=10,sigm=0.01):    
    print 'test solve_linear_system (solution=true_solution)'
    Y = T.fmatrix()
    m = T.fmatrix()
    c = T.fmatrix()
    w = T.fvector()
    l = T.reshape(Y.shape[0]//ndim,(1,))
    mcwl = T.concatenate((m.flatten(),c.flatten(),w,l)) 
    gmm_op = GMMOp(gm_num,ndim)
    N,M = gmm_op.build_linear_system(Y.flatten(),mcwl)
    sol = gmm_op.solve_linear_system(N,M)
    f = function([Y,m,c,w],[sol,N,M],allow_input_downcast=True)
    rres = []
    print '|MX-N|/|N|:'
    for i in range(10):
        data,m,c,w=gen_mixture(np.random.randint(100,1000,gm_num),ndim,sigm=sigm)
        res = f(data,m,c,w)
        sol = res[0]
        M = res[2]
        N = res[1]
        n = np.linalg.norm(M.dot(sol)-N)/np.linalg.norm(N)
        rres.append(n)
    rres = np.array(rres)
    print 'mean:',rres.mean(0)
    print 'max:',rres.max(0)
# Exercise the solver on 1-D data and on 3-D data at two very different
# noise scales (gm_num stays at its default of 2).
test_solve_linear_system(ndim=1,sigm=1.)
test_solve_linear_system(ndim=3,sigm=.001)
test_solve_linear_system(ndim=3,sigm=1.)

test solve_linear_system (solution=true_solution)
|MX-N|/|N|:
mean: 1.97428532133e-14
max: 6.02046725027e-14
test solve_linear_system (solution=true_solution)
|MX-N|/|N|:
mean: 1.1618514181e-16
max: 2.02084835299e-16
test solve_linear_system (solution=true_solution)
|MX-N|/|N|:
mean: 3.17812330367e-16
max: 6.97815972011e-16


In [8]:
def test_gmm_1(ndim=10,sigm=0.3):   
    print 'test gradient for 1 component'
    X = T.fmatrix("X")
    mcw = GMMOp(1,ndim)(X.flatten())    
    m2 = T.mean(X,axis=0).flatten()
    c2 = (T.std(X,axis=0)**2).flatten()
    mcw1 = T.concatenate((m2,c2,T.ones(1)))
    d_mcw = gradient.jacobian(mcw,[X])[0]
    d_mcw1 = gradient.jacobian(mcw1,[X])[0]
    f = function([X],[d_mcw,d_mcw1],allow_input_downcast=True)
    print'|d_true-d_predicted|'
    rres = []
    for i in range(10):
        data,_,_,_=gen_mixture([1000],ndim,sigm=sigm)
        res = f(data.astype(np.float32))
        n = [np.abs(res[0][:ndim]-res[1][:ndim]).mean()/np.abs(res[1][:ndim]).mean(),\
            np.abs(res[0][ndim:2*ndim]-res[1][ndim:2*ndim]).mean()/np.abs(res[1][ndim:2*ndim]).mean()]
        rres.append(n)
    rres = np.array(rres)
    print 'mean:',rres.mean(0)
    print 'max:',rres.max(0)

# Run the one-component Jacobian check with the defaults (ndim=10, sigm=0.3).
test_gmm_1()

test gradient for 1 component
|d_true-d_predicted|
mean: [  4.51154705e-07   1.04997853e-05]
max: [  5.29037720e-07   1.16022975e-05]


In [5]:
from gmm_op import GMM

def test_numeric(gm_num=2,ndim=5,sigm=0.00001,use_approx_grad=False):    
    print 'get numerical gradient and compare it with symbolic'
    X = T.fvector()
    gmm = GMM(gm_num)
    rvec  =T.fvector()
    gmm_op = T.sum(rvec*GMMOp(gm_num,ndim,gmm,use_approx_grad=use_approx_grad)(X))
    f = function([rvec,X],gmm_op,allow_input_downcast=True)
    fg = function([rvec,X],[gmm_op]+T.grad(gmm_op,[X]),allow_input_downcast=True)
    
    def calc_num_grad(rvec,X,eps):
        f0,sym_grad = tuple(fg(rvec,X))
        num_grad = np.zeros_like(sym_grad)
        for i in range(len(X)):
            dX = np.zeros_like(X)
            dX[i] = eps
            num_grad[i] = f(rvec,X+dX)
        num_grad = (num_grad-f0)/eps
        return sym_grad,num_grad
    res = []
    print 'sigma,|sym-num|,|sym-num|/|num|'
    for i in range(10):
        data,m,c,w = gen_mixture(np.random.randint(10,100,gm_num),ndim,sigm=sigm)
        rvec = np.random.rand(m.size+c.size+w.size)
        rvec = rvec/np.linalg.norm(rvec)
        sym_grad,num_grad = calc_num_grad(rvec,data.flatten(),1e-2)
        n = [sigm,np.linalg.norm(sym_grad-num_grad),np.linalg.norm(sym_grad-num_grad)/np.linalg.norm(num_grad)]
        sigm = sigm*4.
        res.append(n)
    res = np.array(res)
    np.set_printoptions(4)
    print res
    print 'mean:',res.mean(0)
    print 'max:',res.max(0)
    
# Finite-difference comparison with GMMOp's use_approx_grad flag turned off.
print 'precision for accurate grad'
test_numeric(use_approx_grad=False)

precision for accurate grad
get numerical gradient and compare it with symbolic
sigma,|sym-num|,|sym-num|/|num|
[[  1.0000e-05   1.0453e-03   1.1546e-02]
 [  4.0000e-05   3.8007e-04   2.9452e-03]
 [  1.6000e-04   7.7114e-04   2.9547e-03]
 [  6.4000e-04   3.8398e-04   2.0677e-03]
 [  2.5600e-03   3.7115e-04   1.9296e-03]
 [  1.0240e-02   5.8450e-04   1.6323e-03]
 [  4.0960e-02   1.2951e-03   2.9518e-03]
 [  1.6384e-01   9.9572e-04   4.9606e-03]
 [  6.5536e-01   8.5524e-04   4.9148e-03]
 [  2.6214e+00   1.5866e-03   4.0232e-03]]
mean: [ 0.3495  0.0008  0.004 ]
max: [  2.6214e+00   1.5866e-03   1.1546e-02]


In [6]:

# Same finite-difference comparison with GMMOp's use_approx_grad flag turned on.
print 'precision for aprox grad'
test_numeric(use_approx_grad=True)

precision for aprox grad
get numerical gradient and compare it with symbolic
sigma,|sym-num|,|sym-num|/|num|
[[  1.0000e-05   9.0289e-04   1.1586e-02]
 [  4.0000e-05   5.4461e-04   6.8984e-03]
 [  1.6000e-04   2.2084e-03   3.0132e-02]
 [  6.4000e-04   9.0620e-04   1.0367e-02]
 [  2.5600e-03   2.2757e-03   1.6295e-02]
 [  1.0240e-02   5.4758e-04   3.3898e-03]
 [  4.0960e-02   3.9302e-04   3.1437e-03]
 [  1.6384e-01   1.3891e-03   9.3739e-03]
 [  6.5536e-01   6.8958e-04   6.6171e-03]
 [  2.6214e+00   2.5208e-03   6.7992e-03]]
mean: [ 0.3495  0.0012  0.0105]
max: [  2.6214e+00   2.5208e-03   3.0132e-02]


In [11]:
# Benchmark: value and gradient of GMMOp with use_approx_grad=False.
gm_num=2
ndim = 10
X = T.fvector()
# Scalar objective (sum of every GMMOp output) so T.grad can be taken w.r.t. X.
gmm_op = T.sum(GMMOp(gm_num,ndim,use_approx_grad=False)(X))
fg = function([X],[gmm_op]+T.grad(gmm_op,[X]),allow_input_downcast=True)
print 'pergomance for accurate grad'
# NOTE: the timed expression also calls gen_mixture (gm_num components of
# 1000 samples each), so data generation is included in the reported time.
%timeit fg(gen_mixture(np.array([1000 for i in range(gm_num)]),ndim,sigm=0.1)[0].flatten())

pergomance for accurate grad
1 loop, best of 3: 204 ms per loop


In [12]:
# Benchmark: value and gradient of GMMOp with use_approx_grad=True.
# This cell rebinds the globals gm_num, ndim, X, gmm_op and fg.
gm_num=2
ndim = 10
X = T.fvector()
# Scalar objective (sum of every GMMOp output) so T.grad can be taken w.r.t. X.
gmm_op = T.sum(GMMOp(gm_num,ndim,use_approx_grad=True)(X))
fg = function([X],[gmm_op]+T.grad(gmm_op,[X]),allow_input_downcast=True)
print 'pergomance for approx grad'
# NOTE: the timed expression also calls gen_mixture (gm_num components of
# 1000 samples each), so data generation is included in the reported time.
%timeit fg(gen_mixture(np.array([1000 for i in range(gm_num)]),ndim,sigm=0.1)[0].flatten())

pergomance for approx grad
10 loops, best of 3: 42.7 ms per loop


In [3]:
def get_gmm_fit_test(gm_num,ndim,sigm=0.01):
    X = T.fmatrix()
    f = function([X],list(get_gmm(X,gm_num,ndim)),allow_input_downcast=True)
    res = []
    for i in range(10):
        data,m,c,w = gen_mixture(np.random.randint(10,100,gm_num),ndim,sigm=sigm)
        gmm = mixture.GaussianMixture(gm_num,covariance_type='diag',max_iter=2000).fit(data)
        r = f(data)
        n = [np.abs(gmm.means_-r[0]).sum(),np.abs(gmm.covariances_-r[1]).sum(),np.abs(gmm.weights_-r[2]).sum()]
        res.append(n)
    np.set_printoptions(2)
    print np.array(res)
# Compare the two fits for 2 components in 10 dimensions (sigm default 0.01).
get_gmm_fit_test(2,10)

[[  2.56e-06   1.00e-10   2.26e-08]
 [  2.02e-06   9.35e-11   2.22e-08]
 [  7.90e+01   1.83e-03   9.70e-01]
 [  2.35e-06   1.22e-10   2.98e-08]
 [  2.06e-06   9.35e-11   2.98e-08]
 [  1.15e-06   5.10e-11   2.98e-08]
 [  6.40e+01   2.65e-03   6.52e-02]
 [  8.65e+01   1.39e-03   9.73e-01]
 [  6.14e+01   2.03e-03   3.57e-01]
 [  2.42e-06   8.57e-11   3.61e-09]]
