# Gradient test

### RBF kernel

In [1]:
import numpy
import GPy
N, Q = 10, 3
X = numpy.random.randn(N-5,Q)
print(X)

[[-1.14286666  1.34806687  0.27537821]
 [ 0.32409023 -0.15853099 -0.50530328]
 [-0.02898061 -0.49979855 -1.26812569]
 [-1.12519145  0.04920702  0.07601425]
 [-1.82195009  0.14846591  1.05584206]]


In [2]:
kern = GPy.kern.RBF(Q, ARD=True)
print(kern.K(X))

[[ 1.          0.08081185  0.02963271  0.42166351  0.28517271]
 [ 0.08081185  1.          0.66264869  0.2891669   0.02819938]
 [ 0.02963271  0.66264869  1.          0.1911114   0.01091185]
 [ 0.42166351  0.2891669   0.1911114   1.          0.48302168]
 [ 0.28517271  0.02819938  0.01091185  0.48302168  1.        ]]


In [3]:
grad = GPy.kern.RBF(Q, ARD=True).gradients_X(numpy.ones((1,1)), X)
print(grad)

[[-0.0692962  -2.13256696  0.05935193]
 [-1.66422144 -0.07132292 -0.4605477 ]
 [-0.05621706  0.7857855   1.66692146]
 [ 0.5691603   0.86126763  0.26472866]
 [ 1.22057438  0.55683675 -1.53045434]]


In [4]:
check = GPy.models.GradientChecker(kern.K,lambda x: grad,x0 = X.copy())
check.checkgrad(verbose=1)

            Name             |     Ratio     |  Difference   |  Analytical   |   Numerical   |   dF_ratio    
-------------------------------------------------------------------------------------------------------------
[92m GradientChecker.X[[0 0]] [0m   |   1.000000    |   0.000000    |   0.069296    |   0.069296    |     1e-08     
[92m GradientChecker.X[[0 1]] [0m   |   1.000000    |   0.000000    |   2.132567    |   2.132567    |     4e-07     
[92m GradientChecker.X[[0 2]] [0m   |   1.000000    |   0.000000    |   -0.059352   |   -0.059352   |     1e-08     
[92m GradientChecker.X[[1 0]] [0m   |   1.000000    |   0.000000    |   1.664221    |   1.664221    |     3e-07     
[92m GradientChecker.X[[1 1]] [0m   |   1.000000    |   0.000000    |   0.071323    |   0.071323    |     1e-08     
[92m GradientChecker.X[[1 2]] [0m   |   1.000000    |   0.000000    |   0.460548    |   0.460548    |     9e-08     
[92m GradientChecker.X[[2 0]] [0m   |   1.000000    |   0.000000

True

### Linear kernel

In [5]:
N, Q = 10, 3
X = numpy.random.randn(N-5,Q)
print(X)

[[ 0.04044369 -0.28062934  0.55990156]
 [ 0.22187517  0.97275164  1.10664674]
 [ 0.687717   -1.36220048  0.10916885]
 [-0.77231975 -0.94676323 -0.83693363]
 [ 0.85988246  1.04961513 -1.50357279]]


In [6]:
kern = GPy.kern.Linear(Q, ARD=True)
print(kern.K(X))

[[ 0.39387827  0.35560404  0.47121104 -0.23414636 -1.10162873]
 [ 0.35560404  2.22014135 -1.05168406 -2.01851392 -0.45212253]
 [ 0.47121104 -1.05168406  2.34046265  0.66717682 -1.00257376]
 [-0.23414636 -2.01851392  0.66717682  2.19329629 -0.39945058]
 [-1.10162873 -0.45212253 -1.00257376 -0.39945058  4.1018209 ]]


In [7]:
grad = kern.gradients_X(numpy.ones((5,5)), X)
print(grad)

[[ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]]


In [8]:
check = GPy.models.GradientChecker(kern.K,lambda x: grad,x0 = X.copy())
check.checkgrad(verbose=1)

            Name             |     Ratio     |  Difference   |  Analytical   |   Numerical   |   dF_ratio    
-------------------------------------------------------------------------------------------------------------
[92m GradientChecker.X[[0 0]] [0m   |   1.000000    |   0.000000    |   -2.075197   |   -2.075197   |     2e-06     
[92m GradientChecker.X[[0 1]] [0m   |   1.000000    |   0.000000    |   1.134453    |   1.134453    |     1e-06     
[92m GradientChecker.X[[0 2]] [0m   |   1.000000    |   0.000000    |   1.129579    |   1.129579    |     1e-06     
[92m GradientChecker.X[[1 0]] [0m   |   1.000000    |   0.000000    |   -2.075197   |   -2.075197   |     2e-06     
[92m GradientChecker.X[[1 1]] [0m   |   1.000000    |   0.000000    |   1.134453    |   1.134453    |     1e-06     
[92m GradientChecker.X[[1 2]] [0m   |   1.000000    |   0.000000    |   1.129579    |   1.129579    |     1e-06     
[92m GradientChecker.X[[2 0]] [0m   |   1.000000    |   0.000000

True

### Kx

In [9]:
N, Q = 10, 3
X = numpy.random.randn(N-5,Q)
print(X)

[[ 1.62550013  0.87716588 -0.48312099]
 [-0.99091366  0.1363673  -0.07468656]
 [-0.12669421  0.55855895 -1.44955149]
 [-0.54234978 -1.26102945 -0.69858721]
 [ 0.58789571  0.33036005 -1.40275248]]


In [10]:
kern1 = GPy.kern.Linear(Q, ARD=True)
print(kern1.K(X))

[[ 3.64507654 -1.4550309   0.98431615 -1.6502195   1.92310428]
 [-1.4550309   1.00608401  0.30997421  0.41763369 -0.43273682]
 [ 0.98431615  0.30997421  2.42923903  0.37699142  2.14340453]
 [-1.6502195   0.41763369  0.37699142  2.37236265  0.24450608]
 [ 1.92310428 -0.43273682  2.14340453  0.24450608  2.42247365]]


In [11]:
grad1 = kern1.gradients_X(numpy.ones((5,5)), X)
print(grad)

[[ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]
 [ 2.07519714 -1.13445255 -1.12957854]]


In [12]:
kern2 = GPy.kern.RBF(Q, ARD=True)
print(kern2.K(X))

[[ 1.          0.02280875  0.12836961  0.00947651  0.32933796]
 [ 0.02280875  1.          0.24470856  0.28038556  0.11683231]
 [ 0.12836961  0.24470856  1.          0.13215109  0.75393297]
 [ 0.00947651  0.28038556  0.13215109  1.          0.11614544]
 [ 0.32933796  0.11683231  0.75393297  0.11614544  1.        ]]


In [13]:
grad2 = kern2.gradients_X(numpy.ones((5,5)), X)
print(grad2)

[[-1.29374359 -0.51628536 -0.83931171]
 [ 1.16277159 -0.4978693  -1.35170167]
 [ 0.99454031 -0.94984344  1.19005116]
 [ 0.16195003  1.67473143 -0.00810346]
 [-1.02551834  0.28926666  1.00906568]]


In [14]:
kern3 = GPy.kern.Poly(Q)
print(kern3.K(X))

[[  1.00225590e+02  -9.42155649e-02   7.81326585e+00  -2.74903313e-01
    2.49765774e+01]
 [ -9.42155649e-02   8.07323046e+00   2.24795823e+00   2.84899757e+00
    1.82538205e-01]
 [  7.81326585e+00   2.24795823e+00   4.03267549e+01   2.61092082e+00
    3.10599552e+01]
 [ -2.74903313e-01   2.84899757e+00   2.61092082e+00   3.83533066e+01
    1.92748527e+00]
 [  2.49765774e+01   1.82538205e-01   3.10599552e+01   1.92748527e+00
    4.00885494e+01]]


In [15]:
grad3 = kern3.gradients_X(numpy.ones((1,1)), X)
print(grad3)

NotImplementedError: 

In [16]:
kern4 = GPy.kern.RatQuad(Q, ARD=True)
print(kern4.K(X))

[[ 1.          0.04375555  0.10729796  0.03122698  0.22447035]
 [ 0.04375555  1.          0.17250426  0.19379396  0.10097229]
 [ 0.10729796  0.17250426  1.          0.10936823  0.60802003]
 [ 0.03122698  0.19379396  0.10936823  1.          0.10059497]
 [ 0.22447035  0.10097229  0.60802003  0.10059497  1.        ]]


In [17]:
grad4 = kern4.gradients_X(numpy.ones((5,5)), X)
print(grad4)

[[-0.83137416 -0.35172088 -0.51688311]
 [ 0.69916162 -0.30384521 -0.79232393]
 [ 1.29369531 -0.7722175   0.72728716]
 [ 0.09915786  0.9904015   0.01914757]
 [-1.26064063  0.43738209  0.56277231]]


In [18]:
kern5 = GPy.kern.MLP(Q, ARD=True)
print(kern5.K(X))

[[ 0.61523964 -0.07046487  0.25978387 -0.0835593   0.3978331 ]
 [-0.07046487  0.4651355   0.23376619  0.25575934  0.09944874]
 [ 0.25978387  0.23376619  0.56372283  0.20260241  0.50282416]
 [-0.0835593   0.25575934  0.20260241  0.56077705  0.18266788]
 [ 0.3978331   0.09944874  0.50282416  0.18266788  0.56337554]]


In [19]:
grad5 = kern5.gradients_X(numpy.ones((5,5)), X)
print(grad5)

[[-0.51832308 -0.14329229 -0.98143711]
 [ 0.63075401  0.09357599 -1.44167473]
 [ 0.2210103  -0.24361108 -0.19853239]
 [ 0.26910491  0.75358425 -0.90491092]
 [-0.36408723 -0.00226811 -0.24410971]]


In [20]:
kern6 = GPy.kern.Matern52(Q, ARD=True)
print(kern6.K(X))

[[ 1.          0.04219396  0.13328554  0.02535416  0.28682667]
 [ 0.04219396  1.          0.22168133  0.24890975  0.12431927]
 [ 0.13328554  0.22168133  1.          0.13620651  0.6746466 ]
 [ 0.02535416  0.24890975  0.13620651  1.          0.12378255]
 [ 0.28682667  0.12431927  0.6746466   0.12378255  1.        ]]


In [21]:
grad6 = kern6.gradients_X(numpy.ones((5,5)), X)
print(grad6)

[[-1.07748686 -0.44343946 -0.6513506 ]
 [ 0.93568572 -0.3711685  -1.04281994]
 [ 1.13118188 -0.844475    0.93409662]
 [ 0.15835221  1.32174897 -0.00641965]
 [-1.14773294  0.33733398  0.76649358]]


In [22]:
kern = kern1 + kern2 + kern4 + kern5 + kern6# + kern3
print(kern.K(X))

[[ 7.26031618 -1.41673752  1.61305312 -1.66772115  3.16157237]
 [-1.41673752  4.47121951  1.18263455  1.3964823   0.00883578]
 [ 1.61305312  1.18263455  5.99296187  0.95731965  4.68282829]
 [-1.66772115  1.3964823   0.95731965  5.9331397   0.76769693]
 [ 3.16157237  0.00883578  4.68282829  0.76769693  5.9858492 ]]


In [23]:
grad = kern.gradients_X(numpy.ones((5,5)), X)
print(grad)

[[ -2.61405131  -0.17189251 -11.20637998]
 [  4.53524932   0.20353846 -12.84591772]
 [  4.74730418  -1.52730155  -5.5644949 ]
 [  1.79544139   6.02331162  -9.11768392]
 [ -2.69110277   2.34456009  -6.1231756 ]]


In [24]:
check = GPy.models.GradientChecker(kern.K,lambda x: grad,x0 = X.copy())
check.checkgrad(verbose=1)

            Name             |     Ratio     |  Difference   |  Analytical   |   Numerical   |   dF_ratio    
-------------------------------------------------------------------------------------------------------------
[92m GradientChecker.X[[0 0]] [0m   |   1.000000    |   0.000000    |   2.614051    |   2.614051    |     1e-07     
[92m GradientChecker.X[[0 1]] [0m   |   1.000000    |   0.000000    |   0.171893    |   0.171893    |     7e-09     
[92m GradientChecker.X[[0 2]] [0m   |   1.000000    |   0.000000    |   11.206380   |   11.206380   |     4e-07     
[92m GradientChecker.X[[1 0]] [0m   |   1.000000    |   0.000000    |   -4.535249   |   -4.535249   |     2e-07     
[92m GradientChecker.X[[1 1]] [0m   |   1.000000    |   0.000000    |   -0.203538   |   -0.203538   |     8e-09     
[92m GradientChecker.X[[1 2]] [0m   |   1.000000    |   0.000000    |   12.845918   |   12.845918   |     5e-07     
[92m GradientChecker.X[[2 0]] [0m   |   1.000000    |   0.000000

True

Using lambda function notation:

In [35]:
check = GPy.models.GradientChecker(lambda a: kern.K(a),lambda b: grad,x0 = X.copy())
check.checkgrad(verbose=1)

            Name             |     Ratio     |  Difference   |  Analytical   |   Numerical   |   dF_ratio    
-------------------------------------------------------------------------------------------------------------
[92m GradientChecker.X[[0 0]] [0m   |   1.000000    |   0.000000    |   2.614051    |   2.614051    |     1e-07     
[92m GradientChecker.X[[0 1]] [0m   |   1.000000    |   0.000000    |   0.171893    |   0.171893    |     7e-09     
[92m GradientChecker.X[[0 2]] [0m   |   1.000000    |   0.000000    |   11.206380   |   11.206380   |     4e-07     
[92m GradientChecker.X[[1 0]] [0m   |   1.000000    |   0.000000    |   -4.535249   |   -4.535249   |     2e-07     
[92m GradientChecker.X[[1 1]] [0m   |   1.000000    |   0.000000    |   -0.203538   |   -0.203538   |     8e-09     
[92m GradientChecker.X[[1 2]] [0m   |   1.000000    |   0.000000    |   12.845918   |   12.845918   |     5e-07     
[92m GradientChecker.X[[2 0]] [0m   |   1.000000    |   0.000000

True

### Function 2 gradient

In [26]:
def f2_likelihood(N,Q,Kx,X,W):
    
    alpha = 1
    Kx_inv = np.linalg.inv(Kx)
    part1 = 0.5*Q*np.log(np.linalg.det(Kx_inv))
    part2 = 0.5*np.trace(Kx_inv*np.matmul(X[1:N],np.transpose(X[1:N])))
    part3 = alpha*np.linalg.norm(W,2)

    return part1 + part2 + part3

In [27]:
def f2_gradient(N,Q,Kx,X,W):
    
    Kx_inv = np.linalg.inv(Kx)    
    
    dF_dKx = 0.5*Q*np.transpose(Kx_inv)-0.5*np.transpose(np.matmul((Kx_inv*np.matmul(X[1:N],np.transpose(X[1:N]))),Kx_inv))
    
    dF_dW = np.sum(dF_dKx) * np.sum(Kx) + W/np.linalg.norm(W,2)
    
    return dF_dW

In [29]:
check = GPy.models.GradientChecker(lambda X: f2_likelihood(N,Q,Kx,X,weights),lambda x: f2_gradient(N,Q,Kx,x,W),x0 = X.copy())
check.checkgrad(verbose=1)

            Name             |     Ratio     |  Difference   |  Analytical   |   Numerical   |   dF_ratio    
-------------------------------------------------------------------------------------------------------------


NameError: name 'Kx' is not defined