diff --git a/pyGPs/Core/__init__.py b/pyGPs/Core/__init__.py index 1c0c201..a7beaff 100644 --- a/pyGPs/Core/__init__.py +++ b/pyGPs/Core/__init__.py @@ -1,9 +1,10 @@ -import inf -import cov -import mean -import lik -import gp -import opt +from __future__ import absolute_import +from . import inf +from . import cov +from . import mean +from . import lik +from . import gp +from . import opt diff --git a/pyGPs/Core/cov.py b/pyGPs/Core/cov.py index 269b075..b03a6e9 100644 --- a/pyGPs/Core/cov.py +++ b/pyGPs/Core/cov.py @@ -1,3 +1,9 @@ +from __future__ import division +from __future__ import print_function +from builtins import str +from builtins import range +from past.utils import old_div +from builtins import object #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -66,8 +72,8 @@ def __init__(self): def __repr__(self): strvalue =str(type(self))+': to get the kernel matrix or kernel derviatives use: \n'+\ - 'model.covfunc.getCovMatrix()\n'+\ - 'model.covfunc.getDerMatrix()' + 'model.covfunc.getCovMatrix()\n'+\ + 'model.covfunc.getDerMatrix()' return strvalue @@ -176,7 +182,7 @@ def __mul__(self,other): elif isinstance(other, Kernel): return ProductOfKernel(self,other) else: - print "only numbers and Kernels are supported operand types for *" + print("only numbers and Kernels are supported operand types for *") @@ -411,9 +417,9 @@ def getCovMatrix(self,x=None,z=None,mode=None): nn, D = z.shape A = np.zeros((nn, 1)) elif mode == 'train': # compute covariance matix for dataset x - A = spdist.cdist(x / ell, x / ell, 'sqeuclidean') + A = spdist.cdist(old_div(x, ell), old_div(x, ell), 'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - A = spdist.cdist(x / ell, z / ell, 'sqeuclidean') + A = spdist.cdist(old_div(x, ell), old_div(z, ell), 'sqeuclidean') dp = 2 * np.pi * np.sqrt(A) * ell / p A = np.exp(-0.5 * A) * np.cos(dp) return A @@ -426,9 +432,9 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): nn, D = z.shape A = np.zeros((nn, 1)) elif mode == 'train': # compute covariance matix for dataset x - A = spdist.cdist(x / ell, x / ell, 'sqeuclidean') + A = spdist.cdist(old_div(x, ell), old_div(x, ell), 'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - A = spdist.cdist(x / ell, z / ell, 'sqeuclidean') + A = spdist.cdist(old_div(x, ell), old_div(z, ell), 'sqeuclidean') dp = 2 * np.pi * np.sqrt(A) * ell / p if der == 0: # compute derivative matrix wrt 1st parameter @@ -475,8 +481,8 @@ def __init__(self, Q=0, hyps=[], D=None): if D: self.hyp = np.random.random(Q*(1+2*D)) else: - self.hyp = hyps - self.para = [Q] + self.hyp = hyps + self.para = [Q] def initSMhypers(self, x, y): """ @@ -491,7 +497,7 @@ def initSMhypers(self, x, y): w = np.zeros(Q) m = np.zeros((D, Q)) s = np.zeros((D, Q)) - w[:] = np.std(y) / Q + w[:] = old_div(np.std(y), Q) hypinit = np.zeros(Q + 2 * D * Q) for i in range(D): @@ -503,10 +509,10 @@ def initSMhypers(self, x, y): else: d2[d2 == 0] = 1 minshift = np.min(np.min(np.sqrt(d2))) - nyquist = 0.5 / minshift + nyquist = old_div(0.5, minshift) m[i, :] = nyquist * np.random.ranf((1, Q)) maxshift = np.max(np.max(np.sqrt(d2))) - s[i, :] = 1. 
/ np.abs(maxshift * np.random.ranf((1, Q))) + s[i, :] = old_div(1., np.abs(maxshift * np.random.ranf((1, Q)))) hypinit[:Q] = np.log(w) hypinit[Q + np.arange(0, Q * D)] = np.log(m[:]).T hypinit[Q + Q * D + np.arange(0, Q * D)] = np.log(s[:]).T @@ -519,7 +525,7 @@ def getCovMatrix(self,x=None,z=None,mode=None): nn, D = z.shape else: nn, D = x.shape - assert Q == len(self.hyp) / (1 + 2 * D) + assert Q == old_div(len(self.hyp), (1 + 2 * D)) w = np.exp(self.hyp[:Q]) m = np.exp(np.reshape(self.hyp[Q:Q + Q * D], (D, Q))) @@ -540,13 +546,13 @@ def getCovMatrix(self,x=None,z=None,mode=None): d2[:, :, j] = spdist.cdist(xslice, zslice, 'sqeuclidean') d = np.sqrt(d2) - k = lambda (d2v, dm): np.exp(-2 * np.pi ** 2 * d2v) * np.cos(2* np.pi * dm) # evaluation of the covariance + k = lambda d2v_dm: np.exp(-2 * np.pi ** 2 * d2v_dm[0]) * np.cos(2* np.pi * d2v_dm[1]) # evaluation of the covariance km = lambda dm: -2 * np.pi * np.tan(2 * np.pi * dm) * dm # remainder when differentiating w.r.t. m kv = lambda d2v: -d2v * (2 * np.pi) ** 2 # remainder when differentiating w.r.t. v A = 0. c = 1. - qq = range(Q) + qq = list(range(Q)) for q in qq: C = w[q] * c for j in range(D): @@ -561,7 +567,7 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): nn, D = z.shape else: nn, D = x.shape - assert Q == len(self.hyp) / (1 + 2 * D) + assert Q == old_div(len(self.hyp), (1 + 2 * D)) w = np.exp(self.hyp[:Q]) m = np.exp(np.reshape(self.hyp[Q:Q + Q * D], (D, Q))) @@ -582,24 +588,24 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): d2[:, :, j] = spdist.cdist(xslice, zslice, 'sqeuclidean') d = np.sqrt(d2) - k = lambda (d2v, dm): np.exp(-2 * np.pi ** 2 * d2v) * np.cos(2* np.pi * dm) # evaluation of the covariance + k = lambda d2v_dm1: np.exp(-2 * np.pi ** 2 * d2v_dm1[0]) * np.cos(2* np.pi * d2v_dm1[1]) # evaluation of the covariance km = lambda dm: -2 * np.pi * np.tan(2 * np.pi * dm) * dm # remainder when differentiating w.r.t. m kv = lambda d2v: -d2v * (2 * np.pi) ** 2 # remainder when differentiating w.r.t. v A = 0. c = 1. 
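Note (illustration, not part of the patch): the rewritten lambdas above (k = lambda (d2v, dm): ... becoming k = lambda d2v_dm: ... with explicit indexing) are forced by Python 3, which removed tuple parameter unpacking in function signatures (PEP 3113). A minimal sketch of the pattern as used here, assuming the callable always receives a 2-tuple:

    import numpy as np

    # Python 2 only -- the tuple is unpacked in the signature:
    #   k = lambda (d2v, dm): np.exp(-2 * np.pi**2 * d2v) * np.cos(2 * np.pi * dm)

    # Python 2/3 -- take the tuple as a single argument and index into it:
    k = lambda d2v_dm: np.exp(-2 * np.pi**2 * d2v_dm[0]) * np.cos(2 * np.pi * d2v_dm[1])

    print(k((0.25, 0.5)))   # same value as before; only the parameter syntax changed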
- qq = range(Q) + qq = list(range(Q)) if der < Q: # compute derivative matrix wrt w c = 1 qq = [der] elif der < Q + Q * D: # compute derivative matrix wrt sig p = (der - Q) % D - q = (der - Q - p) / D + q = old_div((der - Q - p), D) c = km(d[:, :, p] * m[p, q]) qq = [q] elif der < 2 * Q * D + Q: # compute derivative matrix wrt mu p = (der - (D + 1) * Q) % D - q = (der - (D + 1) * Q - p) / D + q = old_div((der - (D + 1) * Q - p), D) c = kv(d2[:, :, p] * v[p, q]) qq = [q] else: @@ -738,9 +744,9 @@ def getCovMatrix(self,x=None,z=None,mode=None): nn,D = z.shape A = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - A = np.sqrt( spdist.cdist(x/ell, x/ell, 'sqeuclidean') ) + A = np.sqrt( spdist.cdist(old_div(x,ell), old_div(x,ell), 'sqeuclidean') ) elif mode == 'cross': # compute covariance between data sets x and z - A = np.sqrt( spdist.cdist(x/ell, z/ell, 'sqeuclidean') ) + A = np.sqrt( spdist.cdist(old_div(x,ell), old_div(z,ell), 'sqeuclidean') ) A = sf2 * self.pp(A,j,v,self.func) return A @@ -762,9 +768,9 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): nn,D = z.shape A = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - A = np.sqrt( spdist.cdist(x/ell, x/ell, 'sqeuclidean') ) + A = np.sqrt( spdist.cdist(old_div(x,ell), old_div(x,ell), 'sqeuclidean') ) elif mode == 'cross': # compute covariance between data sets x and z - A = np.sqrt( spdist.cdist(x/ell, z/ell, 'sqeuclidean') ) + A = np.sqrt( spdist.cdist(old_div(x,ell), old_div(z,ell), 'sqeuclidean') ) if der == 0: # compute derivative matrix wrt 1st parameter A = sf2 * self.dpp(A,j,v,self.func,self.dfunc) elif der == 1: # compute derivative matrix wrt 2nd parameter @@ -795,9 +801,9 @@ def getCovMatrix(self,x=None,z=None,mode=None): nn,D = z.shape A = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for training set - A = spdist.cdist(x/ell,x/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(x,ell),'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - A = spdist.cdist(x/ell,z/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(z,ell),'sqeuclidean') A = sf2 * np.exp(-0.5*A) return A @@ -810,9 +816,9 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): nn,D = z.shape A = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - A = spdist.cdist(x/ell,x/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(x,ell),'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - A = spdist.cdist(x/ell,z/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(z,ell),'sqeuclidean') if der == 0: # compute derivative matrix wrt 1st parameter A = sf2 * np.exp(-0.5*A) * A elif der == 1: # compute derivative matrix wrt 2nd parameter @@ -840,9 +846,9 @@ def getCovMatrix(self,x=None,z=None,mode=None): nn,D = z.shape A = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - A = spdist.cdist(x/ell,x/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(x,ell),'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - A = spdist.cdist(x/ell,z/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(z,ell),'sqeuclidean') A = np.exp(-0.5*A) return A @@ -853,9 +859,9 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): nn,D = z.shape A = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - A = spdist.cdist(x/ell,x/ell,'sqeuclidean') + A = 
spdist.cdist(old_div(x,ell),old_div(x,ell),'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - A = spdist.cdist(x/ell,z/ell,'sqeuclidean') + A = spdist.cdist(old_div(x,ell),old_div(z,ell),'sqeuclidean') if der == 0: # compute derivative matrix wrt 1st parameter A = np.exp(-0.5*A) * A else: @@ -875,7 +881,7 @@ class RBFard(Kernel): ''' def __init__(self, D=None, log_ell_list=None, log_sigma=0.): if log_ell_list is None: - self.hyp = [0. for i in xrange(D)] + [log_sigma] + self.hyp = [0. for i in range(D)] + [log_sigma] else: self.hyp = log_ell_list + [log_sigma] @@ -885,7 +891,7 @@ def getCovMatrix(self,x=None,z=None,mode=None): n, D = x.shape if not z is None: nn, D = z.shape - ell = 1./np.exp(self.hyp[0:D]) # characteristic length scale + ell = old_div(1.,np.exp(self.hyp[0:D])) # characteristic length scale sf2 = np.exp(2.*self.hyp[D]) # signal variance if mode == 'self_test': # self covariances for the test cases nn,D = z.shape @@ -904,7 +910,7 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): n, D = x.shape if not z is None: nn, D = z.shape - ell = 1./np.exp(self.hyp[0:D]) # characteristic length scale + ell = old_div(1.,np.exp(self.hyp[0:D])) # characteristic length scale sf2 = np.exp(2.*self.hyp[D]) # signal variance if mode == 'self_test': # self covariances for the test cases nn,D = z.shape @@ -919,10 +925,10 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): if mode == 'self_test': A = A*0 elif mode == 'train': - tem = np.atleast_2d(x[:,der])/ell[der] + tem = old_div(np.atleast_2d(x[:,der]),ell[der]) A *= spdist.cdist(tem,tem,'sqeuclidean') elif mode == 'cross': - A *= spdist.cdist(np.atleast_2d(x[:,der]).T/ell[der],np.atleast_2d(z[:,der]).T/ell[der],'sqeuclidean') + A *= spdist.cdist(old_div(np.atleast_2d(x[:,der]).T,ell[der]),old_div(np.atleast_2d(z[:,der]).T,ell[der]),'sqeuclidean') elif der == D: # compute derivative matrix wrt magnitude parameter A = 2.*A else: @@ -1028,7 +1034,7 @@ class LINard(Kernel): ''' def __init__(self, D=None, log_ell_list=None): if log_ell_list is None: - self.hyp = [0. for i in xrange(D)] + self.hyp = [0. for i in range(D)] else: self.hyp = log_ell_list @@ -1042,7 +1048,7 @@ def getCovMatrix(self,x=None,z=None,mode=None): n, D = x.shape A = np.dot(x,x.T)+ np.eye(n)*1e-10 elif mode == 'cross': # compute covariance between data sets x and z - z = np.dot(z,np.diag(1./ell)) + z = np.dot(z,np.diag(old_div(1.,ell))) A = np.dot(x,z.T) return A @@ -1060,7 +1066,7 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): elif mode == 'train': A = -2.*np.dot(np.atleast_2d(x[:,der]).T,np.atleast_2d(x[:,der])) elif mode == 'cross': - z = np.dot(z,np.diag(1./ell)) + z = np.dot(z,np.diag(old_div(1.,ell))) A = -2.*np.dot(np.atleast_2d(x[:,der]).T, np.atleast_2d(z[:,der])) # cross covariances else: raise Exception("Wrong derivative index in covLINard") @@ -1102,9 +1108,9 @@ def dfunc(self,d,t): # Note, this is func - d func/dt elif d == 3: return t elif d == 5: - return (1./3.)*(t + t*t) + return (old_div(1.,3.))*(t + t*t) elif d == 7: - return (1./15.)*(t + 3.*t*t + t*t*t) + return (old_div(1.,15.))*(t + 3.*t*t + t*t*t) else: raise Exception("Wrong value for d in Matern") @@ -1125,7 +1131,7 @@ def getCovMatrix(self,x=None,z=None,mode=None): try: assert(d in [1,3,5,7]) # check for valid values of d except AssertionError: - print "Warning: You specified d to be neither 1,3,5 nor 7. We set it to d=3. " + print("Warning: You specified d to be neither 1,3,5 nor 7. We set it to d=3. 
") d = 3 if mode == 'self_test': # self covariances for the test cases nn,D = z.shape @@ -1151,7 +1157,7 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): try: assert(d in [1,3,5,7]) # check for valid values of d except AssertionError: - print "Warning: You specified d to be neither 1,3,5 nor 7. We set to d=3. " + print("Warning: You specified d to be neither 1,3,5 nor 7. We set to d=3. ") d = 3 if mode == 'self_test': # self covariances for the test cases nn,D = z.shape @@ -1205,7 +1211,7 @@ def getCovMatrix(self,x=None,z=None,mode=None): elif mode == 'cross': # compute covariance between data sets x and z A = np.sqrt(spdist.cdist(x, z, 'sqeuclidean')) A = np.pi*A/p - A = np.sin(A)/ell + A = old_div(np.sin(A),ell) A = A * A A = sf2 *np.exp(-2.*A) return A @@ -1228,14 +1234,14 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): A = np.sqrt(spdist.cdist(x, z, 'sqeuclidean')) A = np.pi*A/p if der == 0: # compute derivative matrix wrt 1st parameter - A = np.sin(A)/ell + A = old_div(np.sin(A),ell) A = A * A A = 4. *sf2 *np.exp(-2.*A) * A elif der == 1: # compute derivative matrix wrt 2nd parameter - R = np.sin(A)/ell + R = old_div(np.sin(A),ell) A = 4 * sf2/ell * np.exp(-2.*R*R)*R*np.cos(A)*A elif der == 2: # compute derivative matrix wrt 3rd parameter - A = np.sin(A)/ell + A = old_div(np.sin(A),ell) A = A * A A = 2. * sf2 * np.exp(-2.*A) else: @@ -1315,9 +1321,9 @@ def getCovMatrix(self,x=None,z=None,mode=None): nn,D = z.shape D2 = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - D2 = spdist.cdist(x/ell, x/ell, 'sqeuclidean') + D2 = spdist.cdist(old_div(x,ell), old_div(x,ell), 'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - D2 = spdist.cdist(x/ell, z/ell, 'sqeuclidean') + D2 = spdist.cdist(old_div(x,ell), old_div(z,ell), 'sqeuclidean') A = sf2 * ( ( 1.0 + 0.5*D2/alpha )**(-alpha) ) return A @@ -1330,9 +1336,9 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): nn,D = z.shape D2 = np.zeros((nn,1)) elif mode == 'train': # compute covariance matix for dataset x - D2 = spdist.cdist(x/ell, x/ell, 'sqeuclidean') + D2 = spdist.cdist(old_div(x,ell), old_div(x,ell), 'sqeuclidean') elif mode == 'cross': # compute covariance between data sets x and z - D2 = spdist.cdist(x/ell, z/ell, 'sqeuclidean') + D2 = spdist.cdist(old_div(x,ell), old_div(z,ell), 'sqeuclidean') if der == 0: # compute derivative matrix wrt 1st parameter A = sf2 * ( 1.0 + 0.5*D2/alpha )**(-alpha-1) * D2 elif der == 1: # compute derivative matrix wrt 2nd parameter @@ -1359,7 +1365,7 @@ class RQard(Kernel): ''' def __init__(self, D=None, log_ell_list=None, log_sigma=0., log_alpha=0.): if log_ell_list is None: - self.hyp = [0. for i in xrange(D)] + [ log_sigma, log_alpha ] + self.hyp = [0. 
for i in range(D)] + [ log_sigma, log_alpha ] else: self.hyp = log_ell_list + [ log_sigma, log_alpha ] @@ -1369,7 +1375,7 @@ def getCovMatrix(self,x=None,z=None,mode=None): n, D = x.shape if not z is None: nn, D = z.shape - ell = 1./np.exp(self.hyp[0:D]) # characteristic length scale + ell = old_div(1.,np.exp(self.hyp[0:D])) # characteristic length scale sf2 = np.exp(2.*self.hyp[D]) # signal variance alpha = np.exp(self.hyp[D+1]) if mode == 'self_test': # self covariances for the test cases @@ -1389,7 +1395,7 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): n, D = x.shape if not z is None: nn, D = z.shape - ell = 1./np.exp(self.hyp[0:D]) # characteristic length scale + ell = old_div(1.,np.exp(self.hyp[0:D])) # characteristic length scale sf2 = np.exp(2.*self.hyp[D]) # signal variance alpha = np.exp(self.hyp[D+1]) if mode == 'self_test': # self covariances for the test cases @@ -1404,10 +1410,10 @@ def getDerMatrix(self,x=None,z=None,mode=None,der=None): if mode == 'self_test': A = D2*0 elif mode == 'train': - tmp = np.atleast_2d(x[:,der])/ell[der] + tmp = old_div(np.atleast_2d(x[:,der]),ell[der]) A = sf2 * ( 1.0 + 0.5*D2/alpha )**(-alpha-1) * spdist.cdist(tmp, tmp, 'sqeuclidean') elif mode == 'cross': - A = sf2 * ( 1.0 + 0.5*D2/alpha )**(-alpha-1) * spdist.cdist(np.atleast_2d(x[:,der]).T/ell[der], np.atleast_2d(z[:,der]).T/ell[der], 'sqeuclidean') + A = sf2 * ( 1.0 + 0.5*D2/alpha )**(-alpha-1) * spdist.cdist(old_div(np.atleast_2d(x[:,der]).T,ell[der]), old_div(np.atleast_2d(z[:,der]).T,ell[der]), 'sqeuclidean') elif der==D: # compute derivative matrix wrt magnitude parameter A = 2. * sf2 * ( ( 1.0 + 0.5*D2/alpha )**(-alpha) ) elif der==(D+1): # compute derivative matrix wrt magnitude parameter diff --git a/pyGPs/Core/gp.py b/pyGPs/Core/gp.py index d9cec56..7010005 100644 --- a/pyGPs/Core/gp.py +++ b/pyGPs/Core/gp.py @@ -1,3 +1,9 @@ +from __future__ import division +from __future__ import absolute_import +from builtins import str +from builtins import range +from builtins import object +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -42,8 +48,8 @@ import itertools import numpy as np import matplotlib.pyplot as plt -import inf, mean, lik, cov, opt -from tools import unique, jitchol, solve_chol +from . 
import inf, mean, lik, cov, opt +from .tools import unique, jitchol, solve_chol from copy import deepcopy import pyGPs @@ -183,7 +189,7 @@ def plotData_2d(self,x1,x2,t1,t2,p1,p2,axisvals=None): fig = plt.figure() plt.plot(x1[:,0], x1[:,1], 'b+', markersize = 12) plt.plot(x2[:,0], x2[:,1], 'r+', markersize = 12) - pc = plt.contour(t1, t2, np.reshape(p2/(p1+p2), (t1.shape[0],t1.shape[1]) )) + pc = plt.contour(t1, t2, np.reshape(old_div(p2,(p1+p2)), (t1.shape[0],t1.shape[1]) )) fig.colorbar(pc) plt.grid() if axisvals: @@ -375,7 +381,7 @@ def predict(self, xs, ys=None): L = self.posterior.L sW = self.posterior.sW - nz = range(len(alpha[:,0])) # non-sparse representation + nz = list(range(len(alpha[:,0]))) # non-sparse representation if L == []: # in case L is not provided, we compute it K = covfunc.getCovMatrix(x=x[nz,:], mode='train') #L = np.linalg.cholesky( (np.eye(nz) + np.dot(sW,sW.T)*K).T ) @@ -390,13 +396,13 @@ def predict(self, xs, ys=None): fs2 = np.zeros((ns,1)) lp = np.zeros((ns,1)) while nact<=ns-1: # process minibatches of test cases to save memory - id = range(nact,min(nact+nperbatch,ns)) # data points to process + id = list(range(nact,min(nact+nperbatch,ns))) # data points to process kss = covfunc.getCovMatrix(z=xs[id,:], mode='self_test') # self-variances Ks = covfunc.getCovMatrix(x=x[nz,:], z=xs[id,:], mode='cross') # cross-covariances ms = meanfunc.getMean(xs[id,:]) N = (alpha.shape)[1] # number of alphas (usually 1; more in case of sampling) Fmu = np.tile(ms,(1,N)) + np.dot(Ks.T,alpha[nz]) # conditional mean fs|f - fmu[id] = np.reshape(Fmu.sum(axis=1)/N,(len(id),1)) # predictive means + fmu[id] = np.reshape(old_div(Fmu.sum(axis=1),N),(len(id),1)) # predictive means if Ltril: # L is triangular => use Cholesky parameters (alpha,sW,L) V = np.linalg.solve(L.T,np.tile(sW,(1,len(id)))*Ks) fs2[id] = kss - np.array([(V*V).sum(axis=0)]).T # predictive variances @@ -408,9 +414,9 @@ def predict(self, xs, ys=None): Lp, Ymu, Ys2 = likfunc.evaluate(None,Fmu[:],Fs2[:],None,None,3) else: Lp, Ymu, Ys2 = likfunc.evaluate(np.tile(ys[id],(1,N)), Fmu[:], Fs2[:],None,None,3) - lp[id] = np.reshape( np.reshape(Lp,(np.prod(Lp.shape),N)).sum(axis=1)/N , (len(id),1) ) # log probability; sample averaging - ymu[id] = np.reshape( np.reshape(Ymu,(np.prod(Ymu.shape),N)).sum(axis=1)/N ,(len(id),1) ) # predictive mean ys|y and ... - ys2[id] = np.reshape( np.reshape(Ys2,(np.prod(Ys2.shape),N)).sum(axis=1)/N , (len(id),1) ) # .. variance + lp[id] = np.reshape( old_div(np.reshape(Lp,(np.prod(Lp.shape),N)).sum(axis=1),N) , (len(id),1) ) # log probability; sample averaging + ymu[id] = np.reshape( old_div(np.reshape(Ymu,(np.prod(Ymu.shape),N)).sum(axis=1),N) ,(len(id),1) ) # predictive mean ys|y and ... + ys2[id] = np.reshape( old_div(np.reshape(Ys2,(np.prod(Ys2.shape),N)).sum(axis=1),N) , (len(id),1) ) # .. 
variance nact = id[-1]+1 # set counter to index of next data point self.ym = ymu self.ys2 = ys2 @@ -465,7 +471,7 @@ def predict_with_posterior(self, post, xs, ys=None): L = post.L sW = post.sW - nz = range(len(alpha[:,0])) # non-sparse representation + nz = list(range(len(alpha[:,0]))) # non-sparse representation if L == []: # in case L is not provided, we compute it K = covfunc.getCovMatrix(x=x[nz,:], mode='train') #L = np.linalg.cholesky( (np.eye(nz) + np.dot(sW,sW.T)*K).T ) @@ -480,13 +486,13 @@ def predict_with_posterior(self, post, xs, ys=None): fs2 = np.zeros((ns,1)) lp = np.zeros((ns,1)) while nact<=ns-1: # process minibatches of test cases to save memory - id = range(nact,min(nact+nperbatch,ns)) # data points to process + id = list(range(nact,min(nact+nperbatch,ns))) # data points to process kss = covfunc.getCovMatrix(z=xs[id,:], mode='self_test') # self-variances Ks = covfunc.getCovMatrix(x=x[nz,:], z=xs[id,:], mode='cross') # cross-covariances ms = meanfunc.getMean(xs[id,:]) N = (alpha.shape)[1] # number of alphas (usually 1; more in case of sampling) Fmu = np.tile(ms,(1,N)) + np.dot(Ks.T,alpha[nz]) # conditional mean fs|f - fmu[id] = np.reshape(Fmu.sum(axis=1)/N,(len(id),1)) # predictive means + fmu[id] = np.reshape(old_div(Fmu.sum(axis=1),N),(len(id),1)) # predictive means if Ltril: # L is triangular => use Cholesky parameters (alpha,sW,L) V = np.linalg.solve(L.T,np.tile(sW,(1,len(id)))*Ks) fs2[id] = kss - np.array([(V*V).sum(axis=0)]).T # predictive variances @@ -498,9 +504,9 @@ def predict_with_posterior(self, post, xs, ys=None): [Lp, Ymu, Ys2] = likfunc.evaluate(None,Fmu[:],Fs2[:],None,None,3) else: [Lp, Ymu, Ys2] = likfunc.evaluate(np.tile(ys[id],(1,N)), Fmu[:], Fs2[:],None,None,3) - lp[id] = np.reshape( np.reshape(Lp,(np.prod(Lp.shape),N)).sum(axis=1)/N , (len(id),1) ) # log probability; sample averaging - ymu[id] = np.reshape( np.reshape(Ymu,(np.prod(Ymu.shape),N)).sum(axis=1)/N ,(len(id),1) ) # predictive mean ys|y and ... - ys2[id] = np.reshape( np.reshape(Ys2,(np.prod(Ys2.shape),N)).sum(axis=1)/N , (len(id),1) ) # .. variance + lp[id] = np.reshape( old_div(np.reshape(Lp,(np.prod(Lp.shape),N)).sum(axis=1),N) , (len(id),1) ) # log probability; sample averaging + ymu[id] = np.reshape( old_div(np.reshape(Ymu,(np.prod(Ymu.shape),N)).sum(axis=1),N) ,(len(id),1) ) # predictive mean ys|y and ... + ys2[id] = np.reshape( old_div(np.reshape(Ys2,(np.prod(Ys2.shape),N)).sum(axis=1),N) , (len(id),1) ) # .. variance nact = id[-1]+1 # set counter to index of next data point self.ym = ymu self.ys2 = ys2 @@ -784,7 +790,7 @@ def useLikelihood(self,newLik): :param str newLik: 'Logistic' ''' if newLik == "Logistic": - raise "Logistic likelihood is currently not implemented." + raise Exception("Logistic likelihood is currently not implemented.") #self.likfunc = lik.Logistic() else: raise Exception('Possible lik values are "Logistic".') @@ -830,8 +836,8 @@ def fitAndPredict(self, xs): xs = np.reshape(xs, (xs.shape[0],1)) predictive_vote = np.zeros((xs.shape[0],self.n_class)) - for i in xrange(self.n_class): # classifier for class i... - for j in xrange(i+1,self.n_class): # ...and class j + for i in range(self.n_class): # classifier for class i... + for j in range(i+1,self.n_class): # ...and class j x,y = self.createBinaryClass(i,j) model = GPC() if self.newPrior: @@ -868,8 +874,8 @@ def optimizeAndPredict(self, xs): xs = np.reshape(xs, (xs.shape[0],1)) predictive_vote = np.zeros((xs.shape[0],self.n_class)) - for i in xrange(self.n_class): # classifier for class i... 
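Note (illustration, not part of the patch): the list(range(...)) wrappers introduced in predict() and the xrange -> range changes above follow from the same Python 3 rule: xrange is gone, and range now returns a lazy sequence rather than a list. Indexing and len() still work on a range object, so most call sites here behave the same either way, but futurize adds list() wherever the old code may have relied on getting a real Python 2 list. A short sketch of the difference:

    r = range(5)              # Python 3: lazy sequence; Python 2: the list [0, 1, 2, 3, 4]
    print(r[-1], len(r))      # indexing and len() still work: prints "4 5"
    print(list(r) + [5])      # concatenation needs a real list: [0, 1, 2, 3, 4, 5]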
- for j in xrange(i+1,self.n_class): # ...and class j + for i in range(self.n_class): # classifier for class i... + for j in range(i+1,self.n_class): # ...and class j x,y = self.createBinaryClass(i,j) model = GPC() if self.newPrior: @@ -904,7 +910,7 @@ def createBinaryClass(self, i,j): ''' class_i = [] class_j = [] - for index in xrange(len(self.y_all)): # check all classes + for index in range(len(self.y_all)): # check all classes target = self.y_all[index] if target == i: class_i.append(index) @@ -961,7 +967,7 @@ def setData(self, x, y, value_per_axis=5): # get range of x in each dimension # 5 uniformally selected value for each dimension gridAxis=[] - for d in xrange(x.shape[1]): + for d in range(x.shape[1]): column = x[:,d] mini = np.min(column) maxi = np.max(column) diff --git a/pyGPs/Core/inf.py b/pyGPs/Core/inf.py index e40f7ce..c3bc088 100644 --- a/pyGPs/Core/inf.py +++ b/pyGPs/Core/inf.py @@ -1,3 +1,9 @@ +from __future__ import division +from __future__ import absolute_import +from builtins import str +from builtins import range +from past.utils import old_div +from builtins import object #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -43,9 +49,9 @@ import numpy as np -import lik, cov +from . import lik, cov from copy import copy, deepcopy -from tools import solve_chol, brentmin, cholupdate, jitchol +from .tools import solve_chol, brentmin, cholupdate, jitchol np.seterr(all='ignore') @@ -72,7 +78,7 @@ def __init__(self): def __repr__(self): value = "posterior: to get the parameters of the posterior distribution use:\n"+\ "model.posterior.alpha\n"+"model.posterior.L\n"+"model.posterior.sW\n"+\ - "See documentation and gpml book chapter 2.3 and chapter 3.4.3 for these parameters." + "See documentation and gpml book chapter 2.3 and chapter 3.4.3 for these parameters." return value def __str__(self): @@ -96,11 +102,11 @@ def __init__(self, m, c, l): self.cov = [] self.lik = [] if m.hyp: - self.mean = [0 for i in xrange(len(m.hyp))] + self.mean = [0 for i in range(len(m.hyp))] if c.hyp: - self.cov = [0 for i in xrange(len(c.hyp))] + self.cov = [0 for i in range(len(c.hyp))] if l.hyp: - self.lik = [0 for i in xrange(len(l.hyp))] + self.lik = [0 for i in range(len(l.hyp))] def __str__(self): value = "Derivatives of mean, cov and lik functions:\n" +\ @@ -166,12 +172,12 @@ def _epComputeParams(self, K, y, ttau, tnu, likfunc, m, inffunc): Sigma = K - np.dot(V.T,V) mu = np.dot(Sigma,tnu) Dsigma = np.reshape(np.diag(Sigma),(np.diag(Sigma).shape[0],1)) - tau_n = 1/Dsigma - ttau # compute the log marginal likelihood - nu_n = mu/Dsigma-tnu + m*tau_n # vectors of cavity parameters - lZ = likfunc.evaluate(y, nu_n/tau_n, 1/tau_n, inffunc) - nlZ = np.log(np.diag(L)).sum() - lZ.sum() - np.dot(tnu.T,np.dot(Sigma,tnu))/2 \ - - np.dot((nu_n-m*tau_n).T,((ttau/tau_n*(nu_n-m*tau_n)-2*tnu) / (ttau+tau_n)))/2 \ - + (tnu**2/(tau_n+ttau)).sum()/2.- np.log(1.+ttau/tau_n).sum()/2. 
+ tau_n = old_div(1,Dsigma) - ttau # compute the log marginal likelihood + nu_n = old_div(mu,Dsigma)-tnu + m*tau_n # vectors of cavity parameters + lZ = likfunc.evaluate(y, old_div(nu_n,tau_n), old_div(1,tau_n), inffunc) + nlZ = np.log(np.diag(L)).sum() - lZ.sum() - old_div(np.dot(tnu.T,np.dot(Sigma,tnu)),2) \ + - old_div(np.dot((nu_n-m*tau_n).T,(old_div((ttau/tau_n*(nu_n-m*tau_n)-2*tnu), (ttau+tau_n)))),2) \ + + old_div((old_div(tnu**2,(tau_n+ttau))).sum(),2.)- old_div(np.log(1.+old_div(ttau,tau_n)).sum(),2.) return Sigma, mu, nlZ[0], L def _logdetA(self,K,w,nargout): @@ -187,7 +193,7 @@ def _logdetA(self,K,w,nargout): u = np.diag(U) signU = np.prod(np.sign(u)) # sign of U detP = 1 # compute sign (and det) of the permutation matrix P - p = np.dot(P,np.array(range(n)).T) + p = np.dot(P,np.array(list(range(n))).T) for ii in range(n): if ii != p[ii]: detP = -detP @@ -215,7 +221,7 @@ def _Psi_line(self,s,dalpha,alpha,K,m,likfunc,y,inffunc): f = np.dot(K,alpha) + m [lp,dlp,d2lp] = likfunc.evaluate(y,f,None,inffunc,None,3) W = -d2lp - Psi = np.dot(alpha.T,(f-m))/2. - lp.sum() + Psi = old_div(np.dot(alpha.T,(f-m)),2.) - lp.sum() return Psi[0],alpha,f,dlp,W def _epfitcZ(self,d,P,R,nn,gg,ttau,tnu,d0,R0,P0,y,likfunc,m,inffunc): @@ -226,18 +232,18 @@ def _epfitcZ(self,d,P,R,nn,gg,ttau,tnu,d0,R0,P0,y,likfunc,m,inffunc): T = np.dot(np.dot(R,R0),P) # temporary variable diag_sigma = d + np.array([(T*T).sum(axis=0)]).T mu = nn + np.dot(P.T,gg) # post moments O(n*nu^2) - tau_n = 1./diag_sigma-ttau # compute the log marginal likelihood - nu_n = mu/diag_sigma - tnu + m*tau_n # vectors of cavity parameters - lZ = likfunc.evaluate(y, nu_n/tau_n, 1/tau_n, inffunc, None, 1) + tau_n = old_div(1.,diag_sigma)-ttau # compute the log marginal likelihood + nu_n = old_div(mu,diag_sigma) - tnu + m*tau_n # vectors of cavity parameters + lZ = likfunc.evaluate(y, old_div(nu_n,tau_n), old_div(1,tau_n), inffunc, None, 1) nu = gg.shape[0] - U = np.dot(R0,P0).T*np.tile(1/np.sqrt(d0+1/ttau),(1,nu)) + U = np.dot(R0,P0).T*np.tile(old_div(1,np.sqrt(d0+old_div(1,ttau))),(1,nu)) #L = np.linalg.cholesky(np.eye(nu)+np.dot(U.T,U)).T L = jitchol(np.eye(nu)+np.dot(U.T,U)).T - ld = 2.*np.log(np.diag(L)).sum() + (np.log(d0+1/ttau)).sum() + (np.log(ttau)).sum() + ld = 2.*np.log(np.diag(L)).sum() + (np.log(d0+old_div(1,ttau))).sum() + (np.log(ttau)).sum() t = np.dot(T,tnu); tnu_Sigma_tnu = np.dot(tnu.T,(d*tnu)) + np.dot(t.T,t) - nlZ = ld/2. - lZ.sum() -tnu_Sigma_tnu/2. \ - -np.dot((nu_n-m*tau_n).T,((ttau/tau_n*(nu_n-m*tau_n)-2.*tnu)/(ttau+tau_n)))/2. \ - + (tnu**2/(tau_n+ttau)).sum()/2. - (np.log(1+ttau/tau_n)).sum()/2. + nlZ = old_div(ld,2.) - lZ.sum() -old_div(tnu_Sigma_tnu,2.) \ + -old_div(np.dot((nu_n-m*tau_n).T,(old_div((ttau/tau_n*(nu_n-m*tau_n)-2.*tnu),(ttau+tau_n)))),2.) \ + + old_div((old_div(tnu**2,(tau_n+ttau))).sum(),2.) - old_div((np.log(1+old_div(ttau,tau_n))).sum(),2.) 
return nlZ,nu_n,tau_n def _epfitcRefresh(self,d0,P0,R0,R0P0,w,b): @@ -251,7 +257,7 @@ def _epfitcRefresh(self,d0,P0,R0,R0P0,w,b): rot180 = lambda A: np.rot90(np.rot90(A)) # little helper functions #chol_inv = lambda A: np.linalg.solve( rot180( np.linalg.cholesky(rot180(A)) ),np.eye(nu)) # chol(inv(A)) chol_inv = lambda A: np.linalg.solve( rot180( jitchol(rot180(A)) ),np.eye(nu)) # chol(inv(A)) - t = 1/(1+d0*w) # temporary variable O(n) + t = old_div(1,(1+d0*w)) # temporary variable O(n) d = d0*t # O(n) P = np.tile(t.T,(nu,1))*P0 # O(n*nu) T = np.tile((w*t).T,(nu,1))*R0P0 # temporary variable O(n*nu^2) @@ -265,21 +271,21 @@ def _epfitcUpdate(self,d,P_i,R,nn,gg,w,b,ii,w_i,b_i,m,d0,P0,R0): dbi = b_i-b[ii] hi = nn[ii] + m[ii] + np.dot(P_i.T,gg) # posterior mean of site i O(nu) t = 1+dwi*d[ii] - d[ii] = d[ii]/t # O(1) + d[ii] = old_div(d[ii],t) # O(1) nn[ii] = d[ii]*b_i # O(1) r = 1+d0[ii]*w[ii] - r = (r*r)/dwi + r*d0[ii] + r = old_div((r*r),dwi) + r*d0[ii] v = np.dot(R,np.dot(R0,P0[:,ii])) v = np.reshape(v,(v.shape[0],1)) - r = 1/(r+np.dot(v.T,v)) + r = old_div(1,(r+np.dot(v.T,v))) if r>0: R = cholupdate(R,np.sqrt(r)*np.dot(R.T,v),'-') else: R = cholupdate(R,np.sqrt(-r)*np.dot(R.T,v),'+') ttemp = np.dot(R0.T,np.dot(R.T,np.dot(R,np.dot(R0,P_i)))) - gg = gg + ((dbi-dwi*(hi-m[ii]))/t) * np.reshape(ttemp,(ttemp.shape[0],1)) # O(nu^2) + gg = gg + (old_div((dbi-dwi*(hi-m[ii])),t)) * np.reshape(ttemp,(ttemp.shape[0],1)) # O(nu^2) w[ii] = w_i; b[ii] = b_i; # update site parameters O(1) - P_i = P_i/t # O(nu) + P_i = old_div(P_i,t) # O(nu) return d,P_i,R,nn,gg,w,b def _mvmZ(self,x,RVdd,t): @@ -305,7 +311,7 @@ def _Psi_lineFITC(self,s,dalpha,alpha,V,d0,m,likfunc,y,inffunc): vargout = likfunc.evaluate(y,f,None,inffunc,None,3) lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2] W = -d2lp - Psi = np.dot(alpha.T,(f-m))/2. - lp.sum() + Psi = old_div(np.dot(alpha.T,(f-m)),2.) - lp.sum() return Psi[0],alpha,f,dlp,W def _fitcRefresh(self,d0,P0,R0,R0P0, w): @@ -319,7 +325,7 @@ def _fitcRefresh(self,d0,P0,R0,R0P0, w): rot180 = lambda A: np.rot90(np.rot90(A)) # little helper functions #chol_inv = lambda A: np.linalg.solve( rot180( np.linalg.cholesky(rot180(A)) ),np.eye(nu)) # chol(inv(A)) chol_inv = lambda A: np.linalg.solve( rot180( jitchol(rot180(A)) ),np.eye(nu)) # chol(inv(A)) - t = 1/(1+d0*w) # temporary variable O(n) + t = old_div(1,(1+d0*w)) # temporary variable O(n) d = d0*t # O(n) P = np.tile(t.T,(nu,1))*P0; # O(n*nu) T = np.tile((w*t).T,(nu,1))*R0P0; # temporary variable O(n*nu^2) @@ -344,22 +350,22 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): sn2 = np.exp(2*likfunc.hyp[0]) # noise variance of likGauss #L = np.linalg.cholesky(K/sn2+np.eye(n)).T # Cholesky factor of covariance with noise - L = jitchol(K/sn2+np.eye(n)).T # Cholesky factor of covariance with noise - alpha = solve_chol(L,y-m)/sn2 + L = jitchol(old_div(K,sn2)+np.eye(n)).T # Cholesky factor of covariance with noise + alpha = old_div(solve_chol(L,y-m),sn2) post = postStruct() post.alpha = alpha # return the posterior parameters - post.sW = np.ones((n,1))/np.sqrt(sn2) # sqrt of noise precision vector + post.sW = old_div(np.ones((n,1)),np.sqrt(sn2)) # sqrt of noise precision vector post.L = L # L = chol(eye(n)+sW*sW'.*K) if nargout>1: # do we want the marginal likelihood? - nlZ = np.dot((y-m).T,alpha)/2. + np.log(np.diag(L)).sum() + n*np.log(2*np.pi*sn2)/2. # -log marg lik + nlZ = old_div(np.dot((y-m).T,alpha),2.) + np.log(np.diag(L)).sum() + n*np.log(2*np.pi*sn2)/2. # -log marg lik if nargout>2: # do we want derivatives? 
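Note (illustration, not part of the patch): the old_div(a, b) calls that dominate these inference hunks come from past.utils in the future package. old_div keeps Python 2 division semantics after from __future__ import division is in force: floor division when both operands are integers, true division otherwise, so the floating-point maths above is numerically unchanged. Most of the rewritten expressions operate on floats or NumPy arrays (e.g. K/sn2), where old_div is identical to plain /; a few, such as the index computation q = old_div((der - Q - p), D) in cov.py, genuinely rely on the integer floor. A minimal sketch of the semantics:

    from __future__ import division
    from past.utils import old_div
    import numpy as np

    print(1 / 2)                     # 0.5 -- true division once __future__ division is imported
    print(old_div(1, 2))             # 0   -- Python 2 result for two ints (floor division)
    print(old_div(1., 2))            # 0.5 -- unchanged when either operand is a float
    print(old_div(np.ones(3), 2.))   # [0.5 0.5 0.5] -- float arrays are unaffected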
dnlZ = dnlZStruct(meanfunc, covfunc, likfunc) # allocate space for derivatives - Q = solve_chol(L,np.eye(n))/sn2 - np.dot(alpha,alpha.T) # precompute for convenience + Q = old_div(solve_chol(L,np.eye(n)),sn2) - np.dot(alpha,alpha.T) # precompute for convenience dnlZ.lik = [sn2*np.trace(Q)] if covfunc.hyp: for ii in range(len(covfunc.hyp)): - dnlZ.cov[ii] = (Q*covfunc.getDerMatrix(x=x, mode='train', der=ii)).sum()/2. + dnlZ.cov[ii] = old_div((Q*covfunc.getDerMatrix(x=x, mode='train', der=ii)).sum(),2.) if meanfunc.hyp: for ii in range(len(meanfunc.hyp)): dnlZ.mean[ii] = np.dot(-meanfunc.getDerMatrix(x, ii).T,alpha) @@ -398,37 +404,37 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): V = np.linalg.solve(Luu.T,Ku) # V = inv(Luu')*Ku => V'*V = Q g_sn2 = diagK + sn2 - np.array([(V*V).sum(axis=0)]).T # g + sn2 = diag(K) + sn2 - diag(Q) #Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V/np.tile(g_sn2.T,(nu,1)),V.T)).T # Lu'*Lu=I+V*diag(1/g_sn2)*V' - Lu = jitchol(np.eye(nu) + np.dot(V/np.tile(g_sn2.T,(nu,1)),V.T)).T # Lu'*Lu=I+V*diag(1/g_sn2)*V' - r = (y-m)/np.sqrt(g_sn2) - be = np.linalg.solve(Lu.T,np.dot(V,r/np.sqrt(g_sn2))) + Lu = jitchol(np.eye(nu) + np.dot(old_div(V,np.tile(g_sn2.T,(nu,1))),V.T)).T # Lu'*Lu=I+V*diag(1/g_sn2)*V' + r = old_div((y-m),np.sqrt(g_sn2)) + be = np.linalg.solve(Lu.T,np.dot(V,old_div(r,np.sqrt(g_sn2)))) iKuu = solve_chol(Luu,np.eye(nu)) # inv(Kuu + snu2*I) = iKuu post = postStruct() post.alpha = np.linalg.solve(Luu,np.linalg.solve(Lu,be)) # return the posterior parameters post.L = solve_chol(np.dot(Lu,Luu),np.eye(nu)) - iKuu # Sigma-inv(Kuu) - post.sW = np.ones((n,1))/np.sqrt(sn2) # unused for FITC prediction with gp.m + post.sW = old_div(np.ones((n,1)),np.sqrt(sn2)) # unused for FITC prediction with gp.m if nargout>1: # do we want the marginal likelihood - nlZ = np.log(np.diag(Lu)).sum() + (np.log(g_sn2).sum() + n*np.log(2*np.pi) + np.dot(r.T,r) - np.dot(be.T,be))/2. + nlZ = np.log(np.diag(Lu)).sum() + old_div((np.log(g_sn2).sum() + n*np.log(2*np.pi) + np.dot(r.T,r) - np.dot(be.T,be)),2.) if nargout>2: # do we want derivatives? dnlZ = dnlZStruct(meanfunc, covfunc, likfunc) # allocate space for derivatives - al = r/np.sqrt(g_sn2) - np.dot(V.T,np.linalg.solve(Lu,be))/g_sn2 # al = (Kt+sn2*eye(n))\y + al = old_div(r,np.sqrt(g_sn2)) - old_div(np.dot(V.T,np.linalg.solve(Lu,be)),g_sn2) # al = (Kt+sn2*eye(n))\y B = np.dot(iKuu,Ku) w = np.dot(B,al) - W = np.linalg.solve(Lu.T,V/np.tile(g_sn2.T,(nu,1))) + W = np.linalg.solve(Lu.T,old_div(V,np.tile(g_sn2.T,(nu,1)))) for ii in range(len(covfunc.hyp)): [ddiagKi,dKuui,dKui] = covfunc.getDerMatrix(x=x, mode='train', der=ii) # eval cov deriv R = 2.*dKui-np.dot(dKuui,B) v = ddiagKi - np.array([(R*B).sum(axis=0)]).T # diag part of cov deriv - dnlZ.cov[ii] = ( np.dot(ddiagKi.T,1./g_sn2) + np.dot(w.T,(np.dot(dKuui,w)-2.*np.dot(dKui,al))) \ - - np.dot(al.T,(v*al)) - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() )/2. + dnlZ.cov[ii] = old_div(( np.dot(ddiagKi.T,old_div(1.,g_sn2)) + np.dot(w.T,(np.dot(dKuui,w)-2.*np.dot(dKui,al))) \ + - np.dot(al.T,(v*al)) - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() ),2.) 
dnlZ.cov[ii] = dnlZ.cov[ii][0,0] - dnlZ.lik = sn2*((1./g_sn2).sum() - (np.array([(W*W).sum(axis=0)])).sum() - np.dot(al.T,al)) + dnlZ.lik = sn2*((old_div(1.,g_sn2)).sum() - (np.array([(W*W).sum(axis=0)])).sum() - np.dot(al.T,al)) dKuui = 2*snu2 R = -dKuui*B v = -np.array([(R*B).sum(axis=0)]).T # diag part of cov deriv - dnlZ.lik += (np.dot(w.T,np.dot(dKuui,w)) -np.dot(al.T,(v*al)) \ - - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() )/2. + dnlZ.lik += old_div((np.dot(w.T,np.dot(dKuui,w)) -np.dot(al.T,(v*al)) \ + - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() ),2.) dnlZ.lik = list(dnlZ.lik[0]) for ii in range(len(meanfunc.hyp)): dnlZ.mean[ii] = np.dot(-meanfunc.getDerMatrix(x, ii).T, al) @@ -469,7 +475,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): vargout = likfunc.evaluate(y, f, None, inffunc, None, 3) lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2] W= -d2lp - Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum() # objective for last alpha + Psi_new = old_div(np.dot(alpha.T,(f-m)),2.) - lp.sum() # objective for last alpha vargout = - likfunc.evaluate(y, m, None, inffunc, None, 1) Psi_def = vargout[0] # objective for default init f==m if Psi_def < Psi_new: # if default is better, we use it @@ -508,27 +514,27 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): post.sW = np.sqrt(np.abs(W))*np.sign(W) # preserve sign in case of negative if isWneg: [ldA,iA,post.L] = self._logdetA(K,W,3) - nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() + ldA/2. + nlZ = old_div(np.dot(alpha.T,(f-m)),2.) - lp.sum() + old_div(ldA,2.) nlZ = nlZ[0] else: sW = post.sW #post.L = np.linalg.cholesky(np.eye(n)+np.dot(sW,sW.T)*K).T post.L = jitchol(np.eye(n)+np.dot(sW,sW.T)*K).T - nlZ = np.dot(alpha.T,(f-m))/2. + (np.log(np.diag(post.L))-np.reshape(lp,(lp.shape[0],))).sum() + nlZ = old_div(np.dot(alpha.T,(f-m)),2.) + (np.log(np.diag(post.L))-np.reshape(lp,(lp.shape[0],))).sum() nlZ = nlZ[0] if nargout>2: # do we want derivatives? dnlZ = dnlZStruct(meanfunc, covfunc, likfunc) # allocate space for derivatives if isWneg: # switch between Cholesky and LU decomposition mode Z = -post.L # inv(K+inv(W)) - g = np.atleast_2d((iA*K).sum(axis=1)).T /2 # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2 + g = old_div(np.atleast_2d((iA*K).sum(axis=1)).T,2) # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2 else: Z = np.tile(sW,(1,n))*solve_chol(post.L,np.diag(np.reshape(sW,(sW.shape[0],)))) #sW*inv(B)*sW=inv(K+inv(W)) C = np.linalg.solve(post.L.T,np.tile(sW,(1,n))*K) # deriv. of ln|B| wrt W - g = np.atleast_2d((np.diag(K)-(C**2).sum(axis=0).T)).T /2. # g = diag(inv(inv(K)+W))/2 + g = old_div(np.atleast_2d((np.diag(K)-(C**2).sum(axis=0).T)).T,2.) # g = diag(inv(inv(K)+W))/2 dfhat = g* d3lp # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2 for ii in range(len(covfunc.hyp)): # covariance hypers dK = covfunc.getDerMatrix(x=x, mode='train', der=ii) - dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T,np.dot(dK,alpha))/2. # explicit part + dnlZ.cov[ii] = old_div((Z*dK).sum(),2.) - old_div(np.dot(alpha.T,np.dot(dK,alpha)),2.) 
# explicit part b = np.dot(dK,dlp) # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b dnlZ.cov[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b))) # implicit part dnlZ.cov[ii] = dnlZ.cov[ii][0,0] @@ -599,7 +605,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): vargout = likfunc.evaluate(y, f, None, inffunc, None, 3) lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2] W=-d2lp - Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum() # objective for last alpha + Psi_new = old_div(np.dot(alpha.T,(f-m)),2.) - lp.sum() # objective for last alpha vargout = - likfunc.evaluate(y, m, None, inffunc, None, 1) Psi_def = vargout[0] # objective for default init f==m if Psi_def < Psi_new: # if default is better, we use it @@ -618,7 +624,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): if isWneg: # stabilise the Newton direction in case W has negative values W = np.maximum(W,0) # stabilise the Hessian to guarantee postive definiteness tol = 1e-8 # increase accuracy to also get the derivatives right - b = W*(f-m) + dlp; dd = 1/(1+W*d0) + b = W*(f-m) + dlp; dd = old_div(1,(1+W*d0)) RV = np.dot( chol_inv( np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)),V ) dalpha = dd*b - (W*dd)*np.dot(RV.T,np.dot(RV,(dd*b))) - alpha # Newt dir + line search vargout = brentmin(0,smax,Nline,thr,self._Psi_lineFITC,4,dalpha,alpha,V,d0,m,likfunc,y,inffunc) @@ -634,7 +640,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): post = postStruct() post.alpha = np.dot(R0.T,np.dot(V,alpha)) # return the posterior parameters post.sW = np.sqrt(np.abs(W))*np.sign(W) # preserve sign in case of negative - dd = 1/(1+d0*W) # temporary variable O(n) + dd = old_div(1,(1+d0*W)) # temporary variable O(n) A = np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T) # temporary variable O(n*nu^2) R0tV = np.dot(R0.T,V); B = R0tV*np.tile((W*dd).T,(nu,1)) # temporary variables O(n*nu^2) post.L = -np.dot(B,R0tV.T) # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part @@ -642,7 +648,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): raise Exception('W is too negative; nlZ and dnlZ cannot be computed.') #nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \ # np.log(np.diag(np.linalg.cholesky(A).T)).sum() - nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \ + nlZ = old_div(np.dot(alpha.T,(f-m)),2.) - lp.sum() - old_div(np.log(dd).sum(),2.) + \ np.log(np.diag(jitchol(A).T)).sum() RV = np.dot(chol_inv(A),V) RVdd = RV * np.tile((W*dd).T,(nu,1)) # RVdd needed for dnlZ @@ -652,8 +658,8 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): if nargout>2: # do we want derivatives? dnlZ = dnlZStruct(meanfunc, covfunc, likfunc) # allocate space for derivatives [d,P,R] = self._fitcRefresh(d0,Ku,R0,V,W) # g = diag(inv(inv(K)+W))/2 - g = d/2 + 0.5*np.atleast_2d((np.dot(np.dot(R,R0),P)**2).sum(axis=0)).T - t = W/(1+W*d0) + g = old_div(d,2) + 0.5*np.atleast_2d((np.dot(np.dot(R,R0),P)**2).sum(axis=0)).T + t = old_div(W,(1+W*d0)) dfhat = g*d3lp # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2 for ii in range(len(covfunc.hyp)): # covariance hypers @@ -663,7 +669,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): v = ddiagK-w # v = diag(dK)-diag(dQ); dnlZ.cov[ii] = np.dot(ddiagK.T,t) - np.dot((RVdd*RVdd).sum(axis=0),v) # explicit part dnlZ.cov[ii] -= (np.dot(RVdd,dA)*np.dot(RVdd,R0tV.T)).sum() # explicit part - dnlZ.cov[ii] = 0.5*dnlZ.cov[ii] - np.dot(alpha.T,np.dot(dA,np.dot(R0tV,alpha))+v*alpha)/2. 
# explicit + dnlZ.cov[ii] = 0.5*dnlZ.cov[ii] - old_div(np.dot(alpha.T,np.dot(dA,np.dot(R0tV,alpha))+v*alpha),2.) # explicit b = np.dot(dA,np.dot(R0tV,dlp)) + v*dlp # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b KZb = self._mvmK(self._mvmZ(b,RVdd,t),V,d0) dnlZ.cov[ii] -= np.dot(dfhat.T,(b-KZb)) # implicit part @@ -684,7 +690,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): t = np.array([(T*T).sum(axis=0)]).T z = np.dot(alpha.T,np.dot(T.T,np.dot(T,alpha))-t*alpha) - np.dot(np.array([(RVdd*RVdd).sum(axis=0)]),t) z += (np.dot(RVdd,T.T)**2).sum() - b = (t*dlp-np.dot(T.T,np.dot(T,dlp)))/2. + b = old_div((t*dlp-np.dot(T.T,np.dot(T,dlp))),2.) KZb = self._mvmK(self._mvmZ(b,RVdd,t),V,d0) z -= np.dot(dfhat.T,b-KZb) dnlZ.lik[ii] += z @@ -737,16 +743,16 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): nlZ_old = np.inf; sweep = 0 # converged, max. sweeps or min. sweeps? while (np.abs(nlZ-nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep): nlZ_old = nlZ; sweep += 1 - rperm = xrange(n) # randperm(n) + rperm = range(n) # randperm(n) for ii in rperm: # iterate EP updates (in random order) over examples - tau_ni = 1/Sigma[ii,ii] - ttau[ii]# first find the cavity distribution .. - nu_ni = mu[ii]/Sigma[ii,ii] + m[ii]*tau_ni - tnu[ii] # .. params tau_ni and nu_ni + tau_ni = old_div(1,Sigma[ii,ii]) - ttau[ii]# first find the cavity distribution .. + nu_ni = old_div(mu[ii],Sigma[ii,ii]) + m[ii]*tau_ni - tnu[ii] # .. params tau_ni and nu_ni # compute the desired derivatives of the indivdual log partition function - lZ,dlZ,d2lZ = likfunc.evaluate(y[ii], nu_ni/tau_ni, 1/tau_ni, inffunc, None, 3) + lZ,dlZ,d2lZ = likfunc.evaluate(y[ii], old_div(nu_ni,tau_ni), old_div(1,tau_ni), inffunc, None, 3) ttau_old = copy(ttau[ii]) # then find the new tilde parameters, keep copy of old - ttau[ii] = -d2lZ /(1.+d2lZ/tau_ni) + ttau[ii] = old_div(-d2lZ,(1.+old_div(d2lZ,tau_ni))) ttau[ii] = max(ttau[ii],0) # enforce positivity i.e. lower bound ttau by zero - tnu[ii] = ( dlZ + (m[ii]-nu_ni/tau_ni)*d2lZ )/(1.+d2lZ/tau_ni) + tnu[ii] = old_div(( dlZ + (m[ii]-old_div(nu_ni,tau_ni))*d2lZ ),(1.+old_div(d2lZ,tau_ni))) ds2 = ttau[ii] - ttau_old # finally rank-1 update Sigma .. si = np.reshape(Sigma[:,ii],(Sigma.shape[0],1)) Sigma = Sigma - ds2/(1.+ds2*si[ii])*np.dot(si,si.T) # takes 70# of total time @@ -769,17 +775,17 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): Sigma = K - np.dot(V.T,V) mu = np.dot(Sigma,tnu) Dsigma = np.reshape(np.diag(Sigma),(np.diag(Sigma).shape[0],1)) - tau_n = 1/Dsigma-ttau # compute the log marginal likelihood - nu_n = mu/Dsigma-tnu # vectors of cavity parameters + tau_n = old_div(1,Dsigma)-ttau # compute the log marginal likelihood + nu_n = old_div(mu,Dsigma)-tnu # vectors of cavity parameters F = np.dot(alpha,alpha.T) - np.tile(sW,(1,n))* \ solve_chol(L,np.diag(np.reshape(sW,(sW.shape[0],)))) # covariance hypers for jj in range(len(covfunc.hyp)): dK = covfunc.getDerMatrix(x=x, mode='train', der=jj) - dnlZ.cov[jj] = -(F*dK).sum()/2. + dnlZ.cov[jj] = old_div(-(F*dK).sum(),2.) 
for ii in range(len(likfunc.hyp)): - dlik = likfunc.evaluate(y, nu_n/tau_n, 1/tau_n, inffunc, ii) + dlik = likfunc.evaluate(y, old_div(nu_n,tau_n), old_div(1,tau_n), inffunc, ii) dnlZ.lik[ii] = -dlik.sum() - junk,dlZ = likfunc.evaluate(y, nu_n/tau_n, 1/tau_n, inffunc, None, 2) # mean hyps + junk,dlZ = likfunc.evaluate(y, old_div(nu_n,tau_n), old_div(1,tau_n), inffunc, None, 2) # mean hyps for ii in range(len(meanfunc.hyp)): dm = meanfunc.getDerMatrix(x, ii) dnlZ.mean[ii] = -np.dot(dlZ.T,dm) @@ -859,19 +865,19 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): while (np.abs(nlZ-nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep): nlZ_old = nlZ sweep += 1 - rperm = range(n) # randperm(n) + rperm = list(range(n)) # randperm(n) for ii in rperm: # iterate EP updates (in random order) over examples p_i = np.reshape(P[:,ii],(P.shape[0],1)) t = np.dot(R,np.dot(R0,p_i)) # temporary variables sigma_i = d[ii] + np.dot(t.T,t); mu_i = nn[ii] + np.dot(p_i.T,gg) # post moments O(nu^2) - tau_ni = 1/sigma_i - ttau[ii] # first find the cavity distribution .. - nu_ni = mu_i/sigma_i + m[ii]*tau_ni - tnu[ii] # .. params tau_ni and nu_ni + tau_ni = old_div(1,sigma_i) - ttau[ii] # first find the cavity distribution .. + nu_ni = old_div(mu_i,sigma_i) + m[ii]*tau_ni - tnu[ii] # .. params tau_ni and nu_ni # compute the desired derivatives of the indivdual log partition function - vargout = likfunc.evaluate(y[ii], nu_ni/tau_ni, 1/tau_ni, inffunc, None, 3) + vargout = likfunc.evaluate(y[ii], old_div(nu_ni,tau_ni), old_div(1,tau_ni), inffunc, None, 3) lZ = vargout[0]; dlZ = vargout[1]; d2lZ = vargout[2] - ttau_i = -d2lZ /(1.+d2lZ/tau_ni) + ttau_i = old_div(-d2lZ,(1.+old_div(d2lZ,tau_ni))) ttau_i = max(ttau_i,0) # enforce positivity i.e. lower bound ttau by zero - tnu_i = ( dlZ + (m[ii]-nu_ni/tau_ni)*d2lZ )/(1.+d2lZ/tau_ni) + tnu_i = old_div(( dlZ + (m[ii]-old_div(nu_ni,tau_ni))*d2lZ ),(1.+old_div(d2lZ,tau_ni))) [d,P[:,ii],R,nn,gg,ttau,tnu] = self._epfitcUpdate(d,P[:,ii],R,nn,gg,ttau,tnu,ii,ttau_i,tnu_i,m,d0,Ku,R0)# update representation # recompute since repeated rank-one updates can destroy numerical precision @@ -885,7 +891,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): self.last_tnu = tnu # remember for next call post = postStruct() post.sW = np.sqrt(ttau) # unused for FITC_EP prediction with gp.m - dd = 1/(d0+1/ttau) + dd = old_div(1,(d0+old_div(1,ttau))) alpha = tnu/ttau*dd RV = np.dot(R,V) R0tV = np.dot(R0.T,V) @@ -904,10 +910,10 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): v = ddiagK - w # v = diag(dK)-diag(dQ) z = np.dot(dd.T,(v+w)) - np.dot(np.atleast_2d((RVdd*RVdd).sum(axis=0)), v) \ - (np.dot(RVdd,dA).T * np.dot(R0tV,RVdd.T)).sum() - dnlZ.cov[ii] = (z - np.dot(alpha.T,(alpha*v)) - np.dot(np.dot(alpha.T,dA),np.dot(R0tV,alpha)))/2. + dnlZ.cov[ii] = old_div((z - np.dot(alpha.T,(alpha*v)) - np.dot(np.dot(alpha.T,dA),np.dot(R0tV,alpha))),2.) 
dnlZ.cov[ii] = dnlZ.cov[ii][0,0] for ii in range(len(likfunc.hyp)): # likelihood hypers - dlik = likfunc.evaluate(y, nu_n/tau_n+m, 1/tau_n, inffunc, ii, 1) + dlik = likfunc.evaluate(y, old_div(nu_n,tau_n)+m, old_div(1,tau_n), inffunc, ii, 1) dnlZ.lik[ii] = -dlik.sum() if ii == len(likfunc.hyp)-1: # since snu2 is a fixed fraction of sn2, there is a covariance-like term @@ -917,7 +923,7 @@ def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1): z = z + np.dot(post.alpha.T,post.alpha) - np.dot(alpha.T,(v*alpha)) dnlZ.lik[ii] += snu2*z dnlZ.lik[ii] = dnlZ.lik[ii][0,0] - [junk,dlZ] = likfunc.evaluate(y, nu_n/tau_n, 1/tau_n, inffunc, None, 2) # mean hyps + [junk,dlZ] = likfunc.evaluate(y, old_div(nu_n,tau_n), old_div(1,tau_n), inffunc, None, 2) # mean hyps for ii in range(len(meanfunc.hyp)): dm = meanfunc.getDerMatrix(x, ii) dnlZ.mean[ii] = -np.dot(dlZ.T,dm) diff --git a/pyGPs/Core/lik.py b/pyGPs/Core/lik.py index 4f484f6..eb18f50 100644 --- a/pyGPs/Core/lik.py +++ b/pyGPs/Core/lik.py @@ -1,3 +1,7 @@ +from __future__ import division +from __future__ import absolute_import +from past.utils import old_div +from builtins import object # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] # Shan Huang [shan dot huang at iais dot fraunhofer dot de] @@ -39,12 +43,12 @@ import numpy as np from scipy.special import erf -import inf class Likelihood(object): """Base function for Likelihood function""" def __init__(self): self.hyp = [] + def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): ''' The likelihood functions have two possible modes, the mode being selected @@ -129,6 +133,7 @@ def __init__(self, log_sigma=np.log(0.1) ): self.hyp = [log_sigma] def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): + from . import inf sn2 = np.exp(2. * self.hyp[0]) if inffunc is None: # prediction mode if y is None: @@ -137,7 +142,7 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): if (not s2 is None) and np.linalg.norm(s2) > 0: s2zero = False if s2zero: # log probability - lp = -(y-mu)**2 /sn2/2 - np.log(2.*np.pi*sn2)/2. + lp = -(y-mu)**2 /sn2/2 - old_div(np.log(2.*np.pi*sn2),2.) s2 = np.zeros_like(s2) else: inf_func = inf.EP() # prediction @@ -154,29 +159,29 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): else: if isinstance(inffunc, inf.EP): if der is None: # no derivative mode - lZ = -(y-mu)**2/(sn2+s2)/2. - np.log(2*np.pi*(sn2+s2))/2. # log part function + lZ = -(y-mu)**2/(sn2+s2)/2. - old_div(np.log(2*np.pi*(sn2+s2)),2.) # log part function if nargout>1: - dlZ = (y-mu)/(sn2+s2) # 1st derivative w.r.t. mean + dlZ = old_div((y-mu),(sn2+s2)) # 1st derivative w.r.t. mean if nargout>2: - d2lZ = -1/(sn2+s2) # 2nd derivative w.r.t. mean + d2lZ = old_div(-1,(sn2+s2)) # 2nd derivative w.r.t. mean return lZ,dlZ,d2lZ else: return lZ,dlZ else: return lZ else: # derivative mode - dlZhyp = ((y-mu)**2/(sn2+s2)-1) / (1+s2/sn2) # deriv. w.r.t. hyp.lik + dlZhyp = old_div((old_div((y-mu)**2,(sn2+s2))-1), (1+old_div(s2,sn2))) # deriv. w.r.t. hyp.lik return dlZhyp elif isinstance(inffunc, inf.Laplace): if der is None: # no derivative mode if y is None: y=0 ymmu = y-mu - lp = -ymmu**2/(2*sn2) - np.log(2*np.pi*sn2)/2. + lp = old_div(-ymmu**2,(2*sn2)) - old_div(np.log(2*np.pi*sn2),2.) 
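Note (illustration, not part of the patch): in lik.py the module-level import inf is removed and each evaluate() now starts with from . import inf. Beyond making the import package-relative, moving it inside the method defers it to the first call, which presumably avoids the circular dependency between lik and inf, since inf.py itself does from . import lik, cov at import time. A small sketch of the pattern with hypothetical module names pkg/a.py and pkg/b.py:

    # pkg/a.py
    from . import b                  # safe: b is fully importable without a

    def helper():
        return 42

    # pkg/b.py
    def evaluate():
        from . import a              # deferred relative import: resolved at call time,
        return a.helper()            # so importing pkg.b never needs pkg.a mid-initialisation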
if nargout>1: - dlp = ymmu/sn2 # dlp, derivative of log likelihood + dlp = old_div(ymmu,sn2) # dlp, derivative of log likelihood if nargout>2: # d2lp, 2nd derivative of log likelihood - d2lp = -np.ones_like(ymmu)/sn2 + d2lp = old_div(-np.ones_like(ymmu),sn2) if nargout>3: # d3lp, 3rd derivative of log likelihood d3lp = np.zeros_like(ymmu) return lp,dlp,d2lp,d3lp @@ -187,7 +192,7 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): else: return lp else: # derivative mode - lp_dhyp = (y-mu)**2/sn2 - 1 # derivative of log likelihood w.r.t. hypers + lp_dhyp = old_div((y-mu)**2,sn2) - 1 # derivative of log likelihood w.r.t. hypers dlp_dhyp = 2*(mu-y)/sn2 # first derivative, d2lp_dhyp = 2*np.ones_like(mu)/sn2 # and also of the second mu derivative return lp_dhyp,dlp_dhyp,d2lp_dhyp @@ -239,6 +244,7 @@ def __init__(self): self.hyp = [] def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): + from . import inf if not y is None: y = np.sign(y) y[y==0] = 1 @@ -288,7 +294,7 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): elif isinstance(inffunc, inf.EP): if der is None: # no derivative mode - z = mu/np.sqrt(1+s2) + z = old_div(mu,np.sqrt(1+s2)) junk,lZ = self.cumGauss(y,z,2) # log part function if not y is None: z = z*y @@ -325,7 +331,7 @@ def cumGauss(self, y=None, f=None, nargout=1): yf = y*f else: yf = f - p = (1. + erf(yf/np.sqrt(2.)))/2. # likelihood + p = old_div((1. + erf(old_div(yf,np.sqrt(2.)))),2.) # likelihood if nargout>1: lp = self.logphi(yf,p) return p,lp @@ -336,13 +342,13 @@ def gauOverCumGauss(self,f,p): # return n_p = gauOverCumGauss(f,p) n_p = np.zeros_like(f) # safely compute Gaussian over cumulative Gaussian ok = f>-5 # naive evaluation for large values of f - n_p[ok] = (np.exp(-f[ok]**2/2)/np.sqrt(2*np.pi)) / p[ok] + n_p[ok] = old_div((old_div(np.exp(old_div(-f[ok]**2,2)),np.sqrt(2*np.pi))), p[ok]) bd = f<-6 # tight upper bound evaluation - n_p[bd] = np.sqrt(f[bd]**2/4+1)-f[bd]/2 + n_p[bd] = np.sqrt(old_div(f[bd]**2,4)+1)-old_div(f[bd],2) interp = np.logical_and(np.logical_not(ok),np.logical_not(bd)) # linearly interpolate between both of them tmp = f[interp] lam = -5. - f[interp] - n_p[interp] = (1-lam)*(np.exp(-tmp**2/2)/np.sqrt(2*np.pi))/p[interp] + lam *(np.sqrt(tmp**2/4+1)-tmp/2); + n_p[interp] = (1-lam)*(old_div(np.exp(old_div(-tmp**2,2)),np.sqrt(2*np.pi)))/p[interp] + lam *(np.sqrt(old_div(tmp**2,4)+1)-old_div(tmp,2)); return n_p def logphi(self,z,p): @@ -353,9 +359,9 @@ def logphi(self,z,p): bd = z0: s2zero = False # s2==0? if s2zero: # log probability evaluation - lp = -np.abs(y-mu)/b -np.log(2*b); s2 = 0 + lp = old_div(-np.abs(y-mu),b) -np.log(2*b); s2 = 0 else: # prediction lp = self.evaluate(y, mu, s2, inf.EP()) if nargout>1: @@ -403,9 +410,9 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): if y is None: y = np.zeros_like(mu) ymmu = y-mu - lp = np.abs(ymmu)/b - np.log(2*b) + lp = old_div(np.abs(ymmu),b) - np.log(2*b) if nargout>1: # derivative of log likelihood - dlp = np.sign(ymmu)/b + dlp = old_div(np.sign(ymmu),b) if nargout>2: # 2nd derivative of log likelihood d2lp = np.zeros_like(ymmu) if nargout>3: # 3rd derivative of log likelihood @@ -418,8 +425,8 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): else: return lp else: # derivative w.r.t. hypers - lp_dhyp = np.abs(y-mu)/b - 1 # derivative of log likelihood w.r.t. 
hypers - dlp_dhyp = np.sign(mu-y)/b # first derivative, + lp_dhyp = old_div(np.abs(y-mu),b) - 1 # derivative of log likelihood w.r.t. hypers + dlp_dhyp = old_div(np.sign(mu-y),b) # first derivative, d2lp_dhyp = np.zeros(mu.shape) # and also of the second mu derivative return lp_dhyp, dlp_dhyp, d2lp_dhyp elif isinstance(inffunc, inf.EP): @@ -439,7 +446,7 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): dlZ = np.zeros((n,1)) d2lZ = np.zeros((n,1)) if np.any(idlik): - l = Gauss(log_sigma=np.log(s2[idlik])/2) + l = Gauss(log_sigma=old_div(np.log(s2[idlik]),2)) a = l.evaluate(mu[idlik], y[idlik]) lZ[idlik] = a[0]; dlZ[idlik] = a[1]; d2lZ[idlik] = a[2] if np.any(idgau): @@ -448,11 +455,11 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): lZ[idgau] = a[0]; dlZ[idgau] = a[1]; d2lZ[idgau] = a[2] if np.any(id): # substitution to obtain unit variance, zero mean Laplacian - tvar = s2[id]/(sn[id]**2+1e-16) - tmu = (mu[id]-y[id])/(sn[id]+1e-16) + tvar = old_div(s2[id],(sn[id]**2+1e-16)) + tmu = old_div((mu[id]-y[id]),(sn[id]+1e-16)) # an implementation based on logphi(t) = log(normcdf(t)) - zp = (tmu+np.sqrt(2)*tvar)/np.sqrt(tvar) - zm = (tmu-np.sqrt(2)*tvar)/np.sqrt(tvar) + zp = old_div((tmu+np.sqrt(2)*tvar),np.sqrt(tvar)) + zm = old_div((tmu-np.sqrt(2)*tvar),np.sqrt(tvar)) ap = self._logphi(-zp)+np.sqrt(2)*tmu am = self._logphi( zm)-np.sqrt(2)*tmu apam = np.vstack((ap,am)).T @@ -461,16 +468,16 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): if nargout>1: lqp = -0.5*zp**2 - 0.5*np.log(2*np.pi) - self._logphi(-zp); # log( N(z)/Phi(z) ) lqm = -0.5*zm**2 - 0.5*np.log(2*np.pi) - self._logphi( zm); - dap = -np.exp(lqp-0.5*np.log(s2[id])) + np.sqrt(2)/sn[id] - dam = np.exp(lqm-0.5*np.log(s2[id])) - np.sqrt(2)/sn[id] + dap = -np.exp(lqp-0.5*np.log(s2[id])) + old_div(np.sqrt(2),sn[id]) + dam = np.exp(lqm-0.5*np.log(s2[id])) - old_div(np.sqrt(2),sn[id]) _z1 = np.vstack((ap,am)).T _z2 = np.vstack((dap,dam)).T _x = np.array([[1],[1]]) dlZ[id] = self._expABz_expAx(_z1, _x, _z2, _x) if nargout>2: a = np.sqrt(8.)/sn[id]/np.sqrt(s2[id]); - bp = 2./sn[id]**2 - (a - zp/s2[id])*np.exp(lqp) - bm = 2./sn[id]**2 - (a + zm/s2[id])*np.exp(lqm) + bp = old_div(2.,sn[id]**2) - (a - old_div(zp,s2[id]))*np.exp(lqp) + bm = old_div(2.,sn[id]**2) - (a + old_div(zm,s2[id]))*np.exp(lqm) _x = np.reshape(np.array([1,1]),(2,1)) _z1 = np.reshape(np.array([ap,am]),(1,2)) _z2 = np.reshape(np.array([bp,bm]),(1,2)) @@ -491,12 +498,12 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): if np.any(id): # substitution to obtain unit variance, zero mean Laplacian - tmu = (mu[id]-y[id])/(sn[id]+1e-16); tvar = s2[id]/(sn[id]**2+1e-16) - zp = (tvar+tmu/np.sqrt(2))/np.sqrt(tvar); vp = tvar+np.sqrt(2)*tmu - zm = (tvar-tmu/np.sqrt(2))/np.sqrt(tvar); vm = tvar-np.sqrt(2)*tmu - dzp = (-s2[id]/sn[id]+tmu*sn[id]/np.sqrt(2)) / np.sqrt(s2[id]) + tmu = old_div((mu[id]-y[id]),(sn[id]+1e-16)); tvar = old_div(s2[id],(sn[id]**2+1e-16)) + zp = old_div((tvar+old_div(tmu,np.sqrt(2))),np.sqrt(tvar)); vp = tvar+np.sqrt(2)*tmu + zm = old_div((tvar-old_div(tmu,np.sqrt(2))),np.sqrt(tvar)); vm = tvar-np.sqrt(2)*tmu + dzp = old_div((old_div(-s2[id],sn[id])+tmu*sn[id]/np.sqrt(2)), np.sqrt(s2[id])) dvp = -2*tvar - np.sqrt(2)*tmu - dzm = (-s2[id]/sn[id]-tmu*sn[id]/np.sqrt(2)) / np.sqrt(s2[id]) + dzm = old_div((old_div(-s2[id],sn[id])-tmu*sn[id]/np.sqrt(2)), np.sqrt(s2[id])) dvm = -2*tvar + np.sqrt(2)*tmu lezp = self._lerfc(zp); # ap = 
exp(vp).*ezp lezm = self._lerfc(zm); # am = exp(vm).*ezm @@ -505,7 +512,7 @@ def evaluate(self, y=None, mu=None, s2=None, inffunc=None, der=None, nargout=1): em = np.exp(vm+lezm-vmax) dap = ep*(dvp - 2/np.sqrt(np.pi)*np.exp(-zp**2-lezp)*dzp) dam = em*(dvm - 2/np.sqrt(np.pi)*np.exp(-zm**2-lezm)*dzm) - dlZhyp[id] = (dap+dam)/(ep+em) - 1; + dlZhyp[id] = old_div((dap+dam),(ep+em)) - 1; return dlZhyp # deriv. wrt hyp.lik elif isinstance(inffunc, inf.VB): n = len(s2.flatten()); b = np.zeros((n,1)); y = y*np.ones((n,1)); z = y @@ -520,8 +527,8 @@ def _lerfc(self,t): bd = t>tmax # evaluate tight bound nok = np.logical_not(ok) interp = np.logical_and(nok,np.logical_not(bd)) # interpolate between both of them - f[nok] = np.log(2/np.sqrt(np.pi)) -t[nok]**2 -np.log(t[nok]+np.sqrt( t[nok]**2+4/np.pi )) - lam = 1/(1+np.exp( 12*(0.5-(t[interp]-tmin)/(tmax-tmin)) )) # interp. weights + f[nok] = np.log(old_div(2,np.sqrt(np.pi))) -t[nok]**2 -np.log(t[nok]+np.sqrt( t[nok]**2+old_div(4,np.pi) )) + lam = old_div(1,(1+np.exp( 12*(0.5-old_div((t[interp]-tmin),(tmax-tmin))) ))) # interp. weights f[interp] = lam*f[interp] + (1-lam)*np.log(erfc( t[interp] )) f[ok] += np.log(erfc( t[ok] )) # safe eval return f @@ -536,7 +543,7 @@ def _expABz_expAx(self,A,x,B,z): maxA = np.max(A,axis=1) # number of columns, max over columns maxA = np.array([maxA]).T A = A - np.dot(maxA, np.ones((1,N))) # subtract maximum value - y = ( np.dot((np.exp(A)*B),z) ) / ( np.dot(np.exp(A),x) ) + y = old_div(( np.dot((np.exp(A)*B),z) ), ( np.dot(np.exp(A),x) )) return y[0] def _logphi(self,z): @@ -549,10 +556,10 @@ def _logphi(self,z): bd = z 0: return PowerOfMean(self,number) else: - print "only non-zero integers are supported for **" + print("only non-zero integers are supported for **") @@ -340,10 +344,10 @@ class Linear(Mean): ''' def __init__(self, D=None, alpha_list=None): if alpha_list is None: - if D is None: - self.hyp = [0.5] - else: - self.hyp = [0.5 for i in xrange(D)] + if D is None: + self.hyp = [0.5] + else: + self.hyp = [0.5 for i in range(D)] else: self.hyp = alpha_list diff --git a/pyGPs/Core/opt.py b/pyGPs/Core/opt.py index 73763b7..43a64b0 100644 --- a/pyGPs/Core/opt.py +++ b/pyGPs/Core/opt.py @@ -1,3 +1,9 @@ +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +from builtins import range +from past.utils import old_div +from builtins import object #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -20,7 +26,7 @@ import numpy as np import pyGPs -import gp + from scipy.optimize import fmin_bfgs as bfgs from scipy.optimize import fmin_cg as cg from pyGPs.Optimization import minimize, scg @@ -29,6 +35,7 @@ class Optimizer(object): def __init__(self, model=None, searchConfig = None): self.model = model + from . import gp def findMin(self, x, y, numIters): ''' @@ -103,9 +110,9 @@ def findMin(self, x, y, numIters = 100): funcValue = opt[1] warnFlag = opt[4] if warnFlag == 1: - print "Maximum number of iterations exceeded." + print("Maximum number of iterations exceeded.") elif warnFlag == 2: - print "Gradient and/or function calls not changing." 
+ print("Gradient and/or function calls not changing.") except: self.errorCounter += 1 if not self.searchConfig: @@ -118,7 +125,7 @@ def findMin(self, x, y, numIters = 100): raise Exception('Specify at least one of the stop conditions') while True: self.trailsCounter += 1 # increase counter - for i in xrange(hypInArray.shape[0]): # random init of hyp + for i in range(hypInArray.shape[0]): # random init of hyp hypInArray[i]= np.random.uniform(low=searchRange[i][0], high=searchRange[i][1]) # value this time is better than optiaml min value try: @@ -128,14 +135,14 @@ def findMin(self, x, y, numIters = 100): optimalHyp = thisopt[0] except: self.errorCounter += 1 - if self.searchConfig.num_restarts and self.errorCounter > self.searchConfig.num_restarts/2: - print "[CG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + if self.searchConfig.num_restarts and self.errorCounter > old_div(self.searchConfig.num_restarts,2): + print("[CG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) raise Exception("Over half of the trails failed for conjugate gradient") if self.searchConfig.num_restarts and self.trailsCounter > self.searchConfig.num_restarts-1: # if exceed num_restarts - print "[CG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[CG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue if self.searchConfig.min_threshold and funcValue <= self.searchConfig.min_threshold: # reach provided mininal - print "[CG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[CG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue return optimalHyp, funcValue @@ -163,9 +170,9 @@ def findMin(self, x, y, numIters = 100): funcValue = opt[1] warnFlag = opt[6] if warnFlag == 1: - print "Maximum number of iterations exceeded." + print("Maximum number of iterations exceeded.") elif warnFlag == 2: - print "Gradient and/or function calls not changing." 
+ print("Gradient and/or function calls not changing.") except: self.errorCounter += 1 if not self.searchConfig: @@ -179,7 +186,7 @@ def findMin(self, x, y, numIters = 100): raise Exception('Specify at least one of the stop conditions') while True: self.trailsCounter += 1 # increase counter - for i in xrange(hypInArray.shape[0]): # random init of hyp + for i in range(hypInArray.shape[0]): # random init of hyp hypInArray[i]= np.random.uniform(low=searchRange[i][0], high=searchRange[i][1]) # value this time is better than optiaml min value try: @@ -189,14 +196,14 @@ def findMin(self, x, y, numIters = 100): optimalHyp = thisopt[0] except: self.errorCounter += 1 - if self.searchConfig.num_restarts and self.errorCounter > self.searchConfig.num_restarts/2: - print "[BFGS] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + if self.searchConfig.num_restarts and self.errorCounter > old_div(self.searchConfig.num_restarts,2): + print("[BFGS] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) raise Exception("Over half of the trails failed for BFGS") if self.searchConfig.num_restarts and self.trailsCounter > self.searchConfig.num_restarts-1: # if exceed num_restarts - print "[BFGS] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[BFGS] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue if self.searchConfig.min_threshold and funcValue <= self.searchConfig.min_threshold: # reach provided mininal - print "[BFGS] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[BFGS] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue return optimalHyp, funcValue @@ -235,7 +242,7 @@ def findMin(self, x, y, numIters = 100): raise Exception('Specify at least one of the stop conditions') while True: self.trailsCounter += 1 # increase counter - for i in xrange(hypInArray.shape[0]): # random init of hyp + for i in range(hypInArray.shape[0]): # random init of hyp hypInArray[i]= np.random.uniform(low=searchRange[i][0], high=searchRange[i][1]) # value this time is better than optiaml min value try: @@ -245,14 +252,14 @@ def findMin(self, x, y, numIters = 100): optimalHyp = thisopt[0] except: self.errorCounter += 1 - if self.searchConfig.num_restarts and self.errorCounter > self.searchConfig.num_restarts/2: - print "[Minimize] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + if self.searchConfig.num_restarts and self.errorCounter > old_div(self.searchConfig.num_restarts,2): + print("[Minimize] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) raise Exception("Over half of the trails failed for minimize") if self.searchConfig.num_restarts and self.trailsCounter > self.searchConfig.num_restarts-1: # if exceed num_restarts - print "[Minimize] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[Minimize] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue if self.searchConfig.min_threshold and funcValue <= self.searchConfig.min_threshold: # reach provided mininal - print "[Minimize] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[Minimize] %d out of %d trails failed during 
optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue return optimalHyp, funcValue @@ -289,7 +296,7 @@ def findMin(self, x, y, numIters = 100): raise Exception('Specify at least one of the stop conditions') while True: self.trailsCounter += 1 # increase counter - for i in xrange(hypInArray.shape[0]): # random init of hyp + for i in range(hypInArray.shape[0]): # random init of hyp hypInArray[i]= np.random.uniform(low=searchRange[i][0], high=searchRange[i][1]) # value this time is better than optiaml min value try: @@ -299,14 +306,14 @@ def findMin(self, x, y, numIters = 100): optimalHyp = thisopt[0] except: self.errorCounter += 1 - if self.searchConfig.num_restarts and self.errorCounter > self.searchConfig.num_restarts/2: - print "[SCG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + if self.searchConfig.num_restarts and self.errorCounter > old_div(self.searchConfig.num_restarts,2): + print("[SCG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) raise Exception("Over half of the trails failed for Scaled conjugate gradient") if self.searchConfig.num_restarts and self.trailsCounter > self.searchConfig.num_restarts-1: # if exceed num_restarts - print "[SCG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[SCG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue if self.searchConfig.min_threshold and funcValue <= self.searchConfig.min_threshold: # reach provided mininal - print "[SCG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter) + print("[SCG] %d out of %d trails failed during optimization" % (self.errorCounter, self.trailsCounter)) return optimalHyp, funcValue return optimalHyp, funcValue diff --git a/pyGPs/Core/tools.py b/pyGPs/Core/tools.py index 3ad619d..bb9b360 100644 --- a/pyGPs/Core/tools.py +++ b/pyGPs/Core/tools.py @@ -1,3 +1,7 @@ +from __future__ import division +from __future__ import print_function +from past.builtins import cmp +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -60,17 +64,17 @@ def jitchol(A,maxtries=5): else: diagA = np.diag(A) if np.any(diagA <= 0.): - raise np.linalg.LinAlgError, "kernel matrix not positive definite: non-positive diagonal elements" + raise np.linalg.LinAlgError("kernel matrix not positive definite: non-positive diagonal elements") jitter = diagA.mean() * 1e-9 while maxtries > 0 and np.isfinite(jitter): - print 'Warning: adding jitter of {:.10e} to diagnol of kernel matrix for numerical stability'.format(jitter) + print('Warning: adding jitter of {:.10e} to diagnol of kernel matrix for numerical stability'.format(jitter)) try: return np.linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) except: jitter *= 10 finally: maxtries -= 1 - raise np.linalg.LinAlgError, "kernel matrix not positive definite, even with jitter." 
+ raise np.linalg.LinAlgError("kernel matrix not positive definite, even with jitter.") @@ -190,7 +194,7 @@ def brentmin(xlow,xupp,Nitmax,tol,f,nout=None,*args): funccount += 1 fv = fx; fw = fx xm = 0.5*(a+b) - tol1 = seps*abs(xf) + tol/3.0; + tol1 = seps*abs(xf) + old_div(tol,3.0); tol2 = 2.0*tol1 # Main loop while ( abs(xf-xm) > (tol2 - 0.5*(b-a)) ): @@ -210,7 +214,7 @@ def brentmin(xlow,xupp,Nitmax,tol,f,nout=None,*args): # Is the parabola acceptable if ( (abs(p)q*(a-xf)) and (p= Nitmax: # typically we should not get here # print 'Warning: Specified number of function evaluation reached (brentmin)' diff --git a/pyGPs/Demo/Airline/demo_Airline.py b/pyGPs/Demo/Airline/demo_Airline.py index 6c8a26a..f95476e 100755 --- a/pyGPs/Demo/Airline/demo_Airline.py +++ b/pyGPs/Demo/Airline/demo_Airline.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import range #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -51,7 +53,7 @@ # the deafult mean will be adapted to the average value of the training labels.. # ..if you do not specify mean function by your own. model.optimize(x, y) - print _, model.nlZ + print(_, model.nlZ) model.predict(xt) model.plot() diff --git a/pyGPs/Demo/Classification/demo_GPC.py b/pyGPs/Demo/Classification/demo_GPC.py index f08bbbd..d16d634 100644 --- a/pyGPs/Demo/Classification/demo_GPC.py +++ b/pyGPs/Demo/Classification/demo_GPC.py @@ -1,3 +1,4 @@ +from __future__ import print_function #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -18,8 +19,8 @@ # you may want to read demo_GPR, demo_kernel and demo_optimization first! # Here, the focus is on the difference of classification model. 
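The pyGPs/Core/tools.py hunk above converts the Python 2 statements `raise LinAlgError, "msg"` in `jitchol` to the Python 3 call form `raise LinAlgError("msg")`. For readers unfamiliar with that helper, here is a minimal sketch of the same jitter-and-retry Cholesky pattern written directly for Python 3; the function name and retry count are illustrative, not the library's exact API:

import numpy as np

def jittered_cholesky(A, maxtries=5):
    """Cholesky factorization that adds growing diagonal jitter on failure."""
    try:
        return np.linalg.cholesky(A)                 # lower-triangular factor
    except np.linalg.LinAlgError:
        pass
    diagA = np.diag(A)
    if np.any(diagA <= 0.0):
        raise np.linalg.LinAlgError("non-positive diagonal elements")
    jitter = diagA.mean() * 1e-9
    for _ in range(maxtries):
        try:
            return np.linalg.cholesky(A + np.eye(A.shape[0]) * jitter)
        except np.linalg.LinAlgError:
            jitter *= 10                             # escalate and retry
    raise np.linalg.LinAlgError("matrix not positive definite, even with jitter")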
-print '' -print '---------------------GPC DEMO-------------------------' +print('') +print('---------------------GPC DEMO-------------------------') #---------------------------------------------------------------------- # Load demo data (generated from Gaussians) @@ -44,20 +45,20 @@ #---------------------------------------------------------------------- # First example -> state default values #---------------------------------------------------------------------- -print 'Basic Example - Data' +print('Basic Example - Data') model = pyGPs.GPC() # binary classification (default inference method: EP) model.plotData_2d(x1,x2,t1,t2,p1,p2) model.getPosterior(x, y) # fit default model (mean zero & rbf kernel) with data model.optimize(x, y) # optimize hyperparamters (default optimizer: single run minimize) model.predict(z) # predict test cases -print 'Basic Example - Prediction' +print('Basic Example - Prediction') model.plot(x1,x2,t1,t2) #---------------------------------------------------------------------- # GP classification example #---------------------------------------------------------------------- -print 'More Advanced Example' +print('More Advanced Example') # Start from a new model model = pyGPs.GPC() @@ -66,9 +67,9 @@ model.setPrior(kernel=k) model.getPosterior(x, y) -print "Negative log marginal liklihood before:", round(model.nlZ,3) +print("Negative log marginal liklihood before:", round(model.nlZ,3)) model.optimize(x, y) -print "Negative log marginal liklihood optimized:", round(model.nlZ,3) +print("Negative log marginal liklihood optimized:", round(model.nlZ,3)) # Prediction n = z.shape[0] @@ -78,7 +79,7 @@ # plot log probability distribution for class +1 model.plot(x1,x2,t1,t2) -print '--------------------END OF DEMO-----------------------' +print('--------------------END OF DEMO-----------------------') diff --git a/pyGPs/Demo/Classification/demo_GPC_FITC.py b/pyGPs/Demo/Classification/demo_GPC_FITC.py index 841e7ed..f6533b3 100644 --- a/pyGPs/Demo/Classification/demo_GPC_FITC.py +++ b/pyGPs/Demo/Classification/demo_GPC_FITC.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import zip #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -18,8 +20,8 @@ # you may want to read demo_GPR, demo_kernel and demo_optimization first! # Here, the focus is on the difference of FITC classification. 
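A recurring change in these demos is wrapping `zip(...)` in `list(...)` before handing it to `np.array`, for example for the inducing-point grid in the GPC_FITC hunk below. The reason is that Python 3's `zip` returns a lazy iterator, so `np.array(zip(a, b))` yields a 0-d object array (or an error, depending on the NumPy version) instead of the n-by-2 matrix Python 2 produced. A small illustration with arbitrary values:

import numpy as np

a = np.linspace(-2.0, 2.0, 5)
b = np.linspace(0.0, 1.0, 5)

pairs = np.array(list(zip(a, b)))   # materialise the iterator first
print(pairs.shape)                  # (5, 2), matching the old Python 2 behaviour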
-print '' -print '-------------------GPC_FITC DEMO----------------------' +print('') +print('-------------------GPC_FITC DEMO----------------------') #---------------------------------------------------------------------- # Load demo data (generated from Gaussians) @@ -45,7 +47,7 @@ # Sparse GP classification (FITC) example #---------------------------------------------------------------------- -print "Example 1: default inducing points" +print("Example 1: default inducing points") # Start from a new model model = pyGPs.GPC_FITC() @@ -60,7 +62,7 @@ # model.setData(x, y, value_per_axis=10) model.optimize() -print "Negative log marginal likelihood optimized:", round(model.nlZ,3) +print("Negative log marginal likelihood optimized:", round(model.nlZ,3)) # Prediction n = z.shape[0] @@ -70,15 +72,15 @@ -print '------------------------------------------------------' -print "Example 2: user-defined inducing points" +print('------------------------------------------------------') +print("Example 2: user-defined inducing points") model = pyGPs.GPC_FITC() # You can define inducing points yourself. # u = np.array([]) u1,u2 = np.meshgrid(np.linspace(-2,2,5),np.linspace(-2,2,5)) -u = np.array(zip(np.reshape(u2,(np.prod(u2.shape),)),np.reshape(u1,(np.prod(u1.shape),)))) +u = np.array(list(zip(np.reshape(u2,(np.prod(u2.shape),)),np.reshape(u1,(np.prod(u1.shape),))))) # and specify inducing point when seting prior m = pyGPs.mean.Zero() @@ -88,9 +90,9 @@ # The rest is analogous to what we have done before. model.setData(x, y) model.getPosterior() -print "Negative log marginal likelihood before optimization:", round(model.nlZ,3) +print("Negative log marginal likelihood before optimization:", round(model.nlZ,3)) model.optimize() -print "Negative log marginal likelihood optimized:", round(model.nlZ,3) +print("Negative log marginal likelihood optimized:", round(model.nlZ,3)) # predict n = z.shape[0] @@ -100,7 +102,7 @@ -print '--------------------END OF DEMO-----------------------' +print('--------------------END OF DEMO-----------------------') diff --git a/pyGPs/Demo/GraphData/demo_GraphKernel.py b/pyGPs/Demo/GraphData/demo_GraphKernel.py index f80aba5..d5fb8ce 100644 --- a/pyGPs/Demo/GraphData/demo_GraphKernel.py +++ b/pyGPs/Demo/GraphData/demo_GraphKernel.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range #! 
/usr/bin/env python #coding=utf-8 @@ -34,7 +37,7 @@ N = graph_label.shape[0] # number of graphs) graph_label = np.int8(graph_label) -for i in xrange(N): +for i in range(N): if graph_label[i,0] == 0: graph_label[i,0] -= 1 @@ -43,19 +46,19 @@ #=========================================================================== num_Iteration = 10 w = 1e-4 -dist = 'tv' # possible values: 'tv', 'hellinger' -np.random.seed(1) # set random seed to get reproducible kernel matrices (to account for randomness in kernel average resutls over several returns of the experiment) +dist = 'tv' # possible values: 'tv', 'hellinger' +np.random.seed(1) # set random seed to get reproducible kernel matrices (to account for randomness in kernel average resutls over several returns of the experiment) K = graphKernels.propagationKernel(A, node_label, gr_id, num_Iteration, w, dist, 'label_diffusion', SUM=True, VIS=False, showEachStep=False) #---------------------------------------------------------------------- # Cross Validation #---------------------------------------------------------------------- -print '...GP prediction (10-fold CV)' +print('...GP prediction (10-fold CV)') -for t in xrange(num_Iteration+1): +for t in range(num_Iteration+1): ACC = [] # accuracy - print 'number of kernel iterations =', t + print('number of kernel iterations =', t) Matrix = K[:,:,t] # normalize kernel matrix (not useful for MUTAG) # Matrix = graphUtil.normalizeKernel(Matrix) @@ -86,6 +89,6 @@ ACC.append(acc) - print 'Accuracy: ', np.round(np.mean(ACC),2), '('+str(np.round(np.std(ACC),2))+')' + print('Accuracy: ', np.round(np.mean(ACC),2), '('+str(np.round(np.std(ACC),2))+')') diff --git a/pyGPs/Demo/GraphData/graphData/toNumpyMatrix.py b/pyGPs/Demo/GraphData/graphData/toNumpyMatrix.py index 1f344d8..be55c29 100644 --- a/pyGPs/Demo/GraphData/graphData/toNumpyMatrix.py +++ b/pyGPs/Demo/GraphData/graphData/toNumpyMatrix.py @@ -1,3 +1,4 @@ +from __future__ import print_function import numpy as np from scipy.io import loadmat from scipy.sparse.csc import csc_matrix @@ -12,11 +13,11 @@ node_label = data['responses'] # n x 1 node label array graph_label = data['labels'] # N x 1 graph label array -print A.shape -print A.indices -print A.data -print A.indptr -print type(A) +print(A.shape) +print(A.indices) +print(A.data) +print(A.indptr) +print(type(A)) np.savez("MUTAG", graph_ind=gr_id, responses=node_label, labels=graph_label, adj_data=A.data,\ diff --git a/pyGPs/Demo/Housing/demo_Housing.py b/pyGPs/Demo/Housing/demo_Housing.py index 1167af2..5149058 100755 --- a/pyGPs/Demo/Housing/demo_Housing.py +++ b/pyGPs/Demo/Housing/demo_Housing.py @@ -1,3 +1,7 @@ +from __future__ import division +from __future__ import print_function +from builtins import range +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -24,20 +28,20 @@ N = 25 # Get all data (exclude the 4th column which is binary) except the last 50 points for training x = np.concatenate((data[:-N,:4],data[:-N,5:-1]),axis=1) - x = (x - np.mean(x,axis=0))/(np.std(x,axis=0)+1.e-16) + x = old_div((x - np.mean(x,axis=0)),(np.std(x,axis=0)+1.e-16)) # The function we will perform regression on: Median Value of owner occupied homes y = np.reshape(data[:-N,-1],(len(data[:-N,-1]),1)) - y = (y-np.mean(y))/(np.std(y)+1.e-16) + y = old_div((y-np.mean(y)),(np.std(y)+1.e-16)) # Test on the last 50 points xs = 
np.concatenate((data[-N:,:4],data[-N:,5:-1]),axis=1) - xs = (xs - np.mean(xs,axis=0))/(np.std(xs,axis=0)+1.e-16) + xs = old_div((xs - np.mean(xs,axis=0)),(np.std(xs,axis=0)+1.e-16)) ys = np.reshape(data[-N:,-1],(N,1)) - ys = (ys-np.mean(ys))/(np.std(ys)+1.e-16) + ys = old_div((ys-np.mean(ys)),(np.std(ys)+1.e-16)) N,D = x.shape model = pyGPs.GPR() model.getPosterior(x, y) - print 'Initial negative log marginal likelihood = ', round(model.nlZ,3) + print('Initial negative log marginal likelihood = ', round(model.nlZ,3)) # train and predict from time import clock @@ -46,17 +50,17 @@ t1 = clock() ym, ys2, fm, fs2, lp = model.predict(xs) xa = np.concatenate((data[:,:4],data[:,5:-1]),axis=1) - xa = (xa - np.mean(xa,axis=0))/(np.std(xa,axis=0)+1.e-16) + xa = old_div((xa - np.mean(xa,axis=0)),(np.std(xa,axis=0)+1.e-16)) ya, ys2a, fma, fs2a, lpa = model.predict(xa) - print 'Time to optimize = ', t1-t0 - print 'Optimized mean = ', model.meanfunc.hyp - print 'Optimized covariance = ', model.covfunc.hyp - print 'Optimized liklihood = ', model.likfunc.hyp - print 'Final negative log marginal likelihood = ', round(model.nlZ,3) + print('Time to optimize = ', t1-t0) + print('Optimized mean = ', model.meanfunc.hyp) + print('Optimized covariance = ', model.covfunc.hyp) + print('Optimized liklihood = ', model.likfunc.hyp) + print('Final negative log marginal likelihood = ', round(model.nlZ,3)) #HousingPlotter(range(len(y)),y,range(len(ym)),ym,ys2,range(len(y),len(y)+len(ys)),ys) - xm = np.array(range(len(y),len(y)+ym.shape[0])) + xm = np.array(list(range(len(y),len(y)+ym.shape[0]))) ym = np.reshape(ym,(ym.shape[0],)) zm = np.reshape(ys2,(ym.shape[0],)) diff --git a/pyGPs/Demo/JHUI/demo_Validation.py b/pyGPs/Demo/JHUI/demo_Validation.py index 2db1ca9..d1c2ac8 100644 --- a/pyGPs/Demo/JHUI/demo_Validation.py +++ b/pyGPs/Demo/JHUI/demo_Validation.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -20,8 +23,8 @@ # This example shows the process of cross-validation using real dataset. 
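Most of the arithmetic rewrites in this patch go through `past.utils.old_div`, which futurize inserts whenever it cannot prove the operands are floats: it floor-divides two integers (the Python 2 behaviour of `/`) and true-divides everything else. For float arrays, such as the standardised Housing features above, it is therefore equivalent to plain `/`. A quick check, assuming the `future` package is installed:

from __future__ import division
import numpy as np
from past.utils import old_div

print(old_div(7, 2))      # 3    -> both operands are ints, so floor division
print(old_div(7.0, 2))    # 3.5  -> at least one float, so true division

x = np.arange(6, dtype=float).reshape(3, 2)
z = old_div(x - x.mean(axis=0), x.std(axis=0) + 1e-16)
print(np.allclose(z, (x - x.mean(axis=0)) / (x.std(axis=0) + 1e-16)))   # True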
-print '' -print '---------------Cross-Validation DEMO------------------' +print('') +print('---------------Cross-Validation DEMO------------------') #---------------------------------------------------------------------- # Load raw data (ionosphere dataset from UCI) @@ -48,7 +51,7 @@ # Here we deal with label in ionosphere data, # change "b" to"-1", and "g" to "+1" n,D = x.shape -for i in xrange(n): +for i in range(n): if y[i,0][0] == 'g': y[i,0] = 1 else: @@ -65,7 +68,7 @@ cv_run = 0 for x_train, x_test, y_train, y_test in valid.k_fold_validation(x, y, K): - print 'Run:', cv_run + print('Run:', cv_run) # This is a binary classification problem model = pyGPs.GPC() # Since no prior knowldege, leave everything default @@ -79,17 +82,17 @@ # Evluation acc = valid.ACC(ymu_class, y_test) - print ' accuracy =', round(acc,2) + print(' accuracy =', round(acc,2)) rmse = valid.RMSE(ymu_class, y_test) - print ' rmse =', round(rmse,2) + print(' rmse =', round(rmse,2)) ACC.append(acc) RMSE.append(rmse) # Toward next run cv_run += 1 -print '\nAccuracy: ', np.round(np.mean(ACC),2), '('+str(np.round(np.std(ACC),2))+')' -print 'Root-Mean-Square Error: ', np.round(np.mean(RMSE),2) +print('\nAccuracy: ', np.round(np.mean(ACC),2), '('+str(np.round(np.std(ACC),2))+')') +print('Root-Mean-Square Error: ', np.round(np.mean(RMSE),2)) @@ -100,12 +103,12 @@ ''' We defined the following measures(@SEE pyGPs.Valid.valid): - RMSE - Root mean squared error - ACC - Classification accuracy - Prec - Precision for class +1 - Recall - Recall for class +1 - NLPD - Negative log predictive density in transformed observation space + RMSE - Root mean squared error + ACC - Classification accuracy + Prec - Precision for class +1 + Recall - Recall for class +1 + NLPD - Negative log predictive density in transformed observation space ''' -print '--------------------END OF DEMO-----------------------' +print('--------------------END OF DEMO-----------------------') diff --git a/pyGPs/Demo/MaunaLoa/demo_MaunaLoa.py b/pyGPs/Demo/MaunaLoa/demo_MaunaLoa.py index 36c3534..648b4de 100755 --- a/pyGPs/Demo/MaunaLoa/demo_MaunaLoa.py +++ b/pyGPs/Demo/MaunaLoa/demo_MaunaLoa.py @@ -1,3 +1,7 @@ +from __future__ import division +from __future__ import print_function +from builtins import zip +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -32,7 +36,7 @@ if __name__ == '__main__': # LOAD data - infile = 'mauna.txt' # Note: Samples with value -99.99 were dropped. + infile = 'mauna.txt' # Note: Samples with value -99.99 were dropped. f = open(infile,'r') year = [] co2 = [] @@ -54,18 +58,18 @@ n,D = x.shape # TEST POINTS - xs = np.arange(2004+1./24.,2024-1./24.,1./12.) 
+ xs = np.arange(2004+old_div(1.,24.),2024-old_div(1.,24.),old_div(1.,12.)) xs = xs.reshape(len(xs),1) # DEFINE parameterized covariance function k1 = pyGPs.cov.RBF(np.log(67.), np.log(66.)) k2 = pyGPs.cov.Periodic(np.log(1.3), np.log(1.0), np.log(2.4)) * pyGPs.cov.RBF(np.log(90.), np.log(2.4)) k3 = pyGPs.cov.RQ(np.log(1.2), np.log(0.66), np.log(0.78)) - k4 = pyGPs.cov.RBF(np.log(1.6/12.), np.log(0.18)) + pyGPs.cov.Noise(np.log(0.19)) + k4 = pyGPs.cov.RBF(np.log(old_div(1.6,12.)), np.log(0.18)) + pyGPs.cov.Noise(np.log(0.19)) k = k1 + k2 + k3 + k4 # STANDARD GP (prediction) - print 'Original CO2 Data:' + print('Original CO2 Data:') model = pyGPs.GPR() model.setData(x,y) model.plotData_1d() @@ -79,11 +83,11 @@ t1 = clock() model.predict(xs) - print 'Using Handcrafted Kernel from GPML Book:' - print 'Time to optimize = ', t1-t0 - print 'Optimized mean = ', model.meanfunc.hyp - print 'Optimized covariance = ', model.covfunc.hyp - print 'Optimized liklihood = ', model.likfunc.hyp - print 'Final negative log marginal likelihood = ', round(model.nlZ,3) + print('Using Handcrafted Kernel from GPML Book:') + print('Time to optimize = ', t1-t0) + print('Optimized mean = ', model.meanfunc.hyp) + print('Optimized covariance = ', model.covfunc.hyp) + print('Optimized liklihood = ', model.likfunc.hyp) + print('Final negative log marginal likelihood = ', round(model.nlZ,3)) model.plot() diff --git a/pyGPs/Demo/Regression/demo_GPR.py b/pyGPs/Demo/Regression/demo_GPR.py index 145b753..bc8baec 100644 --- a/pyGPs/Demo/Regression/demo_GPR.py +++ b/pyGPs/Demo/Regression/demo_GPR.py @@ -1,3 +1,4 @@ +from __future__ import print_function #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -27,8 +28,8 @@ -print '' -print '---------------------GPR DEMO-------------------------' +print('') +print('---------------------GPR DEMO-------------------------') #---------------------------------------------------------------------- # Load demo data (generated from Gaussians) @@ -41,21 +42,21 @@ #---------------------------------------------------------------------- # A five-line example #---------------------------------------------------------------------- -print 'Basic Example' +print('Basic Example') model = pyGPs.GPR() # model -print 'Before Optimization' +print('Before Optimization') model.setData(x,y) model.predict(z) # predict test cases (before optimization) model.plot() # and plot result model.optimize(x, y) # optimize hyperparamters (default optimizer: single run minimize) -print 'After Optimization' +print('After Optimization') model.predict(z) # predict test cases model.plot() # and plot result #---------------------------------------------------------------------- # Now lets do another example to get more insight to the toolbox #---------------------------------------------------------------------- -print 'More Advanced Example (using a non-zero mean and Matern7 kernel)' +print('More Advanced Example (using a non-zero mean and Matern7 kernel)') model = pyGPs.GPR() # start from a new model # Specify non-default mean and covariance functions @@ -95,7 +96,7 @@ # model.fm (predictive latent means) # model.fs2 (predictive latent variances) # model.lp (log predictive probability) -print 'Optimized negative log marginal likelihood:', round(model.nlZ,3) +print('Optimized negative log marginal likelihood:', round(model.nlZ,3)) # Predict test data @@ -125,5 +126,5 @@ # You 
don't need it if you optimize it later anyway model.setNoise( log_sigma=np.log(0.1) ) -print '--------------------END OF DEMO-----------------------' +print('--------------------END OF DEMO-----------------------') diff --git a/pyGPs/Demo/Regression/demo_GPR_FITC.py b/pyGPs/Demo/Regression/demo_GPR_FITC.py index 0bf68b1..6a2b86e 100644 --- a/pyGPs/Demo/Regression/demo_GPR_FITC.py +++ b/pyGPs/Demo/Regression/demo_GPR_FITC.py @@ -1,3 +1,6 @@ +from __future__ import division +from __future__ import print_function +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -18,8 +21,8 @@ # you may want to read demo_GPR, demo_kernel and demo_optimization first! # Here, the focus is on the difference of FITC model. -print '' -print '-------------------GPR_FITC DEMO----------------------' +print('') +print('-------------------GPR_FITC DEMO----------------------') #---------------------------------------------------------------------- # Load demo data (generated from Gaussians) @@ -35,7 +38,7 @@ # Sparse GP regression (FITC) example #---------------------------------------------------------------------- -print "Example 1: default inducing points" +print("Example 1: default inducing points") # Start from a new model model = pyGPs.GPR_FITC() @@ -50,7 +53,7 @@ # model.setData(x, y, value_per_axis=10) model.optimize() -print "Negative log marginal liklihood optimized:", round(model.nlZ,3) +print("Negative log marginal liklihood optimized:", round(model.nlZ,3)) # Prediction model.predict(z) @@ -59,8 +62,8 @@ -print '------------------------------------------------------' -print "Example 2: user-defined inducing points" +print('------------------------------------------------------') +print("Example 2: user-defined inducing points") # Start from a new model model = pyGPs.GPR_FITC() @@ -70,7 +73,7 @@ u = np.array([[-1], [-0.8], [-0.5], [0.3],[1.]]) # or equally-spaced inducing points -num_u = np.fix(x.shape[0]/2) +num_u = np.fix(old_div(x.shape[0],2)) u = np.linspace(-1.3,1.3,num_u).T u = np.reshape(u,(num_u,1)) @@ -83,9 +86,9 @@ # The rest is analogous to what we have done before model.setData(x, y) model.getPosterior() -print "Negative log marginal liklihood before optimization:", round(model.nlZ,3) +print("Negative log marginal liklihood before optimization:", round(model.nlZ,3)) model.optimize() -print "Negative log marginal liklihood optimized:", round(model.nlZ,3) +print("Negative log marginal liklihood optimized:", round(model.nlZ,3)) # Prediction ymu, ys2, fmu, fs2, lp = model.predict(z) @@ -93,7 +96,7 @@ model.plot() -print '--------------------END OF DEMO-----------------------' +print('--------------------END OF DEMO-----------------------') diff --git a/pyGPs/Demo/USPS/demo_GPMC.py b/pyGPs/Demo/USPS/demo_GPMC.py index 9730d16..b4a6ec0 100644 --- a/pyGPs/Demo/USPS/demo_GPMC.py +++ b/pyGPs/Demo/USPS/demo_GPMC.py @@ -1,3 +1,4 @@ +from __future__ import print_function #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -21,8 +22,8 @@ # you may want to read demo_GPR, demo_kernel and demo_optimization first! # Here, the focus is on multi-class classification. 
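One spot in the GPR_FITC demo above that the mechanical port does not fully modernise is `num_u = np.fix(old_div(x.shape[0], 2))`: `np.fix` returns a float, and newer NumPy releases reject non-integer sizes in `np.linspace` and `np.reshape`. A hedged sketch of the equally spaced inducing points with an explicit integer cast (the cast and the stand-in data are my additions; the -1.3/1.3 range is the demo's):

import numpy as np

x = np.random.randn(20, 1)          # stand-in for the demo's training inputs

num_u = int(x.shape[0] // 2)        # integer number of inducing points
u = np.linspace(-1.3, 1.3, num_u)   # equally spaced inducing inputs
u = u.reshape(num_u, 1)             # column vector, as GPR_FITC expects
print(u.shape)                      # (10, 1)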
-print '' -print '---------------------GPMC DEMO-----------------------' +print('') +print('---------------------GPMC DEMO-----------------------') # GPMC is NOT based on multi-class Laplace Approximation. # It works as a one vs. one classification wrapper @@ -82,7 +83,7 @@ # Accuracy of recognized digit acc = valid.ACC(predictive_class, ys) -print "Accuracy of recognizing hand-writen digits:", round(acc,2) +print("Accuracy of recognizing hand-writen digits:", round(acc,2)) #---------------------------------------------------------------------- @@ -99,5 +100,5 @@ # there is also an option to predict without optimization # model.fitAndPredict(xs) -print '--------------------END OF DEMO-----------------------' +print('--------------------END OF DEMO-----------------------') diff --git a/pyGPs/Demo/USPS/demo_NodeKernel.py b/pyGPs/Demo/USPS/demo_NodeKernel.py index 2d4c50e..cb13b06 100644 --- a/pyGPs/Demo/USPS/demo_NodeKernel.py +++ b/pyGPs/Demo/USPS/demo_NodeKernel.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import range #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -28,7 +30,7 @@ def load_binary(D1,D2,reduce=False): D1_list = [] D2_list = [] n,D = x.shape - for i in xrange(n): + for i in range(n): if y[i,D1] == 1: D1_list.append(i) elif y[i,D2] == 1: @@ -76,12 +78,12 @@ def plotDigit(digit, title_str=''): exampleDigit7_2 = np.reshape(exampleDigit7_2,(16,16)) plotDigit(exampleDigit7_2, 'This is another 2.') - # true class 1 + # true class 1 exampleDigitBad = x[70] # digit that predicts wrong for rbf exampleDigitBad = np.reshape(exampleDigitBad,(16,16)) plotDigit(exampleDigitBad, 'This digit is an example where the rbf kernel predicts the wrong class (2). \nDiffusion kernel predicts correctly due to graph information!') - # true class 2 + # true class 2 exampleDigitBad = x[190] # digit that predicts wrong for diff exampleDigitBad = np.reshape(exampleDigitBad,(16,16)) plotDigit(exampleDigitBad, 'This digit is an example where the diff kernel predicts the wrong class (1). 
\nrbf kernel, however, predicts correctly!') @@ -95,7 +97,7 @@ def plotDigit(digit, title_str=''): N = Matrix.shape[0] # set training and test set - index_train = xrange(N) + index_train = range(N) index_test = [38, 81, 124, 129, 70, 190] index_train = np.setdiff1d(index_train, index_test) @@ -117,7 +119,7 @@ def plotDigit(digit, title_str=''): # evaluation predictive_class_rbf = np.sign(model.ym) - ACC_rbf = valid.ACC(predictive_class_rbf, y_test) + ACC_rbf = valid.ACC(predictive_class_rbf, y_test) ## DIFFUSION Kernel # compute kernel matrix and initalize GP with precomputed kernel @@ -159,10 +161,10 @@ def plotDigit(digit, title_str=''): ACC_sum = valid.ACC(predictive_class_sum, y_test) - print np.hstack((np.array(index_test, ndmin=2).T, y_test, predictive_class_rbf, predictive_class_diff, predictive_class_sum)) - print 'accuracy (RBF): ' , ACC_rbf - print 'accuracy (DIFF): ' , ACC_diff - print 'accuracy (SUM): ' , ACC_sum + print(np.hstack((np.array(index_test, ndmin=2).T, y_test, predictive_class_rbf, predictive_class_diff, predictive_class_sum))) + print('accuracy (RBF): ' , ACC_rbf) + print('accuracy (DIFF): ' , ACC_diff) + print('accuracy (SUM): ' , ACC_sum) diff --git a/pyGPs/Demo/USPS/loadBinaryUSPS.py b/pyGPs/Demo/USPS/loadBinaryUSPS.py index 7f18496..d3b9823 100644 --- a/pyGPs/Demo/USPS/loadBinaryUSPS.py +++ b/pyGPs/Demo/USPS/loadBinaryUSPS.py @@ -1,3 +1,4 @@ +from builtins import range import numpy as np from scipy.io import loadmat import os,sys @@ -14,7 +15,7 @@ def load_binary(D1,D2,reduce=False): D1_list = [] D2_list = [] n,D = x.shape - for i in xrange(n): + for i in range(n): if y[i,D1] == 1: D1_list.append(i) elif y[i,D2] == 1: diff --git a/pyGPs/Demo/generate_data_for_Rasmussen_examples.py b/pyGPs/Demo/generate_data_for_Rasmussen_examples.py index faa5c6b..e05e413 100644 --- a/pyGPs/Demo/generate_data_for_Rasmussen_examples.py +++ b/pyGPs/Demo/generate_data_for_Rasmussen_examples.py @@ -1,3 +1,5 @@ +from builtins import zip +from builtins import range import numpy as np def save_data_regresssion(): @@ -16,7 +18,7 @@ def save_data_regresssion(): # TEST points # test points evenly distributed in the interval [-2, 2.5] - xstar = np.array(range(-200,250,4), dtype=np.float64, ndmin=2).T + xstar = np.array(list(range(-200,250,4)), dtype=np.float64, ndmin=2).T xstar /= 100 np.savez('Regression/regression_data', x=x, y=y, xstar=xstar) @@ -155,7 +157,7 @@ def save_data_classification(): # For plotting, we superimpose the data points with the posterior equi-probability contour # lines for the probability of class two given complete information about the generating mechanism. t1,t2 = np.meshgrid(np.arange(-4,4.1,0.1),np.arange(-4,4.1,0.1)) - t = np.array(zip(np.reshape(t1,(np.prod(t1.shape),)),np.reshape(t2,(np.prod(t2.shape),)))) # these are the test inputs + t = np.array(list(zip(np.reshape(t1,(np.prod(t1.shape),)),np.reshape(t2,(np.prod(t2.shape),))))) # these are the test inputs n = t.shape[0] tmm = np.zeros_like(t) diff --git a/pyGPs/GraphExtensions/__init__.py b/pyGPs/GraphExtensions/__init__.py index 0771e01..7de7fda 100644 --- a/pyGPs/GraphExtensions/__init__.py +++ b/pyGPs/GraphExtensions/__init__.py @@ -1,3 +1,4 @@ -import graphKernels -import graphUtil -import nodeKernels +from __future__ import absolute_import +from . import graphKernels +from . import graphUtil +from . 
import nodeKernels diff --git a/pyGPs/GraphExtensions/graphKernels.py b/pyGPs/GraphExtensions/graphKernels.py index 7ba3213..6a16b26 100644 --- a/pyGPs/GraphExtensions/graphKernels.py +++ b/pyGPs/GraphExtensions/graphKernels.py @@ -1,3 +1,8 @@ +from __future__ import division +from __future__ import print_function +from builtins import str +from builtins import range +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -68,7 +73,7 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, ## INITIALIZE label probabilities of labeled nodes (num_nodes x num_labels) if l.shape[1]==1: lab_prob = np.zeros((num_nodes,num_labels),dtype=np.float64) - for i in xrange(num_nodes): + for i in range(num_nodes): if l[i,0] > 0: lab_prob[i,l[i,0]-1] = 1 else: @@ -83,7 +88,7 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, ## INITIALIZE unlabeled/uninitialized nodes UNIFORMLY idx_unif = np.where(np.sum(lab_prob, axis=1)==0)[0] if idx_unif.shape[0] != 0: - lab_prob[idx_unif,:] = 1./lab_prob.shape[1] + lab_prob[idx_unif,:] = old_div(1.,lab_prob.shape[1]) ## row normalize A -> transition matrix T if (ktype=='label_propagation') or (ktype=='label_diffusion'): row_sums = np.array(A.sum(axis=1))[:,0] @@ -93,12 +98,12 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, #=========================================================================== # ## PROPAGATION KERNEL ITERATIONS #=========================================================================== - for h in xrange(h_max+1): - print 'ITERATION: ', h + for h in range(h_max+1): + print('ITERATION: ', h) if h > 0: ## LABEL UPDATE if showEachStep: - print '...computing LABEL UPDATE' + print('...computing LABEL UPDATE') if ktype == 'label_propagation': lab_prob[idx,:] = lab_orig[idx,:] # PUSH BACK original LABELS @@ -112,7 +117,7 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, # y(t+1) = alpha*S*y(t)+(1-alpha)*y(0) alpha = 0.8 # compute S - diag = np.array(A.sum(axis=1)).T**(-1/2) + diag = np.array(A.sum(axis=1)).T**(old_div(-1,2)) D = spsp.lil_matrix((A.shape[0], A.shape[1]), dtype=float) # D.setdiag(diag[0,:]) D = D.tocsr() @@ -121,7 +126,7 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, ## COMPUTE hashvalues if showEachStep: - print '...computing hashvalues' + print('...computing hashvalues') # determine path to take depending on chosen distance use_cauchy = (p =='L1') or (p =='tv') take_sqrt = (p =='hellinger') @@ -136,18 +141,18 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, b = w * np.random.rand() # compute hashes # hashLabels is a Vector with length: number of nodes (hashvalue for respective node) - hashLabels = (np.dot(lab_prob,v) + b) / w + hashLabels = old_div((np.dot(lab_prob,v) + b), w) hashLabels = np.floor(hashLabels) # take floor uniqueHash, hashLabels = np.unique(hashLabels, return_inverse=True) # map to consecutive integer from 0 ## COMPUTE kernel contribution if showEachStep: - print '...computing KERNEL contribution' + print('...computing KERNEL contribution') # aggregate counts on graphs # counts is a matrix: number of graphs x number of hashlabels num_bins = len(uniqueHash) counts = np.zeros((num_graphs, num_bins)) # init counts matrix # accumulate counts of hash 
labels - for i in xrange(num_nodes): + for i in range(num_nodes): counts[ (gr_id[i,0]-1), hashLabels[i] ] +=1 # compute base kernel (here: LINEAR kernel) @@ -163,7 +168,7 @@ def propagationKernel(A, l, gr_id, h_max, w, p, ktype=None, SUM=True, VIS=False, K[:,:,h] = K_h if showEachStep: - print K[:,:,h] + print(K[:,:,h]) ## VISUALIZE KERNELS if VIS: diff --git a/pyGPs/GraphExtensions/graphUtil.py b/pyGPs/GraphExtensions/graphUtil.py index bf21087..72695fa 100644 --- a/pyGPs/GraphExtensions/graphUtil.py +++ b/pyGPs/GraphExtensions/graphUtil.py @@ -1,3 +1,5 @@ +from __future__ import division +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -74,7 +76,7 @@ def normalizeKernel(K): ''' Kdiag = np.atleast_2d(np.diag(K)) normalizers = np.sqrt(Kdiag*Kdiag.T) - K_norm = K/normalizers + K_norm = old_div(K,normalizers) return K_norm diff --git a/pyGPs/GraphExtensions/nodeKernels.py b/pyGPs/GraphExtensions/nodeKernels.py index 8cad516..ce9c212 100644 --- a/pyGPs/GraphExtensions/nodeKernels.py +++ b/pyGPs/GraphExtensions/nodeKernels.py @@ -1,3 +1,5 @@ +from __future__ import division +from past.utils import old_div #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] @@ -26,7 +28,7 @@ def normLap(A): ''' I = np.identity(A.shape[0]) d = A.sum(axis=0) - d = np.sqrt(1./d) + d = np.sqrt(old_div(1.,d)) D = np.diag(d) L = I - np.dot( np.dot(D,A),D ) return L @@ -91,7 +93,7 @@ def VNDKernel(A, alpha=0.5): ''' I = np.identity(A.shape[0]) d = A.sum(axis=0) - d = np.sqrt(1./d) + d = np.sqrt(old_div(1.,d)) D = np.diag(d) S = np.dot( np.dot(D,A),D ) K = np.linalg.inv( I - alpha*S ) @@ -110,14 +112,14 @@ def rwKernel(A, p=1, a=2): :return: kernel matrix ''' if type(p) != int: - p = int(p) + p = int(p) if p < 1: - raise Exception('Step parameter p needs to be larger than 0.') - if a <= 1: - a=1.0001 + raise Exception('Step parameter p needs to be larger than 0.') + if a <= 1: + a=1.0001 I = np.identity(A.shape[0]) L = normLap(A) - K = np.linalg.matrix_power( a*I - L, p) + K = np.linalg.matrix_power( a*I - L, p) return K diff --git a/pyGPs/Optimization/__init__.py b/pyGPs/Optimization/__init__.py index 47b198b..877e588 100644 --- a/pyGPs/Optimization/__init__.py +++ b/pyGPs/Optimization/__init__.py @@ -1,3 +1,4 @@ -import scg -import minimize -import conf \ No newline at end of file +from __future__ import absolute_import +from . import scg +from . import minimize +from . 
import conf \ No newline at end of file diff --git a/pyGPs/Optimization/conf.py b/pyGPs/Optimization/conf.py index 09ade68..7bc9479 100644 --- a/pyGPs/Optimization/conf.py +++ b/pyGPs/Optimization/conf.py @@ -1,3 +1,4 @@ +from builtins import object #================================================================================ # Marion Neumann [marion dot neumann at uni-bonn dot de] # Daniel Marthaler [dan dot marthaler at gmail dot com] diff --git a/pyGPs/Optimization/minimize.py b/pyGPs/Optimization/minimize.py index 24bdc73..7b1b13d 100644 --- a/pyGPs/Optimization/minimize.py +++ b/pyGPs/Optimization/minimize.py @@ -1,3 +1,6 @@ +from __future__ import division +from __future__ import print_function +from past.utils import old_div #=============================================================================== # This program is distributed WITHOUT ANY WARRANTY; without even the implied @@ -47,7 +50,7 @@ def run(f, X, args=(), length=None, red=1.0, verbose=False): EXT = 3.0; # extrapolate maximum 3 times the current step-size MAX = 20; # max 20 function evaluations per line search RATIO = 10; # maximum allowed slope ratio - SIG = 0.1;RHO = SIG/2;# SIG and RHO are the constants controlling the Wolfe- + SIG = 0.1;RHO = old_div(SIG,2);# SIG and RHO are the constants controlling the Wolfe- #Powell conditions. SIG is the maximum allowed absolute ratio between #previous and new slopes (derivatives in the search direction), thus setting #SIG to low (positive) values forces higher precision in the line-searches. @@ -66,7 +69,7 @@ def run(f, X, args=(), length=None, red=1.0, verbose=False): i = i + (length<0) # count epochs?! s = -df0 d0 = -dot(s,s) # initial search direction (steepest) and slope - x3 = red/(1.0-d0) # initial step is red/(|s|+1) + x3 = old_div(red,(1.0-d0)) # initial step is red/(|s|+1) while i < abs(length): # while not finished i = i + (length>0) # count iterations?! @@ -89,7 +92,7 @@ def run(f, X, args=(), length=None, red=1.0, verbose=False): return success = 1 except: # catch any error which occured in f - x3 = (x2+x3)/2 # bisect and try again + x3 = old_div((x2+x3),2) # bisect and try again if f3 < F0: X0 = X+x3*s; F0 = f3; dF0 = df3 # keep best values d3 = dot(df3,s) # new slope @@ -122,18 +125,18 @@ def run(f, X, args=(), length=None, red=1.0, verbose=False): else: x2 = x3; f2 = f3; d2 = d3 # move point 3 to point 2 if f4 > f0: - x3 = x2-(0.5*d2*(x4-x2)**2)/(f4-f2-d2*(x4-x2)) + x3 = x2-old_div((0.5*d2*(x4-x2)**2),(f4-f2-d2*(x4-x2))) # quadratic interpolation else: A = 6*(f2-f4)/(x4-x2)+3*(d4+d2) # cubic interpolation B = 3*(f4-f2)-(2*d2+d4)*(x4-x2) if A != 0: - x3=x2+(sqrt(B*B-A*d2*(x4-x2)**2)-B)/A + x3=x2+old_div((sqrt(B*B-A*d2*(x4-x2)**2)-B),A) # num. error possible, ok! 
else: x3 = inf if isnan(x3) or isinf(x3): - x3 = (x2+x4)/2 # if we had a numerical problem then bisect + x3 = old_div((x2+x4),2) # if we had a numerical problem then bisect x3 = max(min(x3, x4-INT*(x4-x2)),x2+INT*(x4-x2)) # don't accept too close result3 = f(X+x3*s, *args) @@ -152,16 +155,16 @@ def run(f, X, args=(), length=None, red=1.0, verbose=False): d3 = d0; d0 = dot(df0,s) if d0 > 0: # new slope must be negative s = -df0; d0 = -dot(s,s) # otherwise use steepest direction - x3 = x3 * min(RATIO, d3/(d0-SMALL)) # slope ratio but max RATIO + x3 = x3 * min(RATIO, old_div(d3,(d0-SMALL))) # slope ratio but max RATIO ls_failed = 0 # this line search did not fail else: X = X0; f0 = F0; df0 = dF0 # restore best point so far if ls_failed or (i>abs(length)): # line search failed twice in a row break # or we ran out of time, so we give up s = -df0; d0 = -dot(s,s) # try steepest - x3 = 1/(1-d0) + x3 = old_div(1,(1-d0)) ls_failed = 1 # this line search failed - if verbose: print "\n" + if verbose: print("\n") #print fX return X, fX, i diff --git a/pyGPs/Optimization/scg.py b/pyGPs/Optimization/scg.py index 8c72751..efd0158 100644 --- a/pyGPs/Optimization/scg.py +++ b/pyGPs/Optimization/scg.py @@ -1,3 +1,6 @@ +from __future__ import division +from __future__ import print_function +from past.utils import old_div #=============================================================================== # SCG Scaled conjugate gradient optimization. # @@ -22,7 +25,7 @@ def run(f, x, args=(), niters = 100, gradcheck = False, display = 0, flog = False, pointlog = False, scalelog = False, tolX = 1.0e-8, tolO = 1.0e-8, eval = None): '''Scaled conjugate gradient optimization. ''' - if display: print '\n***** starting optimization (SCG) *****\n' + if display: print('\n***** starting optimization (SCG) *****\n') nparams = len(x); # Check gradients if gradcheck: @@ -73,19 +76,19 @@ def run(f, x, args=(), niters = 100, gradcheck = False, display = 0, flog = Fals else: return x, listF - sigma = sigma0/sqrt(kappa) + sigma = old_div(sigma0,sqrt(kappa)) xplus = x + sigma*d gplus = f(xplus, *args)[1] gradCount += 1 - theta = (np.dot(d, (gplus - gradnew)))/sigma; + theta = old_div((np.dot(d, (gplus - gradnew))),sigma); # Increase effective curvature and evaluate step size alpha. delta = theta + beta*kappa if (delta <= 0): delta = beta*kappa - beta = beta - theta/kappa + beta = beta - old_div(theta,kappa) - alpha = - mu/delta + alpha = old_div(- mu,delta) # Calculate the comparison ratio. xnew = x + alpha*d @@ -158,7 +161,7 @@ def run(f, x, args=(), niters = 100, gradcheck = False, display = 0, flog = Fals nsuccess = 0; else: if (success == 1): - gamma = np.dot((gradold - gradnew), gradnew)/(mu) + gamma = old_div(np.dot((gradold - gradnew), gradnew),(mu)) d = gamma*d - gradnew; j += 1 @@ -167,7 +170,7 @@ def run(f, x, args=(), niters = 100, gradcheck = False, display = 0, flog = Fals # iterations. 
# options(8) = fold;
if (display):
- print "maximum number of iterations reached"
+ print("maximum number of iterations reached")
if eval is not None:
return x, listF, evalList, time
else:
diff --git a/pyGPs/Testing/unit_test_cov.py b/pyGPs/Testing/unit_test_cov.py
index f369362..6f75788 100644
--- a/pyGPs/Testing/unit_test_cov.py
+++ b/pyGPs/Testing/unit_test_cov.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from builtins import range
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [dan dot marthaler at gmail dot com]
@@ -52,7 +54,7 @@ def checkCovOutput(self, train_train, train_test, self_test):
def is_positive_semi_definite(self, K):
v = np.linalg.eig(K)[0]
if any(v.real<=-1e-9):
- print v.real.min()
+ print(v.real.min())
return False
else:
return True
@@ -71,7 +73,7 @@ def checkCovariance(self, k):
k2 = k.getCovMatrix(x=self.x, z=self.z, mode='cross') # test train by test covariance
k3 = k.getCovMatrix(z=self.z, mode='self_test') # test test by test self covariance
self.checkCovOutput(k1,k2,k3)
- for der in xrange(len(k.hyp)): # checking derivatives for each hyperparameter
+ for der in range(len(k.hyp)): # checking derivatives for each hyperparameter
kd1 = k.getDerMatrix(x=self.x, mode='train',der=der) # test train by train derivative
kd2 = k.getDerMatrix(x=self.x, z=self.z, mode='cross',der=der) # test train by test derivative
kd3 = k.getDerMatrix(z=self.z, mode='self_test',der=der) # test test by test self derivative
@@ -79,75 +81,75 @@ def checkCovariance(self, k):
def test_covSM(self):
- print "testing covSM..."
+ print("testing covSM...")
k = pyGPs.cov.SM(Q=10,D=self.x.shape[1])
self.checkCovariance(k)
def test_covGabor(self):
- print "testing covGabor..."
+ print("testing covGabor...")
k = pyGPs.cov.Gabor()
self.checkCovariance(k)
def test_covRBF(self):
- print "testing covRBF..."
+ print("testing covRBF...")
k = pyGPs.cov.RBF()
self.checkCovariance(k)
def test_covRBFunit(self):
- print "testing covRBFunit..."
+ print("testing covRBFunit...")
k = pyGPs.cov.RBFunit()
self.checkCovariance(k)
def test_covRBFard(self):
- print "testing covRBFard..."
+ print("testing covRBFard...")
k = pyGPs.cov.RBFard(D=self.x.shape[1])
self.checkCovariance(k)
def test_covConst(self):
- print "testing covConst..."
+ print("testing covConst...")
k = pyGPs.cov.Const()
self.checkCovariance(k)
def test_covLinear(self):
- print "testing covLinear..."
+ print("testing covLinear...")
k = pyGPs.cov.Linear()
self.checkCovariance(k)
def test_covLINard(self):
- print "testing covLINard..."
+ print("testing covLINard...")
k = pyGPs.cov.LINard(D=self.x.shape[1])
self.checkCovariance(k)
def test_covMatern(self):
- print "testing covMatern..."
- print "...d = 1..."
+ print("testing covMatern...")
+ print("...d = 1...")
k = pyGPs.cov.Matern(d=1)
self.checkCovariance(k)
- print "testing covMatern..."
- print "...d = 3..."
+ print("testing covMatern...")
+ print("...d = 3...")
k = pyGPs.cov.Matern(d=3)
self.checkCovariance(k)
- print "testing covMatern..."
- print "...d = 5..."
+ print("testing covMatern...")
+ print("...d = 5...")
k = pyGPs.cov.Matern(d=5)
self.checkCovariance(k)
- print "testing covMatern..."
- print "...d = 7..."
+ print("testing covMatern...")
+ print("...d = 7...")
k = pyGPs.cov.Matern(d=7)
self.checkCovariance(k)
def test_covPeriodic(self):
- print "testing covPeriodic..."
+ print("testing covPeriodic...")
k = pyGPs.cov.Periodic()
n = 20 # number of training inputs
nn = 10 # number of test inputs
@@ -160,7 +162,7 @@ def test_covPeriodic(self):
self.assertTrue(k1.shape == (n,n))
self.assertTrue(k2.shape == (n,nn))
self.assertTrue(k3.shape == (nn,1))
- for der in xrange(len(k.hyp)): # checking derivatives for each hyperparameter
+ for der in range(len(k.hyp)): # checking derivatives for each hyperparameter
kd1 = k.getDerMatrix(x=x, mode='train',der=der) # test train by train derivative
kd2 = k.getDerMatrix(x=x, z=z, mode='cross',der=der) # test train by test derivative
kd3 = k.getDerMatrix(z=z, mode='self_test',der=der) # test test by test self derivative
@@ -169,25 +171,25 @@ def test_covPeriodic(self):
def test_covNoise(self):
- print "testing covNoise..."
+ print("testing covNoise...")
k = pyGPs.cov.Noise()
self.checkCovariance(k)
def test_covRQ(self):
- print "testing covRQ..."
+ print("testing covRQ...")
k = pyGPs.cov.RQ()
self.checkCovariance(k)
def test_covRQard(self):
- print "testing covRQard..."
+ print("testing covRQard...")
k = pyGPs.cov.RQard(D=self.x.shape[1])
self.checkCovariance(k)
def test_covPre(self):
- print "testing covPre..."
+ print("testing covPre...")
k = pyGPs.cov.Pre(self.M1, self.M2) # load precomputed kernel matrix
k1 = k.getCovMatrix(x=self.x, mode='train') # test train by train covariance
k2 = k.getCovMatrix(x=self.x, z=self.z, mode='cross') # test train by test covariance
@@ -197,7 +199,7 @@ def test_covPre(self):
self.assertTrue(k1.shape == (n,n))
self.assertTrue(k2.shape == (n,nn))
self.assertTrue(k3.shape == (nn,1))
- for der in xrange(len(k.hyp)): # checking derivatives for each hyperparameter
+ for der in range(len(k.hyp)): # checking derivatives for each hyperparameter
kd1 = k.getDerMatrix(x=self.x, mode='train',der=der) # test train by train derivative
kd2 = k.getDerMatrix(x=self.x, z=self.z, mode='cross',der=der) # test train by test derivative
kd3 = k.getDerMatrix(z=self.z, mode='self_test',der=der) # test test by test self derivative
@@ -205,37 +207,37 @@ def test_covPre(self):
def test_covPiecePoly(self):
- print "testing covPiecePoly..."
+ print("testing covPiecePoly...")
k = pyGPs.cov.PiecePoly()
self.checkCovariance(k)
def test_covPoly(self):
- print "testing covPoly..."
+ print("testing covPoly...")
k = pyGPs.cov.Poly()
self.checkCovariance(k)
def test_covScale(self):
- print "testing (composing kernel) muliply by a scalar..."
+ print("testing (composing kernel) muliply by a scalar...")
k = pyGPs.cov.RBF()*5
self.checkCovariance(k)
def test_covSum(self):
- print "testing (composing kernel) sum of two kernels..."
+ print("testing (composing kernel) sum of two kernels...")
k = pyGPs.cov.RBF() + pyGPs.cov.PiecePoly()
self.checkCovariance(k)
def test_covProduct(self):
- print "testing (composing kernel) product of two kernels..."
+ print("testing (composing kernel) product of two kernels...")
k = pyGPs.cov.RBF() * pyGPs.cov.PiecePoly()
self.checkCovariance(k)
def test_covFITC(self):
- print "testing FITC kernel to be used with sparse GP..."
+ print("testing FITC kernel to be used with sparse GP...")
n,D = self.x.shape
nn,D = self.z.shape
nu = 5 # number of inducing points
@@ -252,7 +254,7 @@
self.assertTrue(k2.shape == (nu,nn))
self.assertTrue(k3.shape == (nn,1))
- for der in xrange(len(k.hyp)):
+ for der in range(len(k.hyp)):
Kd, Kuud, Kud = k.getDerMatrix(x=self.x, mode='train',der=der) # test train by train derivative
kd2 = k.getDerMatrix(x=self.x, z=self.z, mode='cross',der=der) # test train by test derivative
kd3 = k.getDerMatrix(z=self.z, mode='self_test',der=der) # test test by test self derivative
@@ -271,6 +273,6 @@ def test_cov_new(self):
'''
if __name__ == "__main__":
- print "Running unit tests..."
+ print("Running unit tests...")
unittest.main()
diff --git a/pyGPs/Testing/unit_test_inf.py b/pyGPs/Testing/unit_test_inf.py
index eb71e53..d338c14 100644
--- a/pyGPs/Testing/unit_test_inf.py
+++ b/pyGPs/Testing/unit_test_inf.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [dan dot marthaler at gmail dot com]
@@ -56,7 +57,7 @@ def checkFITCOutput(self, post, nlZ, dnlZ):
def test_infExact(self):
- print "testing exact inference..."
+ print("testing exact inference...")
inffunc = pyGPs.inf.Exact()
meanfunc = pyGPs.mean.Zero()
covfunc = pyGPs.cov.RBF()
@@ -66,7 +67,7 @@ def test_infExact(self):
def test_infFITC_Exact(self):
- print "testing FITC inference..."
+ print("testing FITC inference...")
inffunc = pyGPs.inf.FITC_Exact()
meanfunc = pyGPs.mean.Zero()
covfunc = pyGPs.cov.RBF().fitc(self.u)
@@ -76,7 +77,7 @@ def test_infFITC_Exact(self):
def test_infEP(self):
- print "testing EP inference..."
+ print("testing EP inference...")
inffunc = pyGPs.inf.EP()
meanfunc = pyGPs.mean.Zero()
covfunc = pyGPs.cov.RBF()
@@ -86,7 +87,7 @@ def test_infEP(self):
def test_infFITC_EP(self):
- print "testing FITC EP inference..."
+ print("testing FITC EP inference...")
inffunc = pyGPs.inf.FITC_EP()
meanfunc = pyGPs.mean.Zero()
covfunc = pyGPs.cov.RBF().fitc(self.u)
@@ -96,7 +97,7 @@ def test_infFITC_EP(self):
def test_infLaplace(self):
- print "testing Laplace inference..."
+ print("testing Laplace inference...")
inffunc = pyGPs.inf.Laplace()
meanfunc = pyGPs.mean.Zero()
covfunc = pyGPs.cov.RBF()
@@ -106,7 +107,7 @@ def test_infLaplace(self):
def test_infFITC_Laplace(self):
- print "testing FITC EP inference..."
+ print("testing FITC EP inference...")
inffunc = pyGPs.inf.FITC_Laplace()
meanfunc = pyGPs.mean.Zero()
covfunc = pyGPs.cov.RBF().fitc(self.u)
@@ -126,6 +127,6 @@ def test_inf_new(self):
if __name__ == "__main__":
- print "Running unit tests..."
+ print("Running unit tests...")
unittest.main()
diff --git a/pyGPs/Testing/unit_test_lik.py b/pyGPs/Testing/unit_test_lik.py
index 1f007c7..3b91191 100644
--- a/pyGPs/Testing/unit_test_lik.py
+++ b/pyGPs/Testing/unit_test_lik.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from builtins import range
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [marthaler at ge dot com]
@@ -58,40 +60,40 @@ def checkDerivativeEP(self, dlZhyp):
def checkLikelihood(self, likelihood):
- print "predictive mode"
+ print("predictive mode")
lp, ymu, ys2 = likelihood.evaluate(y=self.y, mu=self.mu, s2=self.s2, nargout=3)
self.checkPrediction(lp, ymu, ys2)
- print "inference mode(Laplace inference)"
+ print("inference mode(Laplace inference)")
lp,dlp,d2lp,d3lp = likelihood.evaluate(y=self.y, mu=self.mu, s2=self.s2, inffunc=pyGPs.inf.Laplace(), nargout=4)
self.checkInferenceLaplace(lp,dlp,d2lp,d3lp)
- for der in xrange(len(likelihood.hyp)):
+ for der in range(len(likelihood.hyp)):
lp_dhyp,dlp_dhyp,d2lp_dhyp = likelihood.evaluate(y=self.y, mu=self.mu, s2=self.s2, inffunc=pyGPs.inf.Laplace(), der=der, nargout=4)
self.checkDerivativeLaplace(lp_dhyp,dlp_dhyp,d2lp_dhyp)
- print "inference mode(EP inference)"
+ print("inference mode(EP inference)")
lp,dlp,d2lp = likelihood.evaluate(y=self.y, mu=self.mu, s2=self.s2, inffunc=pyGPs.inf.EP(), nargout=3)
self.checkInferenceEP(lp,dlp,d2lp)
- for der in xrange(len(likelihood.hyp)):
+ for der in range(len(likelihood.hyp)):
dlZhyp = likelihood.evaluate(y=self.y, mu=self.mu, s2=self.s2, inffunc=pyGPs.inf.EP(), der=der, nargout=3)
self.checkDerivativeEP(dlZhyp)
def test_likGauss(self):
- print "testing Gaussian likelihood..."
+ print("testing Gaussian likelihood...")
likelihood = pyGPs.lik.Gauss()
self.checkLikelihood(likelihood)
def test_likErf(self):
- print "testing error function(cumulative Gaussian) likelihood..."
+ print("testing error function(cumulative Gaussian) likelihood...")
likelihood = pyGPs.lik.Erf()
self.checkLikelihood(likelihood)
def test_likLaplace(self):
- print "testing Laplacian likelihood..."
+ print("testing Laplacian likelihood...")
likelihood = pyGPs.lik.Laplace()
self.checkLikelihood(likelihood)
@@ -105,6 +107,6 @@ def test_cov_new(self):
if __name__ == "__main__":
- print "Running unit tests..."
+ print("Running unit tests...")
unittest.main()
diff --git a/pyGPs/Testing/unit_test_mean.py b/pyGPs/Testing/unit_test_mean.py
index 595fcb9..6bdfda6 100644
--- a/pyGPs/Testing/unit_test_mean.py
+++ b/pyGPs/Testing/unit_test_mean.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from builtins import range
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [dan dot marthaler at gmail dot com]
@@ -37,56 +39,56 @@ def checkDerOutput(self, derivative):
def checkMean(self, m):
mean = m.getMean(x=self.x) # get mean
self.checkMeanOutput(mean)
- for der in xrange(len(m.hyp)): # get derivatives
+ for der in range(len(m.hyp)): # get derivatives
derivative = m.getDerMatrix(x=self.x, der=der)
self.checkDerOutput(derivative)
def test_meanZero(self):
- print "testing meanZero..."
+ print("testing meanZero...")
m = pyGPs.mean.Zero()
self.checkMean(m)
def test_meanLinear(self):
- print "testing meanLinear..."
+ print("testing meanLinear...")
m = pyGPs.mean.Linear(D=self.x.shape[1])
self.checkMean(m)
def test_meanOne(self):
- print "testing meanOne..."
+ print("testing meanOne...")
m = pyGPs.mean.One()
self.checkMean(m)
def test_meanConst(self):
- print "testing meanConst..."
+ print("testing meanConst...")
m = pyGPs.mean.Const()
self.checkMean(m)
def test_meanScale(self):
- print "testing (compositing mean) muliply by a scalar..."
+ print("testing (compositing mean) muliply by a scalar...")
m = pyGPs.mean.One() * 6
self.checkMean(m)
def test_meanSum(self):
- print "testing (compositing mean) sum of two means..."
+ print("testing (compositing mean) sum of two means...")
m = pyGPs.mean.One() + pyGPs.mean.Const()
self.checkMean(m)
def test_meanProduct(self):
- print "testing (compositing mean) product of two means..."
+ print("testing (compositing mean) product of two means...")
m = pyGPs.mean.One() * pyGPs.mean.Const()
self.checkMean(m)
def test_meanPower(self):
- print "testing (compositing mean) power of a mean..."
+ print("testing (compositing mean) power of a mean...")
m = pyGPs.mean.Const() ** 2
self.checkMean(m)
@@ -102,6 +104,6 @@ def test_mean_new(self):
if __name__ == "__main__":
- print "Running unit tests..."
+ print("Running unit tests...")
unittest.main()
diff --git a/pyGPs/Testing/unit_test_model.py b/pyGPs/Testing/unit_test_model.py
index 7d45d3b..c54664c 100644
--- a/pyGPs/Testing/unit_test_model.py
+++ b/pyGPs/Testing/unit_test_model.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from builtins import zip
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [dan dot marthaler at gmail dot com]
@@ -35,7 +37,7 @@ def setUp(self):
self.yc = clsData['y'] # training target
self.zc = clsData['xstar'] # test data
u1,u2 = np.meshgrid(np.linspace(-2,2,5),np.linspace(-2,2,5))
- self.uc = np.array(zip(np.reshape(u2,(np.prod(u2.shape),)),np.reshape(u1,(np.prod(u1.shape),)))) # inducing points
+ self.uc = np.array(list(zip(np.reshape(u2,(np.prod(u2.shape),)),np.reshape(u1,(np.prod(u1.shape),))))) # inducing points
def checkRegressionOutput(self, model):
@@ -55,7 +57,7 @@ def checkClassificationOutput(self, model):
def test_GPR(self):
- print "testing GP regression..."
+ print("testing GP regression...")
model = pyGPs.GPR()
m = pyGPs.mean.Zero()
k = pyGPs.cov.RBF()
@@ -67,7 +69,7 @@ def test_GPR(self):
def test_GPC(self):
- print "testing GP classification..."
+ print("testing GP classification...")
model = pyGPs.GPC()
m = pyGPs.mean.Zero()
k = pyGPs.cov.RBF()
@@ -88,7 +90,7 @@ def test_GPC(self):
def test_GPR_FITC(self):
- print "testing GP sparse regression..."
+ print("testing GP sparse regression...")
model = pyGPs.GPR_FITC()
m = pyGPs.mean.Zero()
k = pyGPs.cov.RBF()
@@ -100,7 +102,7 @@ def test_GPR_FITC(self):
def test_GPC_FITC(self):
- print "testing GP sparse classification..."
+ print("testing GP sparse classification...")
model = pyGPs.GPC_FITC()
m = pyGPs.mean.Zero()
k = pyGPs.cov.RBF()
@@ -123,6 +125,6 @@ def test_GPC_FITC(self):
if __name__ == "__main__":
- print "Running unit tests(about 5 min)..."
+ print("Running unit tests(about 5 min)...")
unittest.main()
diff --git a/pyGPs/Testing/unit_test_opt.py b/pyGPs/Testing/unit_test_opt.py
index a7cd325..219fab8 100644
--- a/pyGPs/Testing/unit_test_opt.py
+++ b/pyGPs/Testing/unit_test_opt.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [dan dot marthaler at gmail dot com]
@@ -37,33 +38,33 @@ def checkOptimizer(self, optimizer):
# and optimalHyp is a flattened numpy array
optimalHyp, funcValue = optimizer.findMin(self.x, self.y)
self.assertTrue(funcValue < self.nlZ_beforeOpt)
- print "optimal hyperparameters in flattened array:", optimalHyp
+ print("optimal hyperparameters in flattened array:", optimalHyp)
def test_CG(self):
- print "testing Conjugent gradient..."
+ print("testing Conjugent gradient...")
optimizer = pyGPs.Core.opt.CG(self.model)
self.checkOptimizer(optimizer)
def test_BFGS(self):
- print "testing quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS)..."
+ print("testing quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS)...")
optimizer = pyGPs.Core.opt.BFGS(self.model)
self.checkOptimizer(optimizer)
def test_SCG(self):
- print "testing Scaled conjugent gradient..."
+ print("testing Scaled conjugent gradient...")
optimizer = pyGPs.Core.opt.SCG(self.model)
self.checkOptimizer(optimizer)
def test_Minimize(self):
- print "testing minimize by Carl Rasmussen ..."
+ print("testing minimize by Carl Rasmussen ...")
optimizer = pyGPs.Core.opt.Minimize(self.model)
self.checkOptimizer(optimizer)
@@ -81,6 +82,6 @@ def test_MyOptimizer(self):
if __name__ == "__main__":
- print "Running unit tests..."
+ print("Running unit tests...")
unittest.main()
diff --git a/pyGPs/Validation/__init__.py b/pyGPs/Validation/__init__.py
index 0b19391..40b3f49 100644
--- a/pyGPs/Validation/__init__.py
+++ b/pyGPs/Validation/__init__.py
@@ -1,4 +1,5 @@
-import valid
+from __future__ import absolute_import
+from . import valid
diff --git a/pyGPs/Validation/valid.py b/pyGPs/Validation/valid.py
index 668eb44..5185b32 100644
--- a/pyGPs/Validation/valid.py
+++ b/pyGPs/Validation/valid.py
@@ -1,3 +1,6 @@
+from __future__ import division
+from builtins import range
+from past.utils import old_div
#================================================================================
# Marion Neumann [marion dot neumann at uni-bonn dot de]
# Daniel Marthaler [dan dot marthaler at gmail dot com]
@@ -36,7 +39,7 @@ def k_fold_validation(x, y, K=10, randomise=False):
n, D = x.shape
assert n > K
- for k in xrange(K):
+ for k in range(K):
x_train = np.array([e for i, e in enumerate(x) if i % K != k])
x_test = np.array([e for i, e in enumerate(x) if i % K == k])
y_train = np.array([e for i, e in enumerate(y) if i % K != k])
@@ -53,10 +56,10 @@ def k_fold_index(n, K=10):
:param K: number of folds
'''
- for k in xrange(K):
+ for k in range(K):
indice_train = []
indice_test = []
- for i in xrange(n):
+ for i in range(n):
if i % K == k:
indice_test.append(i)
else:
@@ -85,10 +88,10 @@ def ACC(predict,target):
'''
n,D = target.shape
count = 0.
- for i in xrange(n):
+ for i in range(n):
if predict[i,0] == target[i,0]:
count += 1
- return count/n
+ return old_div(count,n)
def Prec(predict,target):
@@ -102,12 +105,12 @@ def Prec(predict,target):
n,D = target.shape
count_1 = 0.
count_2 = 0.
- for i in xrange(n):
+ for i in range(n):
if predict[i,0] == 1:
count_1 += 1
if target[i,0] == 1:
count_2 += 1
- return count_2 / count_1
+ return old_div(count_2, count_1)
def Recall(predict,target):
@@ -121,12 +124,12 @@ def Recall(predict,target):
n,D = target.shape
count_1 = 0.
count_2 = 0.
- for i in xrange(n):
+ for i in range(n):
if target[i,0] == 1:
count_1 += 1
if predict[i,0] == 1:
count_2 += 1
- return count_2 / count_1
+ return old_div(count_2, count_1)
def NLPD(y, MU, S2):
diff --git a/pyGPs/__init__.py b/pyGPs/__init__.py
index 4dfa102..e6cda29 100644
--- a/pyGPs/__init__.py
+++ b/pyGPs/__init__.py
@@ -1,8 +1,9 @@
-import Optimization
-import Validation
-import GraphExtensions
-from Core import *
-from Core.gp import *
+from __future__ import absolute_import
+from . import Optimization
+from . import Validation
+from . import GraphExtensions
+from .Core import *
+from .Core.gp import *
__all__ = ['Optimization', 'Validation', 'GraphExtensions', 'Core']
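Note on the conversion pattern used throughout this patch: print statements become the print() function, xrange becomes range (via builtins), zip is wrapped in list() where an array is built from it, and divisions whose operands may be integers are routed through past.utils.old_div so the Python 2 result is preserved even under from __future__ import division. The snippet below is a minimal sketch of that behaviour and is not part of the patch; it assumes the future package (which provides builtins and past.utils) is installed, and the values are illustrative only.

from __future__ import division      # '/' now always means true division
from past.utils import old_div       # provided by the `future` package

# old_div reproduces Python 2 '/': floor division when both operands are
# integers, true division as soon as either operand is a float.
assert old_div(7, 2) == 3            # both ints -> floor, like Python 2
assert old_div(7.0, 2) == 3.5        # a float operand -> true division

# In valid.py the accumulators are initialised to 0. (a float), so e.g.
# old_div(count, n) in ACC already performs true division and the converted
# metrics (ACC, Prec, Recall) return the same values as before.
count, n = 3., 4
print(old_div(count, n))             # 0.75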