In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
import GPy, scipy

# Basics of *GPy* 

## Kernels 

In [None]:
# Evaluation grid for the kernel: np.linspace defaults to 50 points, here on [-1, 1].
# GPy expects 2-D input (one row per point), so the grid becomes a single column.
x = np.linspace(-1, 1).reshape(-1, 1)

In [None]:
# RBF kernel over a single input dimension; no hyperparameters are passed,
# so the library defaults apply. The bare name displays the kernel summary.
kern = GPy.kern.RBF(input_dim=1)
kern

In [None]:
# Heat map of the covariance matrix the kernel produces on the grid x
# (entry [i, j] is the covariance between x[i] and x[j]).
plt.imshow(kern.K(x))
plt.colorbar()

In [None]:
# Draw 10 zero-mean Gaussian vectors whose covariance is K evaluated on x.
# rvs returns one draw per row; transposing makes each draw one plotted line.
plt.plot(
    scipy.stats.multivariate_normal.rvs(mean=np.zeros(50), cov=kern.K(x), size=10).T
);

In [None]:
# A larger input dimension lets the kernel take several covariates.
# With automatic relevance determination (ARD) each covariate gets its own
# separate lengthscale instead of one shared value.

kern = GPy.kern.RBF(input_dim=2, ARD=True)
kern

In [None]:
# Two-column design matrix: the original grid plus a second covariate
# filled with standard-normal random values.

x2 = np.column_stack([x[:,0], np.random.normal(size=50)])

plt.imshow(kern.K(x2))
plt.colorbar()

In [None]:
# Two one-dimensional base kernels used to demonstrate kernel arithmetic below.
k1 = GPy.kern.RBF(input_dim=1, variance=1, lengthscale=.2)
k2 = GPy.kern.Cosine(input_dim=1, lengthscale=.1)

In [None]:
# addition
# `+` combines the two kernels into one kernel object; the bare `k` displays it.
k = k1 + k2
k

In [None]:
# Covariance matrix of the combined kernel k on the grid x, shown as a heat map.
plt.imshow(k.K(x))
plt.colorbar()

In [None]:
# Three zero-mean Gaussian draws with covariance k.K(x), one line per draw.
plt.plot(
    scipy.stats.multivariate_normal.rvs(mean=np.zeros(50), cov=k.K(x), size=3).T
);

In [None]:
# multiplication
# `*` combines the two kernels into one kernel object; this rebinds `k`,
# replacing the sum kernel defined earlier.
k = k1*k2
k

In [None]:
# Covariance matrix of the current kernel k on the grid x, shown as a heat map.
plt.imshow(k.K(x))
plt.colorbar()

In [None]:
# Three zero-mean Gaussian draws with covariance k.K(x), one line per draw.
plt.plot(
    scipy.stats.multivariate_normal.rvs(mean=np.zeros(50), cov=k.K(x), size=3).T
);

## Models 

In [None]:
# generate some data: a latent function f and noisy observations y of it,
# sampled together in one joint Gaussian draw
kern = GPy.kern.RBF(1)
sigma = 5e-2  # added to the covariance diagonal below, i.e. acts as a noise variance

# number of grid points (derived from x instead of hard-coding 50 / 100)
n_obs = x.shape[0]

# generate covariance for f and y: stacking the inputs twice gives a 2x2 block
# matrix in which every block equals K(x, x); only the y-y block gets the noise
cov = kern.K(np.tile(x[:,0], 2)[:,None])
cov[n_obs:,n_obs:] = kern.K(x) + sigma*np.eye(n_obs)

obs = scipy.stats.multivariate_normal.rvs(np.zeros(2*n_obs), cov)
# first half of the draw is f (kept 1-D), second half is y (as a column for GPy)
f, y = obs[:n_obs], obs[n_obs:][:,None]

plt.plot(x, f, label='f')
plt.scatter(x, y, label='y')
# the labels above are only rendered once a legend is requested
plt.legend();

In [None]:
# GP regression of y on x with a one-dimensional RBF kernel;
# the bare name displays the model summary.
m = GPy.models.GPRegression(x, y, kernel=GPy.kern.RBF(input_dim=1))
m

In [None]:
# Randomize the model's parameters (presumably so the optimisation below does
# not start from the defaults), then display the model with the new values.
m.randomize()
m

In [None]:
# Fit the model's parameters with GPy's optimizer and display the fitted model.
m.optimize()
m

In [None]:
# the model log-likelihood is the (negated) objective used for optimization
m.log_likelihood()

In [None]:
# GPy's built-in plot of the fitted model.
m.plot()

In [None]:
# generate our own predictions
# the grid extends to [-3, 3], well beyond the data range
xpred = np.linspace(-3, 3)[:,None]

mu, var = m.predict(xpred)

# remove additional dimensions (predict returns column arrays)
mu = mu[:,0]
var = var[:,0]

std = np.sqrt(var)

# half-width multiplier of a two-sided 95% Gaussian interval (~1.96, not 1.98)
z = scipy.stats.norm.ppf(0.975)

plt.plot(xpred, mu)
plt.fill_between(xpred[:,0], mu-z*std, mu+z*std, alpha=.1)

plt.scatter(x, y)

In [None]:
# function predictions: predict_noiseless is used here instead of predict,
# and the result is compared against the sampled latent function f
xpred = np.linspace(-1.3, 1.3)[:,None]

mu, var = m.predict_noiseless(xpred)

# remove additional dimensions (the prediction comes back as column arrays)
mu = mu[:,0]
var = var[:,0]

std = np.sqrt(var)

# half-width multiplier of a two-sided 95% Gaussian interval (~1.96, not 1.98)
z = scipy.stats.norm.ppf(0.975)

plt.plot(xpred, mu)
plt.fill_between(xpred[:,0], mu-z*std, mu+z*std, alpha=.1)

plt.plot(x, f)

In [None]:
# test out of sample prediction
# fit on the first 25 points only; no kernel is passed, so GPRegression
# uses its default kernel
m = GPy.models.GPRegression(x[:25,:], y[:25,:])
m.randomize()
m.optimize()

xpred = np.linspace(-1.3, 1.3)[:,None]

mu, var = m.predict(xpred)

# remove additional dimensions (predict returns column arrays)
mu = mu[:,0]
var = var[:,0]

std = np.sqrt(var)

# half-width multiplier of a two-sided 95% Gaussian interval (~1.96, not 1.98)
z = scipy.stats.norm.ppf(0.975)

plt.plot(xpred, mu)
plt.fill_between(xpred[:,0], mu-z*std, mu+z*std, alpha=.1)

# training points in the first colour, held-out points in the next one
plt.scatter(x[:25], y[:25], color='C0')
plt.scatter(x[25:], y[25:])

## Sparse models

In [None]:
# Sparse GP regression on the same data; no kernel or inducing-point options
# are passed, so the library defaults are used. Parameters are randomized,
# optimised, and the fitted model is displayed.
m = GPy.models.SparseGPRegression(x, y)
m.randomize()
m.optimize()
m

In [None]:
# GPy's built-in plot of the fitted sparse model.
m.plot()

# Functional Significance Testing 

**References**:

* [Flexible Modelling of Genetic Effects on Function-Valued Traits](https://link.springer.com/chapter/10.1007/978-3-319-31957-5_7)
* [Detecting differential growth of microbial populations with Gaussian process regression](http://genome.cshlp.org/content/early/2016/11/18/gr.210286.116.abstract)
* [Gaussian Processes for Bayesian hypothesis tests on regression functions](http://proceedings.mlr.press/v38/benavoli15.html)

### Independent outputs

In [None]:
# Design matrix with 100 rows: column 0 holds the same 50-point grid on [-1, 1]
# twice, column 1 is a 0/1 group indicator (0 for the first copy, 1 for the second).
x = np.column_stack([
    np.tile(np.linspace(-1, 1), 2),
    np.repeat([0., 1.], 50),
])

# k wraps an RBF in IndependentOutputs (presumably using the last column as the
# output index - confirm against the GPy docs); knull is a plain 1-D RBF.
k = GPy.kern.IndependentOutputs(GPy.kern.RBF(input_dim=1))
knull = GPy.kern.RBF(input_dim=1)

plt.imshow(k.K(x))

In [None]:
# sigma is added straight to the covariance diagonal, so it acts as a noise variance
sigma = 5e-1

# The data are simulated from the null kernel. To simulate from the
# independent-outputs kernel instead, use k.K(x) in place of knull.K(x):
# y = scipy.stats.multivariate_normal.rvs(np.zeros(100), k.K(x) + sigma*np.eye(100))[:,None]
y = scipy.stats.multivariate_normal.rvs(
    mean=np.zeros(100),
    cov=knull.K(x) + sigma*np.eye(100),
)[:,None]

# one scatter per group; both groups are plotted against x[:50,0]
plt.scatter(x[:50,0], y[:50])
plt.scatter(x[:50,0], y[50:])

In [None]:
# Fit the regression with kernel k: randomize the parameters, optimise,
# then display the fitted model.
m = GPy.models.GPRegression(x, y, kernel=k)
m.randomize()
m.optimize()
m

In [None]:
# Plot the model with input column 1 held fixed at the value 0.
m.plot(fixed_inputs=[(1,0)])

In [None]:
# Fit the same data with the null kernel knull: randomize the parameters,
# optimise, then display the fitted model.
mnull = GPy.models.GPRegression(x, y, kernel=knull)
mnull.randomize()
mnull.optimize()
mnull

In [None]:
# Likelihood-ratio statistic: twice the log-likelihood difference between
# model m and the null model mnull.
llr = 2*(m.log_likelihood() - mnull.log_likelihood())
# Upper-tail chi-square probability via the survival function; unlike
# 1 - cdf it does not lose precision (or round to 0) for very small p-values.
# NOTE(review): df=2 is kept as given - confirm it matches the difference in
# free parameters between the two models.
llr, scipy.stats.chi2.sf(llr, df=2)

### Covariate modeling

In [None]:
# Here column 1 of x is treated as an ordinary covariate: a 2-D RBF with ARD,
# i.e. one lengthscale per column of x.
k = GPy.kern.RBF(input_dim=2, ARD=True)

# randomize the parameters, optimise, then display the fitted model
m = GPy.models.GPRegression(x, y, kernel=k)
m.randomize()
m.optimize()
m

#### additional covariates

In [None]:
# add covariates for two treatments, and their interaction
# columns: 0 = time grid, 1 = first treatment (0/1), 2 = second treatment (0/1),
# 3 = product of columns 1 and 2; rows are four consecutive copies of t

t = np.linspace(-1,1,25)

x = np.zeros((t.shape[0]*4, 4))
x[:,0] = np.tile(t, 4)
# first treatment alternates per copy of t: 0, 1, 0, 1
x[:,1] = np.tile(np.repeat([0,1], t.shape[0]), 2)
# second treatment is on for the last two copies; the offset is derived from
# t rather than hard-coded so it stays correct if the grid size changes
x[2*t.shape[0]:,2] = 1
x[:,3] = x[:,1] * x[:,2]

plt.imshow(x, aspect='auto')
plt.colorbar()

In [None]:
# ARD RBF over all four columns of x, with one initial lengthscale per column.
k = GPy.kern.RBF(input_dim=4, ARD=True, lengthscale=[.1, 1, 1, 100])
plt.imshow(k.K(x))
plt.colorbar()

In [None]:
# Null kernel: ARD RBF with only three input dimensions, evaluated on the same
# four-column x (presumably GPy uses the first three columns - confirm).
knull = GPy.kern.RBF(input_dim=3, ARD=True)
plt.imshow(knull.K(x))
plt.colorbar()

In [None]:
# sigma is added straight to the covariance diagonal, so it acts as a noise variance
sigma = 1e-3
n = x.shape[0]

# one zero-mean Gaussian draw with the four-column kernel k plus diagonal noise
y = scipy.stats.multivariate_normal.rvs(
    mean=np.zeros(n),
    cov=k.K(x) + sigma*np.eye(n),
)

# from here on n is the length of one copy of t; the four consecutive chunks
# of y are each plotted against x[:n, 0]
n = t.shape[0]
plt.scatter(x[:n, 0], y[:n])
plt.scatter(x[:n, 0], y[n:2*n])
plt.scatter(x[:n, 0], y[2*n:3*n])
plt.scatter(x[:n, 0], y[3*n:])

# Additional operations 