In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
import GPy, scipy
import scipy.stats  # used below as scipy.stats.*; import the submodule explicitly instead of relying on GPy loading it

## Kernels 

In [None]:
# build some input for evaluating the kernel:
# an evenly spaced 1-d grid on [-1, 1]; num=50 is numpy's default, spelled out here
x = np.linspace(-1, 1, num=50)

x.shape

In [None]:
# (GPy expects 2d input)
# reshape into a single column; unlike x[:, None] this is idempotent, so
# re-running the cell does not keep adding axes to x
x = x.reshape(-1, 1)

In [None]:
# RBF kernel over one input dimension, with variance and lengthscale both set to 1
kern = GPy.kern.RBF(1, variance=1., lengthscale=1.)
# bare last expression: display the kernel object in the cell output
kern

In [None]:
# evaluate the kernel on x; this matrix is used as a covariance when sampling further down
kern.K(x)

In [None]:
# heat map of the kernel matrix on x; the colour bar is attached to that image
plt.colorbar(plt.imshow(kern.K(x)))

Sample functions from $f \sim N \Big(0, \kappa(X)\Big)$:

In [None]:
# seed NumPy's global random state so that draws made through it are
# reproducible when the notebook is re-run from a fresh kernel
np.random.seed(0)

# draw 10 functions from the zero-mean Gaussian whose covariance is kern.K(x);
# the mean vector is sized from x instead of a hard-coded 50
plt.plot(scipy.stats.multivariate_normal.rvs(np.zeros(x.shape[0]), kern.K(x), size=10).T);

In [None]:
# increasing the input dimension allows for more covariates
# automatic relevance determination (ARD) allows for a separate lengthscale for each covariate

kern = GPy.kern.RBF(2, ARD=True)
kern

In [None]:
# add second covariate, with random values
# column 0 is the existing grid, column 1 is standard-normal noise;
# the row count follows x rather than a hard-coded 50
x2 = np.column_stack([x[:, 0], np.random.normal(size=x.shape[0])])

plt.imshow(kern.K(x2))
plt.colorbar()

### operations supported on kernels:

In [None]:
# two kernels over one input dimension, combined in the cells below:
# an RBF (variance 1, lengthscale 0.2) and a cosine kernel (lengthscale 0.1)
k1 = GPy.kern.RBF(1, variance=1, lengthscale=.2)
k2 = GPy.kern.Cosine(1, lengthscale=.1)

In [None]:
# heat map of the cosine kernel matrix on x, with its colour bar
plt.colorbar(plt.imshow(k2.K(x)))

sample from $f_2$

In [None]:
# three draws from the zero-mean Gaussian with covariance k2.K(x);
# the mean vector is sized from x instead of a hard-coded 50
plt.plot(scipy.stats.multivariate_normal.rvs(np.zeros(x.shape[0]), k2.K(x), size=3).T);

####  addition

In [None]:
# sum of the two kernels; the bare `k` displays the combined kernel
k = k1 + k2
k

In [None]:
# heat map of the summed kernel's matrix on x, with its colour bar
plt.colorbar(plt.imshow(k.K(x)))

sample from $f = f_1 + f_2$

In [None]:
# three draws from the zero-mean Gaussian with the summed kernel as covariance;
# the mean vector is sized from x instead of a hard-coded 50
plt.plot(scipy.stats.multivariate_normal.rvs(np.zeros(x.shape[0]), k.K(x), size=3).T);

#### multiplication

In [None]:
# product of the two kernels (this rebinds k, replacing the sum from the addition section)
k = k1*k2
k

In [None]:
# heat map of the product kernel's matrix on x, with its colour bar
plt.colorbar(plt.imshow(k.K(x)))

sample from $f = f_1 \times f_2$

In [None]:
# three draws from the zero-mean Gaussian with the product kernel as covariance;
# the mean vector is sized from x instead of a hard-coded 50
plt.plot(scipy.stats.multivariate_normal.rvs(np.zeros(x.shape[0]), k.K(x), size=3).T);

## Models 

Generate data:

$z = \{f(X), y(X)\} \sim N\Bigg(0,\begin{pmatrix} \kappa(X) & \kappa(X) \\ \kappa(X) & \kappa(X) + \sigma^2_y I \end{pmatrix} \Bigg)$

In [None]:
# generate some data
kern = GPy.kern.RBF(1)
# NOTE: sigma is added directly to the covariance diagonal below, so it is
# used as the noise variance (not as a standard deviation)
sigma = 5e-2
# number of input points; replaces the hard-coded 50 / 100
n_obs = x.shape[0]

# generate covariance for f and y: stacking x twice gives a (2*n_obs, 2*n_obs)
# matrix made of four identical n_obs-by-n_obs blocks
cov = kern.K(np.tile(x[:,0], 2)[:,None])

# add iid noise to y block of covariance
cov[n_obs:, n_obs:] += sigma*np.eye(n_obs)

# sample random variables and split into f and y (y is kept as a column)
obs = scipy.stats.multivariate_normal.rvs(np.zeros(2*n_obs), cov)
f, y = obs[:n_obs], obs[n_obs:][:,None]

plt.plot(x, f, label='f')
plt.scatter(x, y, label='y')
# the labels above are only rendered once a legend is drawn
plt.legend()

In [None]:
# build GP regression model on the inputs x and noisy observations y, with a fresh RBF kernel
m = GPy.models.GPRegression(x, y, GPy.kern.RBF(1))
# display the model object
m

In [None]:
# randomize the model (presumably its parameter values; compare the two displayed summaries)
m.randomize()
m

In [None]:
# run the optimizer on the model, then display the result
m.optimize()
m

In [None]:
# model log-likelihood: the objective minimised during optimization is its negative
# ("inverse" here means sign-flipped, not reciprocal)
m.log_likelihood()

In [None]:
# GPy's built-in plot of the fitted model
m.plot()

In [None]:
# presumably plots the latent function f without observation noise
# (compare predict vs predict_noiseless below) - confirm against the GPy docs
m.plot_f()

In [None]:
# generate our own predictions
# prediction grid on [-3, 3], wider than the training inputs, as a column
xpred = np.linspace(-3, 3)[:,None]

mu, var = m.predict(xpred)

# remove additional dimensions
mu = mu[:,0]
var = var[:,0]

std = np.sqrt(var)

plt.plot(xpred, mu)
# approximate 95% band of a Gaussian: mean +/- 1.96 standard deviations (was 1.98)
plt.fill_between(xpred[:,0], mu-1.96*std, mu+1.96*std, alpha=.1)

plt.scatter(x, y)

In [None]:
# function predictions
# prediction grid slightly wider than the training range, as a column
xpred = np.linspace(-1.3, 1.3)[:,None]

mu, var = m.predict_noiseless(xpred)

# remove additional dimensions
mu = mu[:,0]
var = var[:,0]

std = np.sqrt(var)

plt.plot(xpred, mu)
# approximate 95% band of a Gaussian: mean +/- 1.96 standard deviations (was 1.98)
plt.fill_between(xpred[:,0], mu-1.96*std, mu+1.96*std, alpha=.1)

# overlay the sampled latent function f for comparison
plt.plot(x, f)

In [None]:
# test out of sample prediction: fit on the first 25 points only
m = GPy.models.GPRegression(x[:25,:], y[:25,:])
m.randomize()
m.optimize()

xpred = np.linspace(-1.3, 1.3)[:,None]

mu, var = m.predict(xpred)

# remove additional dimensions
mu = mu[:,0]
var = var[:,0]

std = np.sqrt(var)

plt.plot(xpred, mu)
# approximate 95% band of a Gaussian: mean +/- 1.96 standard deviations (was 1.98)
plt.fill_between(xpred[:,0], mu-1.96*std, mu+1.96*std, alpha=.1)

# points used for fitting (pinned to colour C0), then the held-out points
plt.scatter(x[:25], y[:25], color='C0')
plt.scatter(x[25:], y[25:])

## sparse models 

In [None]:
# sparse GP regression on the same x and y; randomize, optimize, then display the model
m = GPy.models.SparseGPRegression(x, y)
m.randomize()
m.optimize()
m

In [None]:
# GPy's built-in plot of the fitted sparse model
m.plot()

# Functional Significance Testing 

**References**:

* [Flexible Modelling of Genetic Effects on Function-Valued Traits](https://link.springer.com/chapter/10.1007/978-3-319-31957-5_7)
* [Detecting differential growth of microbial populations with Gaussian process regression](http://genome.cshlp.org/content/early/2016/11/18/gr.210286.116.abstract)
* [Gaussian Processes for Bayesian hypothesis tests on regression functions](http://proceedings.mlr.press/v38/benavoli15.html)

### Independent outputs

In [None]:
# IPython help syntax: shows the docstring of IndependentOutputs (only valid inside IPython/Jupyter)
GPy.kern.IndependentOutputs?

In [None]:
# 100 x 2 design: column 0 holds the 50-point grid on [-1, 1] twice,
# column 1 is an indicator that is 0 for the first copy and 1 for the second
x = np.column_stack([np.tile(np.linspace(-1, 1), 2), np.repeat([0., 1.], 50)])

# null kernel on column 0; k wraps a copy of it in IndependentOutputs
knull = GPy.kern.RBF(1, lengthscale=.75)
k = GPy.kern.IndependentOutputs(knull.copy())

plt.imshow(k.K(x))

In [None]:
sigma = 1e-2

# side-by-side panels, addressed through explicit axes objects
fig, (ax_full, ax_null) = plt.subplots(1, 2, figsize=(8,4))

# left: one draw with the IndependentOutputs kernel k as covariance (plus diagonal noise)
y = scipy.stats.multivariate_normal.rvs(np.zeros(100), k.K(x) + sigma*np.eye(100))[:,None]
ax_full.scatter(x[:50,0], y[:50])
ax_full.scatter(x[:50,0], y[50:])
ax_full.set_title('full model')

# right: one draw with knull as covariance; this overwrites y, so the y that
# later cells see is this second draw
y = scipy.stats.multivariate_normal.rvs(np.zeros(100), knull.K(x) + sigma*np.eye(100))[:,None]
ax_null.scatter(x[:50,0], y[:50])
ax_null.scatter(x[:50,0], y[50:])
ax_null.set_title('null model')

In [None]:
# fit the full model (kernel k) to y; k itself is passed in, not a copy
m = GPy.models.GPRegression(x,y, k)
m.randomize()
m.optimize()
m

In [None]:
# presumably fixes input column 1 at the value 0 while plotting over column 0 - confirm against the GPy plotting docs
m.plot(fixed_inputs=[(1,0)])

In [None]:
# fit the null model (kernel knull) to the same x and y; knull itself is passed in, not a copy
mnull = GPy.models.GPRegression(x, y, knull)
mnull.randomize()
mnull.optimize()
mnull

In [None]:
# likelihood-ratio statistic: twice the log-likelihood gap between the full and null fits
llr = 2*(m.log_likelihood() - mnull.log_likelihood())
# upper-tail chi-square probability with 2 degrees of freedom; sf() evaluates the
# tail directly and avoids the precision loss of 1 - cdf() for very small p-values
pval = scipy.stats.chi2.sf(llr, df=2)
llr, pval

In [None]:
# likelihood-ratio statistics for 400 data sets sampled with knull as covariance
ll = []

# the diagonal noise term is identical for every replicate, so build it once
noise = sigma*np.eye(100)

for _ in range(400):
    # fresh null kernel, then a sample with it as covariance
    knull = GPy.kern.RBF(1, lengthscale=.75)
    y = scipy.stats.multivariate_normal.rvs(np.zeros(100), knull.K(x) + noise)[:,None]

    # fresh full-model kernel per replicate (as in the second simulation loop),
    # instead of re-linking one shared kernel object into every new model
    k = GPy.kern.IndependentOutputs(knull.copy())
    m = GPy.models.GPRegression(x, y, k)
    m.randomize()
    m.optimize()

    mnull = GPy.models.GPRegression(x, y, knull)
    mnull.randomize()
    mnull.optimize()

    llr = 2*(m.log_likelihood() - mnull.log_likelihood())

    ll.append(llr)

In [None]:
# histogram of the chi-square (df=2) upper-tail probabilities of the collected statistics;
# sf() computes the tail directly instead of 1 - cdf()
plt.hist(scipy.stats.chi2.sf(ll, df=2))

In [None]:
# likelihood-ratio statistics for 400 data sets sampled with the IndependentOutputs kernel
ll = []

for _ in range(400):
    # fresh kernels each replicate; k wraps a copy of knull
    knull = GPy.kern.RBF(1, lengthscale=1)
    k = GPy.kern.IndependentOutputs(knull.copy())

    # one sample with k (plus diagonal noise) as covariance, kept as a column
    cov_full = k.K(x) + sigma*np.eye(100)
    y = scipy.stats.multivariate_normal.rvs(np.zeros(100), cov_full)[:,None]

    # full fit first, then the null fit, each randomized before optimizing
    m = GPy.models.GPRegression(x, y, k)
    m.randomize()
    m.optimize()

    mnull = GPy.models.GPRegression(x, y, knull)
    mnull.randomize()
    mnull.optimize()

    llr = 2*(m.log_likelihood() - mnull.log_likelihood())
    ll.append(llr)

In [None]:
# histogram of the chi-square (df=2) upper-tail probabilities of the collected statistics;
# sf() computes the tail directly instead of 1 - cdf()
plt.hist(scipy.stats.chi2.sf(ll, df=2))

### Covariate modeling

In [None]:
# a single RBF over both columns of x with ARD enabled
k = GPy.kern.RBF(2, ARD=True)

# fitted to whatever x and y the preceding cells left behind
m = GPy.models.GPRegression(x, y, k)
m.randomize()
m.optimize()
m

#### additional covariates

In [None]:
# add covariates for two treatments, and their interaction

t = np.linspace(-1,1,25)

# four blocks of len(t) rows each, with four columns
x = np.zeros((t.shape[0]*4, 4))
# column 0: the grid t, repeated once per block
x[:,0] = np.tile(t, 4)
# column 1: treatment-1 indicator, 0/1/0/1 across the four blocks
x[:,1] = np.tile(np.repeat([0,1], t.shape[0]), 2)
# column 2: treatment-2 indicator, set for the last two blocks
# (boundary derived from len(t) instead of a hard-coded 50)
x[2*t.shape[0]:,2] = 1
# column 3: interaction, 1 only where both indicators are 1
x[:,3] = x[:,1] * x[:,2]

plt.imshow(x, aspect='auto')
plt.colorbar()

In [None]:
vmean, vt1, vt2, vint = 1.0, .8, .8, 1e-3

# null kernel: a 'mean' component on column 0 plus one component per treatment,
# each treatment component acting on column 0 and its own indicator column.
# active_dims is spelled out for every component; [0] and [0, 1] are presumed to be
# what GPy uses when active_dims is omitted (the first input_dim columns) - confirm
knull = (
    GPy.kern.RBF(1, variance=vmean, active_dims=[0], name='mean')
    + GPy.kern.RBF(2, variance=vt1, ARD=True, active_dims=[0, 1], name='treatment1')
    + GPy.kern.RBF(2, variance=vt2, ARD=True, active_dims=[0, 2], name='treatment2')
)

plt.colorbar(plt.imshow(knull.K(x)))

In [None]:
# full kernel: the null kernel plus an 'interaction' component on columns 0 and 3, with variance vint
k = knull + GPy.kern.RBF(2, variance=vint, ARD=True, active_dims=[0, 3], name='interaction')
plt.imshow(k.K(x))
plt.colorbar()

In [None]:
# randomize the kernel before sampling from it
k.randomize()

# total number of rows; kept under its own name so that `n` below has one meaning only
n_total = x.shape[0]
sigma = 5e-2

# tile the kernel matrix into a 2x2 block layout: first block row/column for y, second for f
cov = np.tile(k.K(x), (2,2))
# diagonal noise on the y block only (sigma is used as a variance here)
cov[:n_total, :n_total] += sigma*np.eye(n_total)

s = scipy.stats.multivariate_normal.rvs(np.zeros(2*n_total), cov)
y, f = s[:n_total], s[n_total:]

# n = number of points per block of len(t) rows; later cells slice with it
n = t.shape[0]

# noisy observations, one scatter per block
for g in range(4):
    plt.scatter(x[:n, 0], y[g*n:(g+1)*n])

# sampled noise-free values, one line per block
for g, label in enumerate(['f-mean', 'f-treatment1', 'f-treatment2', 'f-interaction']):
    plt.plot(x[:n, 0], f[g*n:(g+1)*n], label=label)

plt.legend()

k

In [None]:
# full model on all four columns; y is reshaped to a column and the kernel is copied
m = GPy.models.GPRegression(x, y[:,None], k.copy())
m.randomize()
m

In [None]:
# run the optimizer on the full model, then display it
m.optimize()
m

In [None]:
# predictions of the full model at the training inputs, flattened to 1-d
mu, var = m.predict(x)
mu = mu[:,0]
var = var[:,0]
std = np.sqrt(var)

# one colour per block of n rows; the colour is passed explicitly so the mean,
# band and sampled f of a block match regardless of the colour-cycle state
for i in range(4):
    sl = slice(i*n, (i+1)*n)
    colour = 'C%d'%i
    plt.plot(x[:n,0], mu[sl], c=colour)
    # approximate 95% band of a Gaussian: mean +/- 1.96 standard deviations (was 1.98)
    plt.fill_between(x[:n,0], mu[sl]-1.96*std[sl], mu[sl]+1.96*std[sl], color=colour, alpha=.1)

    # sampled f for this block, dashed
    plt.plot(x[:n,0], f[sl], '--', c=colour, label='f%d'%i)

plt.legend()

In [None]:
# null model: first three columns only (no interaction column), with a copy of knull
mnull = GPy.models.GPRegression(x[:,:3], y[:,None], knull.copy())
mnull.randomize()
mnull.optimize()
mnull

In [None]:
# predictions of the null model at the training inputs (first three columns), flattened to 1-d
mu, var = mnull.predict(x[:,:3])
mu = mu[:,0]
var = var[:,0]
std = np.sqrt(var)

# one colour per block of n rows; the colour is passed explicitly so the mean,
# band and sampled f of a block match regardless of the colour-cycle state
for i in range(4):
    sl = slice(i*n, (i+1)*n)
    colour = 'C%d'%i
    plt.plot(x[:n,0], mu[sl], c=colour)
    # approximate 95% band of a Gaussian: mean +/- 1.96 standard deviations (was 1.98)
    plt.fill_between(x[:n,0], mu[sl]-1.96*std[sl], mu[sl]+1.96*std[sl], color=colour, alpha=.1)

    # sampled f for this block, dashed
    plt.plot(x[:n,0], f[sl], '--', c=colour)