In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
import GPy, scipy

In [None]:
def gompertz(t,m,A,l):
    """Evaluate a Gompertz-type sigmoid curve at ``t``.

    Computes ``A * exp(-exp(m*e/A*(l - t) + 1))`` element-wise.

    Parameters
    ----------
    t : scalar or array
        Point(s) at which to evaluate the curve.
    m, A, l : scalar
        Curve parameters. ``A`` scales the output; for ``m/A > 0`` the value
        tends to ``A`` as ``t`` grows. (``m`` and ``l`` presumably play the
        role of a growth rate and a lag -- inferred from the formula only.)

    Returns
    -------
    Same shape as ``t`` (NumPy broadcasting applies).
    """
    inner_exponent = m*np.e/A*(l-t) + 1
    return A * np.exp(-np.exp(inner_exponent))

In [None]:
def add_subplot_axes(ax,rect,axisbg='w'):
    """Add an inset axes positioned relative to an existing axes.

    Parameters
    ----------
    ax : matplotlib axes
        Parent axes the inset is placed in.
    rect : sequence of 4 floats
        ``[x, y, width, height]``; ``x, y`` are the lower-left corner in
        axes-fraction coordinates of ``ax``, ``width, height`` are fractions
        of the parent's width and height.
    axisbg : color, optional
        Background colour of the inset. The parameter keeps its historical
        name, but it is forwarded as ``facecolor`` because the ``axisbg``
        keyword was removed from Matplotlib.

    Returns
    -------
    The newly created inset axes, with tick-label sizes scaled by the square
    root of the relative width/height.
    """
    # Use the figure that owns `ax` rather than whatever figure is current,
    # so the transforms below and the new axes refer to the same figure.
    fig = ax.figure
    box = ax.get_position()

    # Lower-left corner: axes fraction -> display coords -> figure fraction.
    inax_position = ax.transAxes.transform(rect[0:2])
    infig_position = fig.transFigure.inverted().transform(inax_position)
    x = infig_position[0]
    y = infig_position[1]

    # Inset size as a fraction of the parent's size (in figure coordinates).
    width = box.width * rect[2]
    height = box.height * rect[3]

    subax = fig.add_axes([x,y,width,height],facecolor=axisbg)

    # Shrink tick labels along with the inset so they stay proportionate.
    x_labelsize = subax.get_xticklabels()[0].get_size() * rect[2]**0.5
    y_labelsize = subax.get_yticklabels()[0].get_size() * rect[3]**0.5
    subax.xaxis.set_tick_params(labelsize=x_labelsize)
    subax.yaxis.set_tick_params(labelsize=y_labelsize)
    return subax


In [None]:
def generateSample(mu, cov, nugget, length=50):
    """Draw one multivariate-normal sample and arrange it column-wise.

    Parameters
    ----------
    mu : 1-d array, shape (n,)
        Mean vector.
    cov : 2-d array, shape (n, n)
        Covariance matrix (before the nugget is added).
    nugget : float
        Value added to the diagonal of ``cov`` (independent noise variance).
    length : int, optional
        Number of consecutive entries of the sample forming one column of
        the result. ``n`` must be a multiple of ``length``; otherwise the
        reshape raises ``ValueError``.

    Returns
    -------
    array of shape (length, n // length); column ``j`` holds entries
    ``j*length .. (j+1)*length - 1`` of the draw.
    """
    # `import scipy` alone does not guarantee the stats submodule is loaded.
    import scipy.stats

    npts = mu.shape[0]
    noise = np.eye(npts)*nugget

    draw = scipy.stats.multivariate_normal.rvs(mu, cov+noise)

    # Floor division keeps the column count an int on both Python 2 and 3
    # (true division would hand a float to reshape and fail).
    return draw.reshape((npts//length, length)).T

In [None]:
# kbase = GPy.kern.RBF(1,name='base')
# kbio = GPy.kern.Hierarchical([GPy.kern.RBF(1,name='base'), GPy.kern.RBF(1,name='bio', variance=.1)])
# ktech = GPy.kern.Hierarchical([GPy.kern.RBF(1,name='base'), GPy.kern.RBF(1,name='bio', variance=.1), GPy.kern.RBF(1,name='tech', variance=.05)])

In [None]:
# Experimental design: nbatch batches, nrep replicates per batch,
# nobs time points per replicate.
nbatch, nrep = 4, 3
ntot = nbatch * nrep
nobs = 15
time = np.linspace(0,2, nobs)

# Standardised time axis, repeated once per replicate.
time_column = np.tile((time-time.mean())/time.std(), ntot)
# Integer labels (stored as floats): batch of each row, replicate of each row.
batch_column = np.repeat(np.arange(nbatch), nrep*nobs)
replicate_column = np.repeat(np.arange(nbatch*nrep), nobs)

# x  : (time, batch, replicate)
# x1 : (time, replicate)
# x2 : (time, batch)
x = np.column_stack([time_column, batch_column, replicate_column])
x1 = np.column_stack([time_column, replicate_column])
x2 = np.column_stack([time_column, batch_column])

x3 = x.copy()

In [None]:
# Nominal variances for the base, batch and replicate effects.
# NOTE(review): only baseVariance is used below; kbatch hard-codes
# variance=0.01 (batchVariance is .005) and krep hard-codes 0.005 --
# confirm which batch variance is intended.
baseVariance, batchVariance, repVariance = .01, .005, .005

kbase = GPy.kern.RBF(1,name='base',variance=baseVariance)
# index_dim=-2 selects the second-to-last input column as the group label
# (the batch column when evaluated on the 3-column x).
kbatch = GPy.kern.IndependentOutputs(GPy.kern.RBF(1,name='batch', variance=0.01, lengthscale=.7), index_dim=-2);
# No index_dim given, so the library default applies
# (presumably the last column, i.e. the replicate label -- verify).
krep = GPy.kern.IndependentOutputs(GPy.kern.RBF(1,name='replicate', variance=0.005, lengthscale=.7))

In [None]:
# Joint covariance over four stacked copies of the design, one per
# generative scenario:
#   block 0: no structured effect (stays zero here; only the nugget added at
#            sampling time contributes)
#   block 1: batch effect only
#   block 2: replicate effect only
#   block 3: batch + replicate effects
# Block 3 shares its batch effect with block 1 and its replicate effect with
# block 2 through the off-diagonal blocks.
npts = x.shape[0]
cov = np.zeros((4*npts, 4*npts))

# Parenthesised form prints identically on Python 2 and also runs on Python 3.
print(cov.shape)

# Evaluate each kernel once instead of once per block.
Kbatch = kbatch.K(x)
Krep = krep.K(x)

batchBlock = slice(npts, 2*npts)
repBlock = slice(2*npts, 3*npts)
bothBlock = slice(3*npts, 4*npts)

cov[batchBlock, batchBlock] += Kbatch
cov[batchBlock, bothBlock] += Kbatch
cov[bothBlock, batchBlock] += Kbatch

cov[repBlock, repBlock] += Krep
cov[repBlock, bothBlock] += Krep
cov[bothBlock, repBlock] += Krep

cov[bothBlock, bothBlock] += Kbatch + Krep

# Optional: force equal marginal variance across all blocks.
#cov[range(cov.shape[0]),range(cov.shape[0])] = np.diag(cov).max()

# Keep a copy: later cells rebind the name `cov` to predictive covariances.
sampleCov = cov.copy()

plt.imshow(sampleCov)

In [None]:
# Accumulator list; the sampling loop in the following cell appends to it.
samples = []

In [None]:
datasets = []

nsamp = 5

# Seed NumPy's global RNG so the simulated datasets are the same on every
# Restart & Run All. scipy's multivariate_normal.rvs draws from this global
# state when no random_state is passed.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# True underlying curve, evaluated on the raw time grid and repeated once
# per replicate.
f = gompertz(np.tile(time, ntot), 2, 1, .4)
# Nugget (independent noise variance) added to the diagonal when sampling.
sigma = .001

for _ in range(nsamp):
    samples.append([])

    # Mean is the true curve repeated for the four scenario blocks of
    # sampleCov; result has one column per (scenario, replicate).
    s = generateSample(np.tile(f, 4), sampleCov, sigma, nobs)
    datasets.append(s)

In [None]:
# Colour of each replicate column: position of its batch label among the
# sorted unique batch labels. This does not depend on the dataset, so it is
# computed once up front.
batchLevels = np.unique(x[:,1]).tolist()
columnColors = ['C%d' % batchLevels.index(z) for z in x[::nobs,1]]

for s in datasets:
    # Column blocks of s in storage order; note the names y2/y1 are swapped
    # relative to position so that y1 pairs with design x1 and y2 with x2.
    y0, y2 , y1, y3 = np.array_split(s,4,1)

    plt.figure(figsize=(12,6))

    # One column of panels per scenario: y0 -> 241/245, y1 -> 242/246,
    # y2 -> 243/247, y3 -> 244/248.
    for col, y in enumerate([y0, y1, y2, y3]):

        # Top row: replicate curves against the true function (black).
        plt.subplot(2, 4, col+1)
        plt.plot(x[:nobs,0], f[:nobs],c='k',lw=3)
        for j, color in enumerate(columnColors):
            plt.plot(x[:nobs,0],y[:,j],color=color,alpha=.6)

        # Bottom row: deviation of each replicate from the across-replicate
        # mean at every time point, with a zero reference line.
        plt.subplot(2, 4, col+5)
        plt.plot([x[:,0].min(),x[:,0].max()],[0,0],'k', lw=3)
        replicateMean = y.mean(1)
        for j, color in enumerate(columnColors):
            plt.plot(x[:nobs,0],y[:,j]-replicateMean,color=color,alpha=.6)

In [None]:
class ModelFactory(object):
    """Builds and trains GP regression models on a fixed design matrix.

    Subclasses override ``buildKernel`` (the kernel fitted to the data) and
    ``predictionKernel`` (the part of the fitted kernel used when predicting
    the underlying function).
    """

    def __init__(self, x, ):
        # Design matrix passed to every model this factory trains.
        self.x = x

    def predictionKernel(self, k):
        """The kernel for predicting underlying function."""
        return k

    def buildKernel(self,):
        """Return a fresh, untrained kernel for a new model."""
        return GPy.kern.RBF(1)

    def batchTrain(self, y, size=3):
        """Train ``size`` independently initialised models and pick the best.

        Parameters
        ----------
        y : 2-d array
            Observations, forwarded unchanged to ``train``.
        size : int, optional
            Number of restarts.

        Returns
        -------
        (best, trained) : ``best`` is the model with the highest
        ``log_likelihood()`` (``None`` when ``size`` is 0); ``trained`` is
        the list of all models in training order.
        """
        best = None

        trained = []

        for _ in range(size):
            m = self.train(y)

            trained.append(m)

            if best is None or best.log_likelihood() < m.log_likelihood():
                best = m

        # Return the best restart, not merely the last one trained.
        return best, trained

    def train(self, y):
        """Fit one model from a random initialisation and return it.

        ``y`` is flattened column by column (``y.T`` then reshape) into a
        single output column, so column ``j`` of ``y`` must correspond to the
        ``j``-th consecutive group of ``y.shape[0]`` rows of ``self.x``.
        """
        k = self.buildKernel()

        m = GPy.models.GPRegression(self.x, y.T.reshape(y.shape[0]*y.shape[1] ,1), k)
        m.randomize()
        m.optimize()

        return m
    
class HierarchicalFactory(ModelFactory):
    """Factory for hierarchical kernels: a 'base' RBF plus one RBF per level."""

    def __init__(self, x, levels=2):
        super(HierarchicalFactory, self).__init__(x)
        # Number of extra kernels ('level0', 'level1', ...) after 'base'.
        self.levels = levels

    def buildKernel(self):
        """Return a Hierarchical kernel of 1 + ``self.levels`` RBF parts."""
        kerns = [GPy.kern.RBF(1, name='base')]
        for i in range(self.levels):
            kerns.append(GPy.kern.RBF(1, name='level%d'%i))

        return GPy.kern.Hierarchical(kerns)

    def predictionKernel(self, k):
        """Use only the part named 'base' when predicting."""
        return k.base

class OneLevelFactory(ModelFactory):
    """Factory for a Hierarchical kernel built from two default-named RBFs."""

    def buildKernel(self,):
        """Return a Hierarchical kernel over two fresh RBF(1) kernels."""
        parts = [GPy.kern.RBF(1) for _ in range(2)]
        return GPy.kern.Hierarchical(parts)

    def predictionKernel(self, k):
        """Use the part exposed as ``k.rbf`` when predicting."""
        return k.rbf
    
class TwoLevelFactory(OneLevelFactory):
    """Like OneLevelFactory, but with three RBF parts instead of two."""

    def buildKernel(self,):
        """Return a Hierarchical kernel over three fresh RBF(1) kernels."""
        parts = [GPy.kern.RBF(1) for _ in range(3)]
        return GPy.kern.Hierarchical(parts)

In [None]:
# One factory per candidate model, each paired with its design matrix:
#   m0: plain RBF on x
#   m1: base + 1 level on x1
#   m2: base + 1 level on x2
#   m3: base + 2 levels on x3
m0 = ModelFactory(x)
m1 = HierarchicalFactory(x1,1)
m2 = HierarchicalFactory(x2,1)
m3 = HierarchicalFactory(x3,2)

In [None]:
# 97.5th percentile of the standard normal, i.e. the half-width multiplier
# of a two-sided 95% interval.
scipy.stats.norm.ppf(.975)

In [None]:
# Fit every model type to every scenario of the first dataset and draw a
# 4x4 grid: row = model (i), column = scenario (l).
for s in datasets[:1]:

    plt.figure(figsize=(16,16))

    y0, y2 , y1, y3 = np.array_split(s,4,1)
    for l,y in enumerate([y0, y1, y2, y3]):

        for i,fact in enumerate([m0, m1, m2, m3]):
            # 10 restarts; m is whatever batchTrain returns first.
            m, _ = fact.batchTrain(y,size=10)

            # Noise-free prediction at the first nobs rows of the design,
            # using only the factory's prediction kernel.
            # NOTE: this rebinds the name `cov` (the sampling covariance is
            # still available as sampleCov).
            mu,cov = m.predict_noiseless(fact.x[:nobs,:],full_cov=True,kern=fact.predictionKernel(m.kern))
            mu = mu[:,0]
            std = np.sqrt(cov.diagonal())
            # Not used in this cell; left in the namespace for later cells.
            diff = (mu - f[:nobs])

            plt.subplot(4,4,4*i+l+1)
            plt.title("y%d m%d" %(l, i))

            # Posterior mean with a +/-1.96 std band.
            plt.plot(fact.x[:nobs,0], mu)
            plt.fill_between(fact.x[:nobs,0], mu-1.96*std, mu+1.96*std, alpha=.2)

            # All replicate curves, then the true function in black.
            plt.plot(x[:nobs,0], y)
            #for j,z in enumerate(fact.x[::nobs,1]):
            #    k = np.unique(fact.x[:,1]).tolist().index(z)
            #    plt.plot(x[:nobs,0],y[:,j],color='C%d'%k,alpha=.6);
            plt.plot(x[:nobs,0], f[:nobs],color='k', lw=3)

    plt.tight_layout()

In [None]:
# Re-plot a single fit in isolation.
# NOTE: fact, mu, std and y are not defined in this cell; they are whatever
# values are currently bound in the kernel namespace (e.g. left over from
# the last iteration of an earlier loop).
plt.plot(fact.x[:nobs,0], mu)
plt.fill_between(fact.x[:nobs,0], mu-1.96*std, mu+1.96*std, alpha=.2)
plt.plot(x[:nobs,0], f[:nobs],color='k', lw=3)

# Colour each replicate curve by the label in column 1 of the factory's design.
for j,z in enumerate(fact.x[::nobs,1]):
    k = np.unique(fact.x[:,1]).tolist().index(z)
    plt.plot(x[:nobs,0],y[:,j],color='C%d'%k,alpha=.4);

In [None]:
# Display the model currently bound to `m`.
m

In [None]:
# Train the m3 factory's model on y3 with 10 random restarts;
# `trained` keeps every restart for inspection.
m, trained = m3.batchTrain(y3, size=10)

In [None]:
# Log-likelihood of each restart, in training order.
[t.log_likelihood() for t in trained]

In [None]:
# Print the parameter summary of every restart.
# Parenthesised form prints identically on Python 2 and also runs on Python 3.
for t in trained:
    print(t)

In [None]:
# Noise-free prediction from the model returned by batchTrain. The model and
# the prediction kernel are taken from the same object, so the kernel's
# fitted parameters always belong to the model doing the predicting.
mu,cov = m.predict_noiseless(m3.x[:nobs,:],full_cov=True,kern=m3.predictionKernel(m.kern))
# mu,cov = m.predict(m3.x[:nobs,:],full_cov=True,kern=m3.predictionKernel(m.kern))
mu = mu[:,0]
# Recompute std from this prediction; otherwise the band below would use a
# stale std left over from an earlier cell.
std = np.sqrt(cov.diagonal())

plt.plot(m3.x[:nobs,0], mu)
plt.fill_between(m3.x[:nobs,0], mu-1.96*std, mu+1.96*std, alpha=.2)

# Replicate curves coloured by the label in column 1 of the design.
for j,z in enumerate(m3.x[::nobs,1]):
    k = np.unique(m3.x[:,1]).tolist().index(z)
    plt.plot(m3.x[:nobs,0],y3[:,j],color='C%d'%k,alpha=.6);

In [None]:
# Single (one-restart) fit of the m1 factory's model to y3, then display it.
m = m1.train(y3)
m

In [None]:
# Latent-function plot with input column 1 fixed at 0, predicting through
# the m1 factory's prediction kernel.
# assumes plot_f returns an object with a .plot method (an axes) -- TODO confirm
ax = m.plot_f(fixed_inputs=[(1,0)], predict_kw={'kern':m1.predictionKernel(m.kern)})
# Overlay the replicate curves and the true function (black).
ax.plot(x3[:nobs,0], y3)
ax.plot(x[:nobs,0], f[:nobs],color='k', lw=3)

In [None]:
# Start from an empty results list before the coverage experiment below.
samples = []

In [None]:
# Coverage experiment: for every dataset, every scenario and every model
# type, count the time points where the true function falls outside the
# model's interval. Result layout: samples[dataset][scenario][model].
for s in datasets:
    y0, y2 , y1, y3 = np.array_split(s,4,1)

    # Open a new row for this dataset. Without it, samples[-1] raises
    # IndexError on an empty list and results of different datasets would
    # not be kept apart.
    samples.append([])

    for y in [y0, y1, y2, y3]:

        samples[-1].append([])

        # Fresh kernels for each fit so no parameters carry over.
        k0 = GPy.kern.RBF(1)
        k1 = GPy.kern.Hierarchical([GPy.kern.RBF(1), GPy.kern.RBF(1)])
        k2 = GPy.kern.Hierarchical([GPy.kern.RBF(1), GPy.kern.RBF(1)])
        k3 = GPy.kern.Hierarchical([GPy.kern.RBF(1), GPy.kern.RBF(1), GPy.kern.RBF(1)])

        # (design, fitted kernel, kernel used for prediction)
        for z,k, kpred in [(x, k0, k0), (x1, k1, k1.rbf), (x2, k2, k2.rbf), (x3, k3, k3.rbf)]:
            m = GPy.models.GPRegression(z, y.T.reshape(nobs*ntot,1), k)
            m.randomize()
            m.optimize()

            mu,cov = m.predict_noiseless(z[:nobs,:],full_cov=True,kern=kpred)
            std = np.sqrt(cov.diagonal())
            diff = (mu[:,0] - f[:nobs])

            # Points whose interval diff +/- 1.98*std does not contain 0.
            # NOTE(review): 1.98 differs from the 1.96 used in the plotting
            # cells -- confirm which multiplier is intended.
            incorrect = nobs-sum(((diff-1.98*std) < 0) & ((diff+1.98*std) > 0))

            samples[-1][-1].append(incorrect)

            # `m` is rebound on the next iteration, which releases the
            # previous model; it is intentionally not deleted so the last
            # fitted model stays available to later cells.

In [None]:
# Fraction of time points outside the interval, per entry of `samples`.
# NOTE(review): samples[:-2] drops the last two entries; the reason is not
# visible in this notebook -- confirm it is intended.
failrate = 1.*np.array(samples[:-2],dtype=float)/nobs

In [None]:
# failrate

In [None]:
# One figure per model type (last axis of failrate);
# within a figure, one box per generative scenario (middle axis).
for model_idx in range(4):
    plt.figure()
    plt.boxplot(np.array_split(failrate[:,:,model_idx], 4, 1));

In [None]:
# Prediction error with its +/-1.98 std band.
# The x-range is sized from `diff` itself: a hard-coded 50 raises a shape
# mismatch whenever diff has a different length (e.g. nobs points).
npred = diff.shape[0]
plt.plot(x[:npred,0],diff)
plt.fill_between(x[:npred,0], diff + 1.98*std, diff - 1.98*std, alpha=.1)
# plt.plot(x[:npred,0],f[:npred])

In [None]:
# Posterior mean with band, the true function, and the y2 replicate curves.
# Ranges are sized from the arrays being plotted instead of a hard-coded 50,
# which mismatches predictions made at nobs points.
npred = mu.shape[0]
plt.plot(x[:npred,0],mu[:,0])
plt.fill_between(x[:npred,0], mu[:,0] + 1.98*std, mu[:,0] - 1.98*std, alpha=.1)
plt.plot(x[:npred,0],f[:npred])
# y2 has one row per time point of a replicate, so size its x-range from y2.
plt.plot(x[:y2.shape[0],0],y2,c='k',lw=.4)

In [None]:
# Noise-free prediction (full kernel, no `kern` override) at the first 50
# rows of x. Requires a fitted model to be bound to `m` in the namespace.
# NOTE(review): 50 is hard-coded while the setup cell uses nobs = 15, so
# these rows span more than one replicate -- confirm this is intended.
mu,cov = m.predict_noiseless(x[:50,:],full_cov=True)
std = np.sqrt(cov.diagonal())

In [None]:
# Number of eigenvalues of the predictive covariance above 1e-9
# (a rough numerical-rank check).
sum(np.linalg.eigvals(cov) > 1e-9)

In [None]:
# Prediction error against the first 50 values of the true curve.
# NOTE(review): the 50 must equal the number of rows of mu -- confirm.
diff = (mu[:,0] - f[:50])

# Quadratic form diff' * inv(cov) * diff (squared Mahalanobis distance).
np.dot(diff, np.dot(np.linalg.inv(cov), diff))

In [None]:
# Sanity check on the length of the error vector.
diff.shape

In [None]:
# Display the full predictive covariance matrix.
cov

In [None]:
# Error vector, then the element-wise square root of the covariance matrix
# (one line per column of cov).
# NOTE(review): this takes sqrt of the whole matrix, not of its diagonal --
# if the per-point std was intended, confirm and use the diagonal.
plt.plot(diff)
plt.plot(np.sqrt(cov));

In [None]:
# 95th percentile of a chi-square distribution with 50 degrees of freedom.
scipy.stats.chi2.ppf(.95, 50)

In [None]:
# Chi-square CDF evaluated at a negative argument (scratch check).
scipy.stats.chi2.cdf(-2, 50)

In [None]:
# Chi-square CDF (50 degrees of freedom) of the quadratic form
# diff' * inv(cov) * diff.
# NOTE(review): the 50 should match the length of diff -- confirm.
scipy.stats.chi2.cdf(np.dot(diff, np.dot(np.linalg.inv(cov), diff)), 50)

In [None]:
# Sanity check on the shapes of the predictive std and mean.
std.shape, mu.shape

In [None]:
# All y1 observations against standardised time. y1 is flattened column by
# column, matching the row order of x. Sizes use nobs (rows per replicate):
# a hard-coded 50 cannot reshape an array of nobs*ntot elements.
plt.scatter(x[:ntot*nobs,0], y1.T.reshape(nobs*ntot,1))