In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import popmachine
import pystan
import scipy
import scipy.stats

from pystan_cache.pystan_cache import caching_stan

In [None]:
# Previously pickled sample dictionaries for the two paraquat concentrations
# named in the file names (0.083 and 0.333 mM PQ).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load sample files that were produced locally or come from a trusted source.
# `with` closes each file handle as soon as the load finishes.
with open('ura3-0.083mMPQ-batch-samples.pkl', 'rb') as fh:
    tsamp083 = pickle.load(fh)
with open('ura3-0.333mMPQ-batch-samples.pkl', 'rb') as fh:
    tsamp333 = pickle.load(fh)

In [None]:
# Show which entries are stored in the 0.333 mM PQ sample dictionary.
tsamp333.keys()

In [None]:
# Compare the alpha and sigma samples of the two PQ concentrations.
# Every curve is a Gaussian KDE evaluated between the sample min and max and
# divided by its own maximum, so all curves peak at 1.
conditions = [
    (tsamp083, 'mM PQ = 0.083'),
    (tsamp333, 'mM PQ = 0.333'),
]
n_alpha = 4
n_panels = n_alpha + 1  # four alpha panels followed by one sigma panel

plt.figure(figsize=(10,4))

for i in range(n_alpha):
    plt.subplot(1, n_panels, i + 1)
    plt.title('alpha %d'%i)

    for samples, label in conditions:
        temp = samples['alpha'][:,i]
        z = np.linspace(temp.min(), temp.max())
        kde = scipy.stats.gaussian_kde(temp)
        density = kde(z)
        plt.plot(z, density/density.max(), label=label)

    # The legend is only drawn on the first panel.
    if i == 0:
        plt.legend()

    plt.semilogx()
    plt.xlim(.01, 4)

# Last panel: sigma for both concentrations.
plt.subplot(1, n_panels, n_panels)
plt.title('sigma')

for samples in (tsamp083, tsamp333):
    temp = samples['sigma']
    z = np.linspace(temp.min(), temp.max())
    kde = scipy.stats.gaussian_kde(temp)
    density = kde(z)
    plt.plot(z, density/density.max(), label='sigma')

plt.semilogx()
plt.xlim(.01, 1)

plt.tight_layout()
plt.savefig('figures/ura3_PQ-alpha-stan.pdf', bbox_inches='tight')

In [None]:
# NOTE(review): `tsamp` is not assigned anywhere above this cell, so on a fresh
# kernel the original line raised NameError. Binding it to the 0.083 mM PQ
# samples is an assumption (the data queried below and the saved figure names
# both refer to 0.083 mM PQ) -- confirm this is the intended reference fit.
tsamp = tsamp083

# Position of the sample with the largest `lp__` value. np.argmax returns the
# first maximiser, so tied samples no longer break the single-element unpacking
# that `maxapos, = np.where(...)[0]` relied on.
# Assumes tsamp['lp__'] is one-dimensional -- TODO confirm.
maxapos = np.argmax(tsamp['lp__'])
maxapos

In [None]:
# Stan model used to simulate data, loaded through the caching wrapper.
# NOTE(review): the caching behaviour of caching_stan is not visible here.
gp_multi_sim = caching_stan.stan_model(file='stan-models/gp_multi_sim.stan')

In [None]:
# Stan model used for inference (sampled from in the fitting cells below).
gp_multi = caching_stan.stan_model(file='stan-models/gp_multi.stan')

In [None]:
# popmachine handle on a local SQLite database; the path is relative to the
# directory the notebook is run from.
machine = popmachine.Machine('sqlite:///../popmachine_local/.popmachine.db')

In [None]:
# Query ura3 wells with 'mM PQ' in [0.0, .083] and 'M NaCl' in [4.2, None].
# NOTE(review): how popmachine interprets these value lists (in particular the
# None entry) is not visible here -- confirm against popmachine.
ds = machine.search(Strain='ura3', **{'mM PQ':[0.0, .083], 'M NaCl':[4.2, None]})

# Disabled alternative: the same query restricted to an explicit list of plates.
# plates = [u'20150517 PQ 3', u'20150715 PQ 8', u'20150702 PQ 6',
#        u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9']
# ds = machine.search(plates=plates, Strain='ura3', **{'mM PQ':[0.0, .083], 'M NaCl':[4.2, None]})

# Preprocessing, applied in this order. The method names suggest log-transform,
# filtering, trimming and a degree-2 polynomial scaling per (plate, mM PQ)
# group -- TODO confirm what each popmachine call actually does.
ds.log()
ds.filter()
ds.trim(5)
ds.poly_scale(2, groupby=['plate', 'mM PQ'])

# Thin the data table by keeping every third row (presumably time points).
ds.data = ds.data.iloc[::3,:]

In [None]:
# Overview of the selected data: popmachine's plot with columns=['plate'] and
# colorby=['mM PQ'] (presumably one panel per plate, coloured by concentration).
plt.figure(figsize=(12,4))
ds.plot(columns=['plate'], colorby=['mM PQ'])

In [None]:
# Build the model inputs for the factors 'mM PQ' and 'plate'.
# Later cells use `xraw` as the time axis, `y` as the observations,
# `design['mM PQ']` / `design.plate` as integer-coded factors and `labels[0]`
# as the list of 'mM PQ' labels. The effect of scale=True is not visible here.
xraw ,y, design, labels = ds.build(['mM PQ', 'plate'],scale=True)

In [None]:
# Display the factor labels returned by ds.build.
labels

In [None]:
# Standardise the observations and the time axis to zero mean / unit std.
# The raw statistics of y are kept because later cells multiply by `ystd`
# to express results on the original scale.
# NOTE(review): `y` is overwritten, so running this cell twice standardises
# already-standardised data and resets ymean/ystd; re-run the build cell first.
ymean = y.mean()
ystd = y.std()
y = (y - ymean)/ystd

x = (xraw - xraw.mean())/xraw.std()

In [None]:
# Sanity check of the array shapes after standardisation.
y.shape, x.shape

In [None]:
# Design matrix: one row per column of y, two global columns followed by a
# pair of columns for every plate.
n_plates = ds.meta.plate.unique().shape[0]
dm = np.zeros((y.shape[1], 2 + 2*n_plates))

# Column 0 is constant; column 1 is -1 where 'mM PQ' carries the code of the
# '0.0' label and +1 everywhere else.
dm[:,0] = 1
is_control = design['mM PQ'] == labels[0].index('0.0')
dm[:,1] = 1 - 2*is_control

# Plate block i is a copy of the two global columns, zeroed for rows that
# are not on plate code i.
for i in range(design.plate.unique().shape[0]):
    on_plate = (design.plate==i).values[:,None]
    dm[:, 2+2*i : 4+2*i] = dm[:,:2] * on_plate

In [None]:
# Visual check of the design matrix layout.
plt.imshow(dm, aspect='auto')

In [None]:
# length_scale values of the sample selected by `maxapos`.
tsamp['length_scale'][maxapos]

In [None]:
# alpha values of the sample selected by `maxapos`.
tsamp['alpha'][maxapos]

In [None]:
# sigma value of the sample selected by `maxapos`.
tsamp['sigma'][maxapos]

In [None]:
# Inputs for the simulation model. The hyperparameters are fixed to the values
# of the sample picked by `maxapos`.
p = dm.shape[1]
n = x.shape[0]
n_plates = ds.meta.plate.unique().shape[0]

# Prior index per design column: 1 and 2 for the two global columns, then
# 3 and 4 repeated for every plate.
priors = [1, 2] + [3, 4] * n_plates

sim_data = dict(
    N=n,
    x=x[:,0],
    P=y.shape[1],
    K=p,
    L=4,
    prior=priors,
    length_scale=tsamp['length_scale'][maxapos],
    alpha=tsamp['alpha'][maxapos],
    sigma=tsamp['sigma'][maxapos],
    design=dm,
)

In [None]:
# Draw one simulated data set: a single chain with a single iteration using the
# 'Fixed_param' algorithm, then extract the draws into a dict-like object.
# NOTE(review): no seed is passed, so each run produces a different simulation.
sample = gp_multi_sim.sampling(sim_data, chains=1, iter=1, algorithm='Fixed_param').extract()

In [None]:
# Simulated observations, one panel per plate on a fixed 3x3 grid.
plt.figure(figsize=(9,9))

n_plates = ds.meta.plate.unique().shape[0]
for i in range(n_plates):
    # Rows whose first plate-specific design column is non-zero are on plate i.
    plate_col = 2 + 2*i
    s = np.where(dm[:,plate_col] != 0)[0]

    plt.subplot(3, 3, i + 1)
    plt.plot(x[:,0], sample['y'][0,s,:].T)

In [None]:
# Simulated observations on a 3x3 grid, one panel per plate, coloured by
# paraquat concentration (black for '0.0', lime green otherwise).
plt.figure(figsize=(9,9))

keys = ds.meta.plate.unique().tolist()

# Shared y-range so that all panels are directly comparable.
y_lo, y_hi = sample['y'].min(), sample['y'].max()

# The group key is unpacked into `plate` / `pq` instead of `p` / `pq`, so the
# `p` set by the sim_data cell is no longer overwritten here.
for (plate, pq), ind in ds.meta.groupby(['plate', 'mM PQ']):
    temp = ds.data.iloc[:,ind.index]

    i = keys.index(plate)
    plt.subplot(3,3,i+1)

    # The label is built from the group's own concentration; the original
    # labelled the treated curves 'mM PQ = 0.0' as well (copy-paste slip).
    color = 'k' if pq == '0.0' else 'limegreen'
    plt.plot(temp.index, sample['y'][0,ind.index,:].T, c=color, label='mM PQ = %s'%pq)

    # Axis labels only on the left column and on panels after the sixth.
    if i % 3 == 0:
        plt.ylabel('AU', fontsize=14)
    if i > 5:
        plt.xlabel('time (AU)', fontsize=14)

    plt.ylim(y_lo, y_hi)

plt.tight_layout()
plt.savefig('figures/ura3_0.083mM-PQ_simulated-data.pdf', bbox_inches='tight')

In [None]:
# Training inputs: everything from sim_data plus the simulated observations and
# the prior settings for alpha and length_scale (four entries each).
train_data = dict(
    sim_data,
    y=sample['y'][0,:,:],
    alpha_prior=[[1,1], [1,1], [.1,1], [.1,1]],
    length_scale_prior=[[1.5,2]] * 4,
)

train_data

In [None]:
# Scratch arithmetic: 1175/60, scaled from 500 to 2000.
# NOTE(review): presumably a runtime estimate in minutes extrapolated from a
# 500-iteration run to 2000 iterations -- confirm or delete this cell.
1175./60 /500*2000

In [None]:
# Fit the full model to the simulated data (4 chains, 2000 iterations each).
# The commented line is a short 2-chain / 100-iteration variant.
# NOTE(review): no seed is passed, so results differ between runs.
# tsamples = gp_multi.sampling(data=train_data, chains=2, iter=100, control = {'adapt_delta': 0.8})
tsamples = gp_multi.sampling(data=train_data, chains=4, iter=2000, control = {'adapt_delta': 0.8})

In [None]:
# Display the fit object returned by the sampler.
tsamples

In [None]:
# Turn the fit summary into a labelled DataFrame (rows and columns named by
# the summary's own rownames / colnames entries).
fit_summary = tsamples.summary()
summary = pd.DataFrame(
    fit_summary['summary'],
    index=fit_summary['summary_rownames'],
    columns=fit_summary['summary_colnames'],
)

In [None]:
# First rows of the summary table.
summary.head()

In [None]:
# Trace plots of the hyperparameters and lp__ for the full-model fit.
# NOTE(review): traceplot may create its own figure, in which case the figure
# opened here stays empty -- confirm.
plt.figure(figsize=(10,4))
tsamples.traceplot(['length_scale', 'alpha', 'sigma','lp__'])
plt.tight_layout()

In [None]:
# Extract the full-model samples (permuted=True). From here on `tsamp` refers
# to the fit on simulated data, replacing whatever it was bound to before.
tsamp = tsamples.extract(permuted=True)

In [None]:
# One panel per design-matrix column: mean of tsamp['f'] with a +/- 2 std band,
# overlaid with the `f` drawn in the simulation.
ncol = 5
nrow = (dm.shape[1] - 1)//ncol + 1  # ceil(number of panels / ncol)

plt.figure(figsize=(4*ncol, 4*nrow))

for i in range(dm.shape[1]):

    # Use the integer row count computed above. The original passed
    # `dm.shape[1]/ncol + 1`, which is a float under true division (rejected by
    # plt.subplot) and could disagree with the row count used for the figure size.
    plt.subplot(nrow, ncol, i + 1)

    f_mean = tsamp['f'][:,i,:].mean(0)
    f_std = tsamp['f'][:,i,:].std(0)

    plt.plot(x, f_mean)
    plt.fill_between(x[:,0], f_mean - 2*f_std, f_mean + 2*f_std, alpha=.1)

    # Zero reference line.
    plt.plot([x.min(), x.max()], [0, 0], lw=3, c='k')

    # Function i from the simulation draw, for comparison with the estimate.
    plt.plot(x, sample['f'][0,i,:])

plt.savefig('figures/ura3_0.083mM-PQ_simulation-functions.pdf', bbox_inches='tight')

In [None]:
# Plate-specific functions (design columns 2 onward) on the raw time axis:
# even columns go to the left panel, odd columns to the right panel.
for i in range(2,dm.shape[1]):
    plt.subplot(1, 2, i%2 + 1)

    f_mean = tsamp['f'][:,i,:].mean(0)
    f_std = tsamp['f'][:,i,:].std(0)

    plt.plot(xraw, f_mean)
    plt.fill_between(xraw[:,0], f_mean - 2*f_std, f_mean + 2*f_std, alpha=.1)

    plt.ylim(-1.6, .8)

# Panel decoration; only the left panel gets a y-axis label.
for panel, title in [(121, '$f_0$'), (122, '$f_1$')]:
    ax = plt.subplot(panel)
    plt.title(title, fontsize=16)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    if panel == 121:
        plt.ylabel('log(OD)', fontsize=16)
    plt.xlabel('time (h)', fontsize=16)
    # Zero reference line.
    plt.plot([xraw.min(), xraw.max()], [0, 0], lw=1, c='k')

plt.tight_layout()
# plt.savefig('figures/ura3_0.083mM-PQ_batchFunctions-stan.pdf', bbox_inches='tight')

In [None]:
# Sum over plates of the squared plate-specific functions (rescaled by ystd),
# per sample and time point. Left panel: columns 2, 4, ...; right panel:
# columns 3, 5, ... The band runs between the values at positions 2.5% and
# 97.5% of the per-time-point sorted samples.
for panel, first_col, title in [(121, 2, '$f_0$'), (122, 3, '$f_1$')]:
    plt.subplot(panel)

    temp = ((ystd*tsamp['f'][:,first_col::2])**2).sum(1)
    temp = np.sort(temp, axis=0)  # sort the samples independently at each time point

    n_draws = temp.shape[0]
    plt.plot(xraw, temp.mean(0))
    plt.fill_between(xraw[:,0],
                     temp[int(.025*n_draws),:],
                     temp[int(.975*n_draws),:], alpha=.1)

    plt.ylim(-.01, 1.4)
    plt.title(title, fontsize=16)
    if panel == 122:
        # Only the right panel sets the x tick font size.
        plt.xticks(fontsize=12)
    plt.yticks(np.arange(.2, 1.4, .2), fontsize=12)
    plt.xlabel('time (h)', fontsize=16)
    if panel == 121:
        plt.ylabel('log(OD)$^2$', fontsize=16)

plt.tight_layout()
# plt.savefig('figures/ura3_0.083mM-PQ_finitePopVariance-stan.pdf', bbox_inches='tight')

In [None]:
# alpha samples against sample index, on a log y-axis.
# Each column is plotted with its own label: the original passed the whole 2-D
# array in one unlabelled call, so plt.legend() had no entries to show.
for k in range(tsamp['alpha'].shape[1]):
    plt.plot(tsamp['alpha'][:,k], alpha=.4, label='alpha %d'%k)
plt.semilogy()
plt.legend()

In [None]:
# Peak-normalised KDEs of the alpha and sigma samples, each with a marker at
# y = -0.1 showing the value that was used to simulate the data.
curves = [('alpha %d'%i, tsamp['alpha'][:,i], sim_data['alpha'][i]) for i in range(4)]
curves.append(('sigma', tsamp['sigma'], sim_data['sigma']))

for label, temp, truth in curves:
    z = np.linspace(temp.min(), temp.max())
    kde = scipy.stats.gaussian_kde(temp)
    density = kde(z)

    plt.plot(z, density/density.max(), label=label)
    plt.scatter([truth], [-.1])

plt.semilogx()
plt.legend()

plt.savefig('figures/ura3_0.083mMPQ-simulations-alpha-stan.pdf', bbox_inches='tight')

In [None]:
# KDEs (not normalised) of the four length_scale components, each with a marker
# at y = -0.1 showing the value that was used to simulate the data.
for i in range(4):
    temp = tsamp['length_scale'][:,i]
    grid = np.linspace(temp.min(), temp.max())
    density = scipy.stats.gaussian_kde(temp)(grid)

    plt.scatter([sim_data['length_scale'][i]], [-.1])
    plt.plot(grid, density, label='length_scale %d'%i)

plt.semilogx()
plt.legend()

# Null model: refit using only the first two design-matrix columns

In [None]:
# Inputs for the null model: the training data with the design matrix cut down
# to its first two columns, the matching first two prior entries, and K = L = 2.
null_train_data = dict(
    train_data,
    design=train_data['design'][:,:2],
    prior=train_data['prior'][:2],
    length_scale_prior=train_data['length_scale_prior'][:2],
    alpha_prior=train_data['alpha_prior'][:2],
    K=2,
    L=2,
)

In [None]:
# Fit the null model with the same sampler settings as the full model.
# NOTE(review): no seed is passed, so results differ between runs.
nullSamples = gp_multi.sampling(data=null_train_data, chains=4, iter=2000, control = {'adapt_delta': 0.8})

In [None]:
# Display the null-model fit object.
nullSamples

In [None]:
# Trace plots of the hyperparameters and lp__ for the null-model fit.
plt.figure(figsize=(10,4))
nullSamples.traceplot(['length_scale', 'alpha', 'sigma','lp__'])
plt.tight_layout()

In [None]:
# Extract the null-model samples (permuted=True).
nsamp = nullSamples.extract(permuted=True)

In [None]:
# Mean and +/- 2 std band of the two null-model functions.
# The grid is sized for the two panels that are actually drawn. The original
# derived the rows from the full design matrix (a tall, mostly empty figure)
# and passed `dm.shape[1]/ncol + 1` to plt.subplot, which is a float under true
# division. Its `if i > 1` branch could never run inside range(2) and is dropped.
n_func = 2
ncol = 2
nrow = (n_func - 1)//ncol + 1

plt.figure(figsize=(4*ncol, 4*nrow))

for i in range(n_func):

    plt.subplot(nrow, ncol, i + 1)

    f_mean = nsamp['f'][:,i,:].mean(0)
    f_std = nsamp['f'][:,i,:].std(0)

    plt.plot(x, f_mean)
    plt.fill_between(x[:,0], f_mean - 2*f_std, f_mean + 2*f_std, alpha=.1)

    # Zero reference line.
    plt.plot([x.min(), x.max()], [0, 0], lw=3, c='k')

In [None]:
# KDEs of the two null-model alpha components, evaluated on a grid that extends
# from 0.7 x the sample minimum to 1.3 x the sample maximum.
for i in range(2):
    temp = nsamp['alpha'][:,i]
    grid = np.linspace(temp.min()*.7, temp.max()*1.3)
    density = scipy.stats.gaussian_kde(temp)(grid)

    plt.plot(grid, density, label='alpha %d'%i)

plt.semilogx()
plt.legend()

In [None]:
# KDEs of the two null-model length_scale components between sample min and max.
for i in range(2):
    temp = nsamp['length_scale'][:,i]
    grid = np.linspace(temp.min(), temp.max())
    density = scipy.stats.gaussian_kde(temp)(grid)

    plt.plot(grid, density, label='length_scale %d'%i)

plt.semilogx()
plt.legend()

In [None]:
# Function 1 scaled by 2*ystd, for the full fit ('$M_2$') and the null fit
# ('$M_0$'): mean with a +/- 2 std band on the raw time axis.
for samples, label in [(tsamp, '$M_2$'), (nsamp, '$M_0$')]:
    temp = ystd*2*samples['f'][:,1]
    center = temp.mean(0)
    spread = temp.std(0)

    plt.plot(xraw, center, label=label)
    plt.fill_between(xraw[:,0], center - 2*spread, center + 2*spread, alpha=.3)

# Zero reference line.
plt.plot([xraw.min(), xraw.max()], [0, 0], lw=3, c='k')

plt.legend(fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.ylabel('log(OD)', fontsize=16)
plt.xlabel('time (h)', fontsize=16)

# Argument-less call kept from the original cell.
plt.plot()

plt.savefig('figures/ura3_0.083mM-PQ_f1_m02-stan.pdf', bbox_inches='tight')

In [None]:
# Raw-data check: row-wise mean of the columns with 'mM PQ' != '0.0' minus the
# row-wise mean of the columns with 'mM PQ' == '0.0'.
plt.plot(ds.data.loc[:,ds.meta['mM PQ'] != '0.0'].mean(1) - ds.data.loc[:,ds.meta['mM PQ'] == '0.0'].mean(1));

In [None]:
# Function 0 scaled by ystd, for the full fit ('$M_2$') and the null fit
# ('$M_0$'): mean with a +/- 2 std band on the raw time axis.
for samples, label in [(tsamp, '$M_2$'), (nsamp, '$M_0$')]:
    temp = ystd*samples['f'][:,0]
    center = temp.mean(0)
    spread = temp.std(0)

    plt.plot(xraw, center, label=label)
    plt.fill_between(xraw[:,0], center - 2*spread, center + 2*spread, alpha=.3)

# Zero reference line.
plt.plot([xraw.min(), xraw.max()], [0, 0], lw=3, c='k')

plt.legend(fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.ylabel('log(OD)', fontsize=16)
plt.xlabel('time (h)', fontsize=16)

# plt.savefig('figures/ura3_0.083mM-PQ_f0_m02-stan.pdf', bbox_inches='tight')