In [None]:
# %pylab injects numpy as `np` and matplotlib.pyplot as `plt` into the namespace;
# later cells use both names without importing them explicitly.
%pylab

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os
import pickle

import pandas as pd
import popmachine
import pystan
import scipy
import scipy.stats

from pystan_cache.pystan_cache import caching_stan

In [None]:
# Load both Stan GP models through caching_stan (presumably this avoids
# recompiling a model whose source has not changed -- confirm in pystan_cache).
gp_multi, gp_multi_marginal = [
    caching_stan.stan_model(file=stan_file)
    for stan_file in ('stan-models/gp_multi.stan',
                      'stan-models/gp_multi_marginal.stan')
]

In [None]:
# Handle on the local popmachine SQLite database that all searches go through.
machine = popmachine.Machine('sqlite:///../popmachine_local/.popmachine.db')

# Plates analysed in this notebook; the per-plate cells below iterate over this list.
plates = [
    u'20161010_PQ_osmo',
    u'20150517 PQ 3',
    u'20161107_PQ_osmo_combo',
    u'20150715 PQ 8',
    u'20150702 PQ 6',
    u'20150607 PQ 4',
    u'20150630 PQ 5',
    u'20150704 PQ 7',
    u'20150717 PQ 9',
]

In [None]:
# Query conditions: strain ura3 at both PQ levels, with 'M NaCl' restricted to
# [4.2, None] (how popmachine interprets None is not visible here -- confirm).
search_conditions = {'mM PQ': [0.0, .083], 'M NaCl': [4.2, None]}

# Work on a single plate (the second entry of `plates`).  An earlier variant of
# this cell pooled several '2015... PQ n' plates by passing a longer list to
# `plates=` with the same conditions.
ds = machine.search(plates=plates[1:2], Strain='ura3', **search_conditions)

# Preprocessing chain.  Return values are discarded, so these popmachine calls
# presumably modify `ds` in place -- TODO confirm against popmachine.
ds.log()
ds.filter()
ds.trim(5)
ds.poly_scale(2, groupby=['plate', 'mM PQ'])

# Thin the data: keep every third row.
ds.data = ds.data.iloc[::3, :]

In [None]:
# Overview of the loaded data: ds.plot is asked to split by plate and colour by
# PQ concentration (argument semantics are defined by popmachine).
plot_layout = dict(columns=['plate'], colorby=['mM PQ'])

plt.figure(figsize=(12, 4))
ds.plot(**plot_layout)

In [None]:
# Turn the dataset into model inputs: raw inputs, observations, the per-column
# design table, and the labels for the two factors listed here.
build_factors = ['mM PQ', 'plate']
xraw, y, design, labels = ds.build(build_factors, scale=True)

In [None]:
# Display `labels` from ds.build; labels[0] is searched below for the '0.0' PQ level.
labels

In [None]:
# Keep the original location/scale of y so results can be mapped back later.
ymean = y.mean()
ystd = y.std()

# Standardise observations and inputs to zero mean and unit standard deviation.
y = (y - ymean) / ystd
x = (xraw - xraw.mean()) / xraw.std()

In [None]:
# Sanity check: shapes of the standardised observations and inputs.
y.shape, x.shape

In [None]:
# Index of the untreated ('0.0') level within the first label list.
control_level = labels[0].index('0.0')

# Indicator that is True wherever the 'mM PQ' entry differs from the control level.
is_treated = design['mM PQ'] != control_level

# Design matrix with one row per column of y:
#   column 0 -> intercept (always 1), column 1 -> treatment indicator.
dm = np.zeros((y.shape[1], 2))
dm[:, 0] = 1
dm[:, 1] = is_treated

In [None]:
# Visual check of the design matrix (one row per column of y; columns: intercept, PQ indicator).
plt.imshow(dm, aspect='auto')

In [None]:
# random effect

p = dm.shape[1]
n = x.shape[0]

priors = [1, 2]

# Data block handed to the Stan models.  The meaning of each entry is defined
# by the .stan files, which are not part of this notebook.
train_data = {
    # dimensions
    'N': n,
    'P': y.shape[1],
    'K': dm.shape[1],
    'L': 2,
    # design and prior assignment
    'prior': priors,
    'design': dm,
    # hyper-priors
    'alpha_prior': [[1, 1], [.1, 1]],
    'length_scale_prior': [[.5, .5], [.5, .5]],
    'sigma_prior': [.1, 1.5],
    'marginal_alpha_prior': [.5, .1],
    'marginal_lengthscale_prior': [8, 2.0],
    # observations (transposed) and the first input column
    'y': y.T,
    'x': x[:, 0],
}

In [None]:
# Draw posterior samples from the marginal GP model.
# NOTE(review): no `seed` is passed, so draws are presumably not reproducible
# from run to run.
tsamples = gp_multi_marginal.sampling(
    data=train_data,
    chains=4,
    iter=2000,
    control={'adapt_delta': 0.8},
)

In [None]:
# Display the fit object returned by sampling().
tsamples

In [None]:
# Convert the fit summary (a mapping holding the value matrix plus its row and
# column names) into a labelled DataFrame.
fit_summary = tsamples.summary()
summary = pd.DataFrame(
    fit_summary['summary'],
    index=fit_summary['summary_rownames'],
    columns=fit_summary['summary_colnames'],
)

In [None]:
# Preview the first rows of the summary table.
summary.head()

In [None]:
# Gather the n_eff and Rhat values of every summary row named f[i,<digits>],
# one array per design-matrix column i.
# NOTE(review): the pattern uses i from range(), i.e. it assumes the summary row
# names are 0-based -- verify against the row names shown by summary.head().
neff = []
rhat = []
for i in range(dm.shape[1]):
    # Raw string: '\[' in a plain literal is an invalid escape sequence.
    # The row mask is computed once and shared by both columns.
    is_f_row = summary.index.str.match(r'f\[%d,[0-9]*\]' % i)
    neff.append(summary.loc[is_f_row, 'n_eff'].values)
    rhat.append(summary.loc[is_f_row, 'Rhat'].values)

In [None]:
# One box per entry of `neff`: spread of n_eff across the matched f[i, *] rows.
plt.boxplot(neff);

In [None]:
# One box per entry of `rhat`: spread of Rhat across the matched f[i, *] rows.
plt.boxplot(rhat);

In [None]:
# Trace plots for the kernel hyper-parameters, the noise term and the log posterior.
kernel_params = ['length_scale', 'alpha', 'sigma', 'lp__']

plt.figure(figsize=(10, 4))
tsamples.traceplot(kernel_params)
plt.tight_layout()

In [None]:
# Trace plots for the marginal-kernel hyper-parameters and the log posterior.
marginal_params = ['marginal_alpha', 'marginal_lengthscale', 'lp__']

plt.figure(figsize=(20, 4))
tsamples.traceplot(marginal_params)
plt.tight_layout()

In [None]:
# Extract the posterior draws (permuted=True); later cells index the result by parameter name.
tsamp = tsamples.extract(permuted=True)

In [None]:
# Peak-normalised kernel density estimates of the two alpha components and of
# sigma, drawn on a shared logarithmic x axis.
# Requires the scipy.stats submodule to be imported; `import scipy` alone does
# not guarantee that the attribute exists.
curves = [('alpha %d' % k, tsamp['alpha'][:, k]) for k in range(2)]
curves.append(('sigma', tsamp['sigma']))

for label, draws in curves:
    grid = np.linspace(draws.min(), draws.max())
    # Evaluate the KDE once and reuse it for both the curve and its maximum.
    density = scipy.stats.gaussian_kde(draws)(grid)
    plt.plot(grid, density / density.max(), label=label)

plt.semilogx()
plt.legend()

# plt.savefig('figures/ura3_0.083mMPQ-alpha-stan.pdf', bbox_inches='tight')

In [None]:
# Kernel density estimates of the two length-scale components on a logarithmic
# x axis (densities are plotted unnormalised).
for k in range(2):
    draws = tsamp['length_scale'][:, k]
    grid = np.linspace(draws.min(), draws.max())
    density = scipy.stats.gaussian_kde(draws)
    plt.plot(grid, density(grid), label='length_scale %d' % k)

plt.semilogx()
plt.legend()

In [None]:
# Posterior mean +/- 2 sd of each latent function f[:, i, :], one panel per
# design-matrix column.
ncol = 2
n_func = dm.shape[1]  # assumes tsamp['f'] has one slice per design column (K = dm.shape[1])
nrow = (n_func + ncol - 1) // ncol  # ceil(n_func / ncol), integer under Python 2 and 3

plt.figure(figsize=(4*ncol, 4*nrow))

for i in range(n_func):

    # Use the same row count the figure was sized for.  The previous expression
    # dm.shape[1]/ncol + 1 allocated an extra row and is a float under true division.
    plt.subplot(nrow, ncol, i + 1)

    f_draws = tsamp['f'][:, i, :]
    f_mean = f_draws.mean(0)
    f_sd = f_draws.std(0)

    plt.plot(x, f_mean)
    plt.fill_between(x[:, 0], f_mean - 2*f_sd, f_mean + 2*f_sd, alpha=.1)
    # Zero reference line.
    plt.plot([x.min(), x.max()], [0, 0], lw=3, c='k')

    # Fixed y range for every function after the first two.
    if i > 1:
        plt.ylim(-.48, .48)

In [None]:
# Persist the extracted draws.  The context manager closes (and flushes) the
# file even if pickling raises.
with open('ura3-0.083mMPQ-replicate-samples.pkl', 'wb') as fh:
    pickle.dump(tsamp, fh)

In [None]:
# NOTE(review): this reads the '...-batch-samples.pkl' file, not the
# '...-replicate-samples.pkl' file written by the preceding cell -- confirm that
# is intended.
# pickle.load executes arbitrary code from the file; only load trusted files.
with open('ura3-0.083mMPQ-batch-samples.pkl', 'rb') as fh:
    temp = pickle.load(fh)

In [None]:
# Fit the marginal GP model (gp_multi_marginal) to each plate on its own and
# cache the extracted draws under samples/.  Plates whose output file already
# exists are skipped, so the cell can be re-run after an interruption.
# NOTE(review): this cell is a near copy of the per-plate gp_multi cell; only
# the model and the output file name differ -- consider one shared function.

# Make sure the cache directory exists before any expensive sampling starts.
if not os.path.isdir('samples'):
    os.makedirs('samples')

for plate in plates:

    fname = 'ura3-0.083mMPQ-replicate-%s-samples.pkl'%plate
    fpath = 'samples/%s'%fname

    # Direct existence check instead of listing the whole directory each time.
    if os.path.exists(fpath):
        print('%s already done!' % plate)
        continue

    ds = machine.search(plates=[plate], Strain='ura3', **{'mM PQ':[0.0, .083], 'M NaCl':[4.2, None]})

    # Same preprocessing as for the single-plate analysis above.
    ds.log()
    ds.filter()
    ds.trim(5)
    ds.poly_scale(2, groupby=['plate', 'mM PQ'])

    # Keep every third row.
    ds.data = ds.data.iloc[::3,:]

    xraw, y, design, labels = ds.build(['mM PQ', 'plate'], scale=True)

    # Standardise observations and inputs.
    ymean, ystd = y.mean(), y.std()
    y = (y - ymean)/ystd
    x = (xraw - xraw.mean())/xraw.std()

    # Design matrix: intercept column plus indicator for "not the 0.0 PQ level".
    dm = np.zeros((y.shape[1], 2))
    dm[:,0] = 1
    dm[:,1] = design['mM PQ'] != labels[0].index('0.0')

    n = x.shape[0]

    train_data = {
        'N': n,
        'P': y.shape[1],
        'K': dm.shape[1],
        'L': 2,
        'prior': [1,2],
        'design': dm,
        'y': y.T,
        'x': x[:,0],
        'alpha_prior': [[1,1], [.1, 1]],
        'length_scale_prior': [[.5, .5], [.5,.5]],
        'marginal_alpha_prior': [.5, .1],
        'marginal_lengthscale_prior': [8, 2.0],
        'sigma_prior': [.1,1.5],
    }

    tsamples = gp_multi_marginal.sampling(data=train_data, chains=4, iter=2000, control = {'adapt_delta': 0.8})

    summary = tsamples.summary()
    summary = pd.DataFrame(summary['summary'], columns=summary['summary_colnames'], index=summary['summary_rownames'])

    tsamp = tsamples.extract(permuted=True)
    # Close the file deterministically so the cached draws are fully flushed.
    with open(fpath, 'wb') as fh:
        pickle.dump(tsamp, fh)

    # Progress report plus a quick convergence overview (distribution of Rhat).
    print(plate)
    print(summary.Rhat.describe())

In [None]:
# Fit the gp_multi model (stored under the "null" file names) to each plate on
# its own and cache the extracted draws under samples/.  Plates whose output
# file already exists are skipped, so the cell can be re-run after an interruption.
# NOTE(review): this cell is a near copy of the per-plate gp_multi_marginal
# cell; only the model and the output file name differ -- consider one shared
# function.

# Make sure the cache directory exists before any expensive sampling starts.
if not os.path.isdir('samples'):
    os.makedirs('samples')

for plate in plates:

    fname = 'ura3-0.083mMPQ-replicate-null-%s-samples.pkl'%plate
    fpath = 'samples/%s'%fname

    # Direct existence check instead of listing the whole directory each time.
    if os.path.exists(fpath):
        print('%s already done!' % plate)
        continue

    ds = machine.search(plates=[plate], Strain='ura3', **{'mM PQ':[0.0, .083], 'M NaCl':[4.2, None]})

    # Same preprocessing as for the single-plate analysis above.
    ds.log()
    ds.filter()
    ds.trim(5)
    ds.poly_scale(2, groupby=['plate', 'mM PQ'])

    # Keep every third row.
    ds.data = ds.data.iloc[::3,:]

    xraw, y, design, labels = ds.build(['mM PQ', 'plate'], scale=True)

    # Standardise observations and inputs.
    ymean, ystd = y.mean(), y.std()
    y = (y - ymean)/ystd
    x = (xraw - xraw.mean())/xraw.std()

    # Design matrix: intercept column plus indicator for "not the 0.0 PQ level".
    dm = np.zeros((y.shape[1], 2))
    dm[:,0] = 1
    dm[:,1] = design['mM PQ'] != labels[0].index('0.0')

    n = x.shape[0]

    # The marginal_* priors are passed here as well, exactly as before; whether
    # gp_multi.stan declares them is not visible from this notebook.
    train_data = {
        'N': n,
        'P': y.shape[1],
        'K': dm.shape[1],
        'L': 2,
        'prior': [1,2],
        'design': dm,
        'y': y.T,
        'x': x[:,0],
        'alpha_prior': [[1,1], [.1, 1]],
        'length_scale_prior': [[.5, .5], [.5,.5]],
        'marginal_alpha_prior': [.5, .1],
        'marginal_lengthscale_prior': [8, 2.0],
        'sigma_prior': [.1,1.5],
    }

    tsamples = gp_multi.sampling(data=train_data, chains=4, iter=2000, control = {'adapt_delta': 0.8})

    summary = tsamples.summary()
    summary = pd.DataFrame(summary['summary'], columns=summary['summary_colnames'], index=summary['summary_rownames'])

    tsamp = tsamples.extract(permuted=True)
    # Close the file deterministically so the cached draws are fully flushed.
    with open(fpath, 'wb') as fh:
        pickle.dump(tsamp, fh)

    # Progress report plus a quick convergence overview (distribution of Rhat).
    print(plate)
    print(summary.Rhat.describe())

In [None]:
# Per-plate posterior of the second latent function (index 1 of 'f'):
# left panel = gp_multi ("null") fits, right panel = gp_multi_marginal fits.
plt.figure(figsize=(8, 4))

# Only the first n_time entries along the last axis of 'f' (and rows of xraw) are used.
n_time = 31

for plate in plates:

    # NOTE(review): this query is restricted to 'mM PQ' = 0.0, whereas the
    # fitting cells standardised y from a query with both PQ levels; ystd may
    # therefore differ from the scale used at fit time -- confirm.
    ds = machine.search(plates = [plate], Strain='ura3', **{'mM PQ':[0.0], 'M NaCl':[4.2, None]})

    ds.log()
    ds.filter()
    ds.trim(5)
    ds.poly_scale(2, groupby=['plate', 'mM PQ'])

    ds.data = ds.data.iloc[::3,:]

    xraw, y, design, labels = ds.build(['mM PQ', 'plate'], scale=True)
    ystd = y.std()

    xraw = xraw[:n_time,:]

    # Load the cached draws; context managers close the files promptly.
    with open('samples/ura3-0.083mMPQ-replicate-%s-samples.pkl'%plate, 'rb') as fh:
        tsamp = pickle.load(fh)
    with open('samples/ura3-0.083mMPQ-replicate-null-%s-samples.pkl'%plate, 'rb') as fh:
        nsamp = pickle.load(fh)

    # Same drawing code for both panels; only the sample set differs.
    for panel, samples in ((121, nsamp), (122, tsamp)):
        plt.subplot(panel)
        # Rescale by ystd only (no mean shift).
        temp = samples['f'][:,1,:n_time]*ystd
        f_mean, f_sd = temp.mean(0), temp.std(0)
        plt.plot(xraw, f_mean)
        plt.fill_between(xraw[:,0], f_mean - 2*f_sd, f_mean + 2*f_sd, alpha=.1)

# Axis cosmetics and zero reference line (x range taken from the last plate).
for panel in (121, 122):
    plt.subplot(panel)
    plt.xlabel('time (h)', fontsize=16)
    if panel == 121:
        plt.ylabel('log(OD)', fontsize=16)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.plot([xraw.min(), xraw.max()], [0,0], c='k')

plt.tight_layout()

plt.savefig('figures/ura3_0.083mMPQ-replicate-effectFunctions.pdf', bbox_inches='tight')

In [None]:
# For each plate: box plot of log10(var_marginal / var_null), where the
# variances are taken over posterior draws of the second latent function
# (index 1 of 'f') at each of the first n_time positions.
plt.figure(figsize=(8, 4))

# Only the first n_time entries along the last axis of 'f' are used.
n_time = 31

for p, plate in enumerate(plates):

    ds = machine.search(plates = [plate], Strain='ura3', **{'mM PQ':[0.0], 'M NaCl':[4.2, None]})

    ds.log()
    ds.filter()
    ds.trim(5)
    ds.poly_scale(2, groupby=['plate', 'mM PQ'])

    ds.data = ds.data.iloc[::3,:]

    xraw, y, design, labels = ds.build(['mM PQ', 'plate'], scale=True)
    # NOTE(review): if ystd is a scalar it cancels in the variance ratio below,
    # which would make this per-plate query unnecessary -- confirm and simplify.
    ystd = y.std()

    xraw = xraw[:n_time,:]

    # Load the cached draws; context managers close the files promptly.
    with open('samples/ura3-0.083mMPQ-replicate-%s-samples.pkl'%plate, 'rb') as fh:
        tsamp = pickle.load(fh)
    with open('samples/ura3-0.083mMPQ-replicate-null-%s-samples.pkl'%plate, 'rb') as fh:
        nsamp = pickle.load(fh)

    # Rescale by ystd only (no mean shift).
    temp = nsamp['f'][:,1,:n_time]*ystd
    temp2 = tsamp['f'][:,1,:n_time]*ystd

    plt.boxplot([np.log10(temp2.var(0)/temp.var(0))], positions=[p], showfliers=False)

# Hide per-box tick labels and draw the "equal variance" reference line.
plt.xticks([])
plt.plot([-1, len(plates)], [0,0], c='k')
plt.xlim(-1, len(plates))
plt.yticks(fontsize=16)

plt.savefig('figures/ura3_0.083mMPQ-replicate-effectFunctionInterval.pdf', bbox_inches='tight')

In [None]:
# One panel per plate with four horizontal box plots (bottom to top):
#   0: sigma from the gp_multi ("null") fit,
#   1: sigma from the gp_multi_marginal fit (blue),
#   2: marginal_alpha from the gp_multi_marginal fit (red),
#   3: sigma + marginal_alpha from the gp_multi_marginal fit (green).
# Only the cached samples are needed.  The former ds.build(...) call was removed:
# it ran on a `ds` left over from an earlier cell and its result was never used.
plt.figure(figsize=(9,9))

ncol = 3
nrow = (len(plates) + ncol - 1) // ncol  # enough rows for every plate

for p, plate in enumerate(plates):

    # Load the cached draws; context managers close the files promptly.
    with open('samples/ura3-0.083mMPQ-replicate-%s-samples.pkl'%plate, 'rb') as fh:
        tsamp = pickle.load(fh)
    with open('samples/ura3-0.083mMPQ-replicate-null-%s-samples.pkl'%plate, 'rb') as fh:
        nsamp = pickle.load(fh)

    plt.subplot(nrow, ncol, p+1)
    plt.boxplot([nsamp['sigma']], positions=[0], showfliers=False, vert=False, widths=[.5])
    plt.boxplot([tsamp['sigma']], positions=[1], showfliers=False, boxprops={'color':'blue'}, widths=[.5], vert=False)
    plt.boxplot([tsamp['marginal_alpha']], positions=[2], showfliers=False, boxprops={'color':'red'}, widths=[.5], vert=False)
    plt.boxplot([tsamp['sigma'] + tsamp['marginal_alpha']], positions=[3], showfliers=False, boxprops={'color':'green'}, widths=[.5], vert=False)
    plt.ylim(-1, 4)

    # Tick marks (without labels) only on the left-most panel of each row.
    if p % ncol == 0:
        plt.yticks(range(4), ['']*4)
    else:
        plt.yticks([])

    plt.xticks(fontsize=12)

plt.tight_layout()
plt.savefig('figures/ura3_0.083mMPQ-varianceTotal-replicate.pdf', bbox_inches='tight')