In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
import popmachine, itertools, patsy, GPy, scipy
import pandas as pd
from popmachine.normal import MultivariateNormal

In [None]:
# Open the popmachine handle that every machine.search() call in this notebook goes through.
# NOTE(review): the SQLite URL is relative to the working directory, so the
# notebook only runs from a location where ../popmachine_local/ resolves.
machine = popmachine.Machine('sqlite:///../popmachine_local/.popmachine.db')

In [None]:
# Exploratory query: search on the listed 'mM PQ' values and on 'M NaCl' being
# '4.2' or None, then list which plates contributed data.
# NOTE(review): include=['Strain'] presumably adds the Strain column to ds.meta
# without filtering on it — confirm against popmachine.
ds = machine.search(include=['Strain'], **{'mM PQ':[0.0, 0.083, 0.333], 'M NaCl':['4.2', None]})
ds.meta.plate.unique()

In [None]:
# Same query restricted to an explicit list of nine plates, then list the
# strains present on them. 333 is included in the PQ list alongside 0.333;
# later cells relabel '333.0' as '0.333'.
ds = machine.search(plates=[u'20161010_PQ_osmo', u'20150517 PQ 3', u'20161107_PQ_osmo_combo',
       u'20150715 PQ 8', u'20150702 PQ 6', u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9', u'20150607 PQ 4'], include=['Strain'], **{'mM PQ':[0.0, 0.083, 0.333, 333], 'M NaCl':['4.2', None]})
ds.meta.Strain.unique()

In [None]:
# Build the working dataset: strain 'ura3' on the nine selected plates.
# (The commented-out query below is an earlier variant without plate
# '20150607 PQ 4' and without the 0.33 / 333.0 PQ values.)
# ds = machine.search(plates=[u'20161010_PQ_osmo', u'20150517 PQ 3', u'20161107_PQ_osmo_combo',
#        u'20150715 PQ 8', u'20150702 PQ 6', u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9'], Strain='ura3', **{'mM PQ':[0.0, 0.083, 0.333], 'M NaCl':['4.2', None]})

ds = machine.search(plates=[u'20161010_PQ_osmo', u'20150517 PQ 3', u'20161107_PQ_osmo_combo',
       u'20150715 PQ 8', u'20150702 PQ 6', u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9', u'20150607 PQ 4'], Strain='ura3', **{'mM PQ':[0.0, 0.083, 0.33, 0.333,333.0], 'M NaCl':['4.2', None]})
# Wells recorded as '333.0' are relabelled as '0.333' so they pool with that level.
# ds.meta['mM PQ'] holds strings, hence the string comparison.
ds.meta.loc[ds.meta['mM PQ'] == '333.0', 'mM PQ'] = '0.333'
# popmachine preprocessing pipeline; these calls change ds when invoked (no
# return value is used), so re-running this cell without re-running the
# search presumably applies them twice — TODO confirm.
# NOTE(review): the exact semantics of trim(10), poly_scale(2, ...) and filter()
# are defined in popmachine; log() presumably log-transforms the OD values
# (later plots label the y axis 'log(OD)').
ds.trim(10)
ds.log()
ds.poly_scale(2,groupby=['plate','mM PQ'])
ds.filter()

In [None]:
# Overview of the preprocessed curves drawn on a wide figure, with 'plate'
# passed as the column grouping and 'mM PQ' as the colour grouping.
# NOTE(review): layout details (and the meaning of buff) are up to popmachine's plot().
plt.figure(figsize=(20,6))

ds.plot(columns=['plate'], colorby=['mM PQ'], buff=.3)

In [None]:
# For each PQ level: write that level's curves to 'ura3-<pq>mMPQ.csv' in the
# working directory and draw them in their own figure on a fixed y-range.
# NOTE(review): wells labelled '0.33' are requested by the search but match none
# of these three levels, so they are not exported here.
for pq in ['0.0', '0.083', '0.333']:
    # columns of ds.data are selected with a boolean mask built from ds.meta
    temp = ds.data.loc[:,ds.meta['mM PQ']==pq]
    temp.to_csv('ura3-%smMPQ.csv'%pq,)
    
    plt.figure()
    plt.plot(temp.index, temp.values, c='k', alpha=.4)
    plt.ylim(-.6,2.6)

## parameters 

In [None]:
# Load the per-curve parameter tables (one CSV per PQ level), tag every row
# with its PQ concentration and source plate, and overlay per-level histograms
# of the three parameter columns (mu.model, lambda.model, A.model).
# NOTE(review): the '*_params.csv' files are not written anywhere in this
# notebook; presumably an external fitting step produces them from the
# 'ura3-<pq>mMPQ.csv' exports — confirm.
param_frames = []

plt.figure(figsize=(10,4))

for pq in ['0.0', '0.083', '0.333']:
    temp = pd.read_csv('ura3-%smMPQ_params.csv'%pq, index_col=0)
    temp['mMPQ'] = float(pq)
    # Assumes the CSV rows are in the same order as the ds columns of this PQ
    # level (a length mismatch raises) — TODO confirm ordering.
    temp['plate'] = ds.meta.loc[ds.meta['mM PQ']==pq, 'plate'].values

    param_frames.append(temp)

    # NOTE(review): `normed` is kept for the matplotlib version this notebook
    # was written against; newer releases spell it `density`.
    plt.subplot(131)
    plt.hist(temp['mu.model'], alpha=.6, label='mM PQ = %s'%pq, normed=True)
    plt.legend()

    plt.subplot(132)
    plt.hist(temp['lambda.model'], alpha=.6, label='mM PQ = %s'%pq, normed=True)

    plt.subplot(133)
    plt.hist(temp['A.model'], alpha=.6, label='mM PQ = %s'%pq, normed=True)

# Concatenate once, with an explicit axis keyword, instead of growing the frame
# inside the loop from an empty DataFrame (quadratic copying, and positional
# `axis` is rejected by newer pandas).
params = pd.concat(param_frames, axis=0)
    


In [None]:
# Quick look at the combined parameter table (first rows only).
params.head()

In [None]:
# List which (plate, mMPQ) combinations actually exist in the parameter table.
# NOTE: later cells regroup with the key order reversed, ('mMPQ', 'plate').
g = params.groupby(['plate', 'mMPQ'])
g.groups.keys()

In [None]:

# 3x3 grid of kernel density estimates of the fitted parameters.
# Columns are parameters (mu, lambda, A); rows are PQ levels. Each panel shows
# one thin curve per plate plus a thick black curve pooled over all plates.
# get_group raises KeyError if a plate has no rows at some PQ level.
g = params.groupby(['mMPQ', 'plate'])

plt.figure(figsize=(10,10))
for i, p in enumerate(['mu', 'lambda', 'A']):
    # shared evaluation grid for this parameter: 0.9*min .. 1.1*max over all rows
    z = np.linspace(params[p+'.model'].min()*.9, params[p+'.model'].max()*1.1)

    for j, pq in enumerate([0.0, .083, .333]):
        plt.subplot(3,3,i+j*3+1)

        if j == 0:
            plt.title(p)
        if i == 0:
            plt.ylabel('mM PQ = %.3lf'%pq)

        for pl in params.plate.unique():
            temp = g.get_group((pq, pl))

            kde = scipy.stats.gaussian_kde(temp[p+'.model'].values)
            plt.plot(z, kde(z), label=pl)

        # Pooled density over every plate at this PQ level. It gets its own
        # label: the original reused `pl`, which at this point is just the last
        # plate of the loop above and would mislabel the curve in any legend.
        kde = scipy.stats.gaussian_kde(params.loc[params.mMPQ==pq, p+'.model'].values)
        plt.plot(z, kde(z), label='all plates', c='k', lw=3)

plt.tight_layout()

plt.savefig('figures/ura3-mMPQ-params-kde.pdf', bbox_inches='tight')

In [None]:

# Per-plate density differences. For each parameter (columns) and each
# non-zero PQ level (rows), plot KDE(treated) - KDE(control) on a shared grid z,
# one curve per plate. Positive values mark parameter ranges that are more
# densely populated under PQ than in the plate's own 0.0 control.
g = params.groupby(['mMPQ', 'plate'])

plt.figure(figsize=(10,10))
for i, p in enumerate(['mu', 'lambda', 'A']):
    # evaluation grid: 0.9*min .. 1.1*max of this parameter over all rows
    z = np.linspace(params[p+'.model'].min()*.9, params[p+'.model'].max()*1.1)
    
    for j, pq in enumerate([.083, .333]):
        plt.subplot(2,3,i+j*3+1)
        
        if j == 0:
            plt.title(p)
        if i == 0:
            plt.ylabel('mM PQ = %.3lf'%pq)
        
        for pl in params.plate.unique():
            # treated wells vs. the same plate's control wells
            cond = g.get_group((pq, pl))[p+'.model']
            cont = g.get_group((0.0, pl))[p+'.model']
                        
            kde1 = scipy.stats.gaussian_kde(cond.values)
            kde2 = scipy.stats.gaussian_kde(cont.values)
            plt.plot(z, kde1(z)-kde2(z), label=pl)
            
            #plt.hist(temp[p+'.model'],alpha=.5, normed=True)
            
        # NOTE(review): this pooled KDE is computed but never drawn — both plot
        # calls that used it are commented out below.
        kde = scipy.stats.gaussian_kde(params.loc[params.mMPQ==pq, p+'.model'].values)
        #plt.plot(z, kde(z)/kde(z).max(), label=pl, c='k', lw=3)
        #plt.plot(z, kde(z), label=pl, c='k', lw=3)
            
plt.tight_layout()

# plt.savefig('figures/ura3-mMPQ-params.pdf', bbox_inches='tight')

In [None]:
# One figure per parameter, one panel per plate (3x3 grid, so at most nine
# plates). Each panel overlays the KDE of the parameter at the three PQ levels
# and marks, below the curves, treated levels whose mean differs from the
# plate's control by an independent two-sample t-test (p < 0.05).
g = params.groupby(['mMPQ', 'plate'])
for p in ['mu', 'lambda', 'A']:
    # evaluation grid: 0.9*min .. 1.1*max of this parameter over all rows
    z = np.linspace(params[p+'.model'].min()*.9, params[p+'.model'].max()*1.1)
    
    plt.figure(figsize=(9,9))
    for i,pl in enumerate(params.plate.unique()):
        plt.subplot(3,3,i+1)
        
        for j, pq in enumerate([0.0, .083, .333]):
            cond = g.get_group((pq, pl))[p+'.model']
            
            kde = scipy.stats.gaussian_kde(cond.values)
            # colour C0/C1/C2 follows the PQ level's position in the list
            plt.plot(z, kde(z), label='%s mM PQ'%str(pq),c='C%d'%(j))
            
            if i == 0:
                # NOTE(review): the y-label is overwritten on every pass of this
                # loop, so the first panel ends up labelled with the last PQ
                # level only; the legend already identifies the curves.
                plt.ylabel('mM PQ = %.3lf'%pq)
                plt.legend()
                
            plt.yticks([])
                
        # upper y-limit after the KDEs are drawn; used to place markers below zero
        _,yl = plt.ylim()
        
        for j, pq in enumerate([.083, .333]):
            cond = g.get_group((pq, pl))[p+'.model']
            cont = g.get_group((0.0, pl))[p+'.model']
            tstat, pval = scipy.stats.ttest_ind(cond, cont)

            if pval < 0.05:
                # 'x' at the treated mean, in the colour of that level's KDE
                # (j+1 because index 0 in the KDE loop is the control)
                #plt.scatter([cond.mean()], [-.1*yl], marker='x', color='C%d'%(j+1), s=-10*np.log10(pval))
                plt.scatter([cond.mean()], [-.1*yl], marker='x', color='C%d'%(j+1))

    plt.tight_layout()
    plt.savefig('figures/ura3-mMPQ-kde-%s.pdf'%p, bbox_inches='tight')

In [None]:
# Scratch check: repeat the treated-vs-control t-test for a single group.
# NOTE(review): pq, pl, p and g are whatever the loops of the previous cell
# left behind (their last iteration), so this cell only works when run right
# after it and is not reproducible on its own.
cond = g.get_group((pq, pl))[p+'.model']  
cont = g.get_group((0.0, pl))[p+'.model']

tstat, pval = scipy.stats.ttest_ind(cond, cont)
pval

In [None]:
# Box plots of each parameter (columns) at each non-zero PQ level (rows).
# Every plate gets a pair of boxes: treated at the even x position (black box)
# and that plate's control at the following odd position (blue box).
# NOTE(review): relies on `g` from an earlier cell being the
# params.groupby(['mMPQ', 'plate']) grouping.
plates = params.plate.unique().tolist()
plt.figure(figsize=(10,10))
for i, p in enumerate(['mu', 'lambda', 'A']):
    
    for j, pq in enumerate([.083, .333]):
        plt.subplot(2,3,i+j*3+1)
        
        if j == 0:
            plt.title(p)
        if i == 0:
            plt.ylabel('mM PQ = %.3lf'%pq)
        
        for pl in plates:
            cond = g.get_group((pq, pl))[p+'.model']
            cont = g.get_group((0.0, pl))[p+'.model']
            
            plt.boxplot([cond], positions=[plates.index(pl)*2], boxprops={'color':'k'}, showfliers=False)
            plt.boxplot([cont], positions=[plates.index(pl)*2+1], boxprops={'color':'b'}, showfliers=False)
            
            
        # NOTE(review): the first box sits at x=0, exactly on the left axis limit.
        plt.xlim(0, len(plates)*2)
                        
        
                        
        

In [None]:
# Raw (preprocessed) growth curves, one panel per plate in a 3x3 grid, coloured
# by PQ level. Only the first panel carries a legend; there a single curve per
# level is labelled so each level appears once in the legend.
plt.figure(figsize=(9,9))

g = ds.meta.groupby(['plate', 'mM PQ'])
keys = ds.meta.plate.unique().tolist()

# (colour, legend label) per PQ level; any other level is drawn as 0.083
pq_styles = {
    '0.0': ('k', 'mM PQ = 0.0'),
    '0.333': ('limegreen', 'mM PQ = 0.333'),
}
fallback_style = ('cyan', 'mM PQ = 0.083')

# common y-range for every panel, taken over the whole dataset
y_lo = ds.data.min().min()
y_hi = ds.data.max().max()

for k, ind in g:
    # meta index labels are used as column positions of ds.data
    # NOTE(review): only valid while ds.meta has a 0..n-1 index — confirm.
    temp = ds.data.iloc[:,ind.index]

    p, pq = k
    i = keys.index(p)

    plt.subplot(3,3,i+1)

    line_color, line_label = pq_styles.get(pq, fallback_style)

    if i != 0:
        plt.plot(temp.index, temp.values, c=line_color, label=line_label)
    else:
        plt.plot(temp.index, temp.values[:,0], c=line_color, label=line_label)
        plt.plot(temp.index, temp.values[:,1:], c=line_color)
        plt.legend()

    # axis labels only on the left column and the bottom row
    if i % 3 == 0:
        plt.ylabel('log(OD)', fontsize=14)
    if i > 5:
        plt.xlabel('time (h)', fontsize=14)

    plt.ylim(y_lo, y_hi)

plt.savefig('figures/ura3_PQ_data.pdf', bbox_inches='tight')

In [None]:
# Mean difference (treated - control) over time for both PQ levels, pooled over
# all plates, with a +/- 2 s band where s = sqrt(var_treated + var_control).
# The column masks pq0 / pq333 / pq083 are reused by the following cells.
pq0 = ds.meta['mM PQ'] == '0.0'
pq333 = ds.meta['mM PQ'] == '0.333'
pq083 = ds.meta['mM PQ'] == '0.083'

control = ds.data.loc[:,pq0]

for treated_mask, shade, tag in ((pq083, 'cyan', 'mM PQ = 0.083'),
                                 (pq333, 'limegreen', 'mM PQ = 0.333')):
    treated = ds.data.loc[:,treated_mask]

    # per-time-point statistics across wells (axis 1 = columns)
    m = treated.mean(1) - control.mean(1)
    s = np.sqrt(treated.var(1) + control.var(1))

    plt.plot(ds.data.index, m, c=shade, label=tag)
    plt.fill_between(ds.data.index, m-2*s, m+2*s, alpha=.4, color=shade)

# zero line = no difference from control
t_lo, t_hi = ds.data.index.min(), ds.data.index.max()
plt.plot([t_lo, t_hi], [0,0], c='k', lw=2)

plt.legend()

In [None]:
# Pointwise t-tests over time, pooled over plates: at every time point compare
# the treated wells with the control wells (pq083/pq333/pq0 masks come from the
# previous cell) and plot the t statistic as a curve per PQ level.
# The p-values are collected but only used by commented-out plots and by a
# later scratch cell.
pvals = []
tstats = []

for i in range(ds.data.shape[0]):
    
    # one row of ds.data = one time point; columns selected by the PQ mask
    p1 = ds.data.loc[ds.data.index[i],pq083]
    p2 = ds.data.loc[ds.data.index[i],pq0]
    
    ts, pv = scipy.stats.ttest_ind(p1, p2)
    
    pvals.append(pv)
    tstats.append(ts)
    
# plt.plot(ds.data.index, -np.log10(pvals), c='cyan', label='mM PQ = 0.083')
plt.plot(ds.data.index, tstats, c='cyan', label='mM PQ = 0.083')


# Same again for the 0.333 level; pvals/tstats are reset, so after this cell
# they hold the 0.333 results only.
pvals = []
tstats = []

for i in range(ds.data.shape[0]):
    
    p1 = ds.data.loc[ds.data.index[i],pq333]
    p2 = ds.data.loc[ds.data.index[i],pq0]
    
    ts, pv = scipy.stats.ttest_ind(p1, p2)
    
    pvals.append(pv)
    tstats.append(ts)
    
# Reference t distribution, with degrees of freedom taken from the sizes of the
# last p1/p2 (the 0.333 comparison at the final time point).
df = p1.shape[0] + p2.shape[0] - 2
t = scipy.stats.t(df=df)
    
# plt.plot(ds.data.index, -np.log10(pvals), c='limegreen', label='mM PQ = 0.333')
plt.plot(ds.data.index, tstats, c='limegreen', label='mM PQ = 0.333')


# Horizontal line at the lower-tail 5% quantile of that t distribution.
# NOTE(review): this is a one-sided bound based on the 0.333 sample sizes; the
# per-plate version of this plot further down draws both .025 and .975 —
# confirm which threshold is intended.
# plt.plot([ds.data.index.min(), ds.data.index.max()], [-np.log10(.05)]*2, c='k', lw=2)
plt.plot([ds.data.index.min(), ds.data.index.max()], [t.ppf(.05)]*2, c='k', lw=2)

plt.legend()

plt.ylabel('$t$', fontsize=14)
plt.xlabel('time (h)', fontsize=14)    
plt.savefig('figures/ura3_PQ_data_ttest.pdf', bbox_inches='tight')

In [None]:
# Side-by-side box plots per time point: 0.083 mM PQ wells at even x positions
# (blue boxes) and control wells at the following odd positions, zoomed to
# x positions 40..80.
plt.figure(figsize=(14,4))

for i in range(ds.data.shape[0]):
    
    p1 = ds.data.loc[ds.data.index[i],pq083]
    p2 = ds.data.loc[ds.data.index[i],pq0]
    
    # NOTE(review): the test result is not used in this loop.
    ts, pv = scipy.stats.ttest_ind(p1, p2)
    
    plt.boxplot([p1], positions=[2*i], boxprops={'color':'b'}, showfliers=False)
    plt.boxplot([p2], positions=[2*i + 1], showfliers=False)
    

# NOTE(review): this loop draws nothing; its only effect is to leave p1, p2, ts
# and pv set to the 0.333-vs-control comparison at the last time point.
for i in range(ds.data.shape[0]):
    
    p1 = ds.data.loc[ds.data.index[i],pq333]
    p2 = ds.data.loc[ds.data.index[i],pq0]
    
    ts, pv = scipy.stats.ttest_ind(p1, p2)
    
# Tick every 8 x positions (= every 4th time point), labelled with the
# time-point index rather than the time value.
# plt.xlim(0, ds.data.shape[0]*2)
plt.xticks(range(0, ds.data.shape[0]*2, 8), range(0, ds.data.shape[0], 4))
plt.xlim(40, 80)

# NOTE(review): the boxes show the data values themselves, not t statistics, so
# this label looks copied from the t-test cell — confirm.
plt.ylabel('$t$', fontsize=14)
# plt.xlabel('time (h)', fontsize=14)    
# plt.savefig('figures/ura3_PQ_data_ttest.pdf', bbox_inches='tight')

In [None]:

# Pointwise t-tests over time, computed separately within each plate: one panel
# per non-zero PQ level, one t-statistic curve per plate (treated wells vs. the
# same plate's control wells).
# NOTE(review): no plt.figure() is called, so this draws into whatever figure is
# current. It also overwrites tstats, s, p1, p2, df and t from earlier cells.
for j, pq in enumerate(['0.083', '0.333']):
    plt.subplot(1,2,j+1)
    plt.title('mM PQ = %s'%pq, fontsize=16)
    for p in ds.meta.plate.unique():
        tstats = []
        for i in range(ds.data.shape[0]):

            # treated wells of this plate at time point i
            s = (ds.meta['mM PQ'] == pq) & (ds.meta['plate'] == p)
            p1 = ds.data.loc[ds.data.index[i],s]

            # control wells of the same plate
            s = (ds.meta['mM PQ'] == '0.0') & (ds.meta['plate'] == p)
            p2 = ds.data.loc[ds.data.index[i],s]

            ts, pv = scipy.stats.ttest_ind(p1, p2)

            tstats.append(ts)

        plt.plot(ds.data.index, tstats)
        
    if j == 0:
        plt.ylabel('$t$', fontsize=14)
    plt.xlabel('time (h)', fontsize=14)

    # Two-sided 5% bounds of the reference t distribution.
    # NOTE(review): the degrees of freedom come from the last plate's sample
    # sizes only; the lines are exact for other plates only if every plate has
    # the same number of wells per condition — confirm.
    df = p1.shape[0] + p2.shape[0] - 2
    t = scipy.stats.t(df=df)
    plt.plot([ds.data.index.min(), ds.data.index.max()], [t.ppf(.025)]*2, c='k', lw=2)
    plt.plot([ds.data.index.min(), ds.data.index.max()], [t.ppf(.975)]*2, c='k', lw=2)

plt.savefig('figures/ura3_PQ_data_ttest-batch.pdf', bbox_inches='tight')

In [None]:
# Scratch: rebuild the reference t distribution from whatever p1/p2 the last
# executed cell left behind (depends on execution order).
df = p1.shape[0] + p2.shape[0] - 2
t = scipy.stats.t(df=df)

In [None]:
# Scratch: lower-tail 5% quantile of the reference t distribution.
t.ppf(.05)

In [None]:
# Scratch: compare the t CDF at the collected statistics with the collected p-values.
# NOTE(review): tstats and pvals are kernel leftovers that may come from
# different cells (tstats is overwritten by the per-plate cell, pvals is not),
# and ttest_ind reports two-sided p-values while cdf is one-sided — treat the
# result with care.
t.cdf(tstats) - pvals

In [None]:
# Scratch: re-run the t-test on the p1/p2 left over from a previous cell.
ts, pv = scipy.stats.ttest_ind(p1, p2)

## edge effects 

In [None]:
def wellPosition(number):
    """Convert a well number into zero-based coordinates on a 10x10 grid.

    Wells 1..100 map to ``[(number - 1) % 10, (number - 1) // 10]``, so well 1
    is ``[0, 0]`` and well 100 is ``[9, 9]``. Numbers above 100 are first
    wrapped with ``number % 100``; a value of 0 (given directly or produced by
    the wrap, e.g. 200) is treated as the last well, ``[9, 9]``.

    Parameters
    ----------
    number : int or any value accepted by ``int()``
        Well number, e.g. an entry of ``ds.meta.number``.

    Returns
    -------
    list of int
        Two integer coordinates, each in 0..9 for non-negative input.
        NOTE(review): which coordinate is the plate row and which the column is
        not visible from this notebook — confirm against the plate layout.
    """
    number = int(number)
    if number > 100:
        # one wrap is enough: the remainder is always below 100
        number %= 100
    if number == 0:
        # same container type as the general case (this branch used to return a tuple)
        return [9, 9]
    # Floor division keeps the second coordinate an integer on Python 3 as well;
    # plain "/" would give e.g. 2.4 for well 25 and break the edge-distance test.
    return [(number - 1) % 10, (number - 1) // 10]

In [None]:
# Rebuild the working dataset for the edge-effect section with the same query
# and preprocessing as the main analysis (this replaces the current ds).
# (The commented-out query is an earlier variant without plate '20150607 PQ 4'
# and without the 0.33 / 333.0 PQ values.)
# ds = machine.search(plates=[u'20161010_PQ_osmo', u'20150517 PQ 3', u'20161107_PQ_osmo_combo',
#        u'20150715 PQ 8', u'20150702 PQ 6', u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9'], Strain='ura3', **{'mM PQ':[0.0, 0.083, 0.333], 'M NaCl':['4.2', None]})

ds = machine.search(plates=[u'20161010_PQ_osmo', u'20150517 PQ 3', u'20161107_PQ_osmo_combo',
       u'20150715 PQ 8', u'20150702 PQ 6', u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9', u'20150607 PQ 4'], Strain='ura3', **{'mM PQ':[0.0, 0.083, 0.33, 0.333,333.0], 'M NaCl':['4.2', None]})
# Wells recorded as '333.0' are relabelled as '0.333' so they pool with that level.
ds.meta.loc[ds.meta['mM PQ'] == '333.0', 'mM PQ'] = '0.333'
# popmachine preprocessing pipeline.
# NOTE(review): exact semantics of trim / log / poly_scale / filter live in
# popmachine — confirm before changing the arguments.
ds.trim(10)
ds.log()
ds.poly_scale(2,groupby=['plate','mM PQ'])
ds.filter()

In [None]:
# Reference view before edge filtering: all wells, with 'plate' as the column
# grouping and 'mM PQ' as the colour grouping.
plt.figure(figsize=(20,6))

ds.plot(columns=['plate'], colorby=['mM PQ'], buff=.3)

In [None]:
# Edge-effect control: rebuild the dataset, then drop every well that sits on
# the outer ring of the 10x10 plate so only interior wells remain.
#
# The PQ list includes 333.0, as in the other two ura3 queries of this
# notebook. Without it the relabelling line below matches nothing and the
# wells recorded as '333.0' are missing from the edge-filtered data.
# NOTE(review): confirm that dropping 333.0 here was not intentional.
ds = machine.search(plates=[u'20161010_PQ_osmo', u'20150517 PQ 3', u'20161107_PQ_osmo_combo',
       u'20150715 PQ 8', u'20150702 PQ 6', u'20150630 PQ 5', u'20150704 PQ 7', u'20150717 PQ 9', u'20150607 PQ 4'], Strain='ura3', **{'mM PQ':[0.0, 0.083, 0.33, 0.333, 333.0], 'M NaCl':['4.2', None]})
ds.meta.loc[ds.meta['mM PQ'] == '333.0', 'mM PQ'] = '0.333'
ds.trim(10)
ds.log()
ds.poly_scale(2,groupby=['plate','mM PQ'])
ds.filter()

# Grid coordinates of every well, shape (n_wells, 2). A list comprehension is
# used because np.array(map(...)) only works where map() returns a list
# (Python 2); on Python 3 it produces a 0-d object array. reshape keeps the
# array 2-D even when the search returns no wells.
position = np.array([wellPosition(n) for n in ds.meta.number]).reshape(-1, 2)
# Distance to the nearest plate edge: the smaller of min(x, y) and min(9-x, 9-y).
dist = np.column_stack((position.min(1),(9-position).min(1))).min(1)
# keep interior wells only (edge distance 0 = outer ring)
select = dist != 0

ds.data = ds.data.iloc[:,select]
ds.meta = ds.meta.iloc[select,:]

# renumber so that data columns and meta rows stay aligned as 0..n-1
ds.data.columns=range(ds.data.shape[1])
ds.meta.index=range(ds.meta.shape[0])

In [None]:
# Same overview plot after the edge wells were removed, for visual comparison
# with the unfiltered version above.
plt.figure(figsize=(20,6))

ds.plot(columns=['plate'], colorby=['mM PQ'], buff=.3)