In [1]:
import pandas as pd
import numpy as np
from scipy import interpolate
import matplotlib.pyplot as plt
import os

In [2]:
# Move the working directory up one level; the relative 'data/...' and
# 'figures/...' paths used in the cells below are resolved from there.
# NOTE: this is not idempotent - re-running the cell moves up another level.
os.chdir(os.pardir)

In [3]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [4]:
# Load the raw OD600 readings from the 'Raw data (OD600)SLIM' sheet, skipping
# the first spreadsheet row.
# `sheet_name` is used instead of `sheetname`: the old spelling was deprecated
# in pandas 0.21 and is rejected by later releases.
data = pd.read_excel(
    'data/Raw_growth_data2.xlsx',
    sheet_name='Raw data (OD600)SLIM',
    skiprows=[0],
    index_col=None
)

In [5]:
# Strain labels meant to be skipped when plotting. In the visible cells this
# list is only referenced by commented-out checks inside the plot helpers.
skip_strains = [
    'ura3_EV',
    u'cspD1_EV',
    u'cspD1_cspD1',
    'ura3',
]

In [6]:
# Make sure the figure output tree exists.
# Every leaf directory is checked on its own, so a 'figures' folder that
# already exists but is missing some sub-directories gets completed instead
# of being skipped entirely.
for figure_dir in ('figures/data/standard',
                   'figures/data/osmotic',
                   'figures/data/heatshock',
                   'figures/data/paraquat'):
    if not os.path.isdir(figure_dir):
        # makedirs also creates the intermediate 'figures' and 'figures/data'
        os.makedirs(figure_dir)

## Update labels 

In [7]:
# Collapse the alternative identifiers of a strain onto one label.
# Each (alias, label) pair rewrites the rows whose Strain equals the alias.
for alias, label in [(1179, 'copR'),
                     ('VNG1179', 'copR'),
                     ('VNG1179C', 'copR'),
                     ('VNG0194', 'VNG0194H')]:
    data.loc[data['Strain'] == alias, 'Strain'] = label

In [8]:
# Add one 0/1 indicator column per growth condition.
# Rows with a missing Condition value are flagged as 'standard'; the other
# flags are set by an exact match on the Condition text.
data['standard'] = data['Condition'].isnull().astype(int)
for flag, condition in [('paraquat', '0.333mM PQ'),
                        ('peroxide', '5mM H2O2'),
                        ('osmotic', '2.9M NaCl'),
                        ('heatshock', '54C shift at 16h')]:
    data[flag] = (data['Condition'] == condition).astype(int)

# Tidy

In [9]:
# Convert the wide table to tidy format: one row per (well, time) reading.
# The reading columns are picked by position: everything after the first seven
# columns and before the last five columns of `data`.
tidy = pd.melt(
    data,
    id_vars=[
        'Experiment', 'Well', 'Strain',
        'Biological replicate', 'Technical Replicate',
        'standard', 'paraquat', 'peroxide', 'osmotic', 'heatshock',
    ],
    value_vars=data.columns.values[7:-5].tolist(),
    var_name='time',
    value_name='OD'
)

# keep only the readings that actually have an OD value
tidy = tidy[tidy.OD.notnull()]
tidy.shape

(264372, 12)

# Plot

## Standard 

In [10]:
def plot_standard(norm,method):
    """Save one growth-curve figure per strain for the standard condition.

    Every (Experiment, Well) replicate of a strain is drawn as a green line
    of ``OD`` against ``time``. All figures share the same y-range, taken
    from the standard-condition readings.

    Parameters
    ----------
    norm : pandas.DataFrame
        Tidy readings; the columns ``standard``, ``Strain``, ``Experiment``,
        ``Well``, ``time`` and ``OD`` are read.
    method : str
        Name of the sub-directory of ``figures/data/standard/`` that receives
        the figures. It is created (with parents) when missing.

    Raises
    ------
    KeyError
        If there are no standard-condition rows for the ``ura3`` strain.
    """
    out_dir = "figures/data/standard/"+method
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    standard = norm[norm.standard==1]
    by_strain = standard.groupby('Strain')

    ura3 = by_strain.get_group('ura3')

    # The y-range does not depend on the strain being drawn, so compute it once.
    ylim = (min(ura3.OD.min(),standard.OD.min()),
            max(ura3.OD.max(),standard.OD.max()))

    for strain,strain_rows in by_strain:
        plt.figure(figsize=(8,6))

        for _,well_rows in strain_rows.groupby(['Experiment','Well']):
            # sort_values returns a new frame; keep it so the line is drawn in time order
            well_rows = well_rows.sort_values('time')
            plt.plot(well_rows.time,well_rows.OD,'g',alpha=.6)

        plt.ylabel("log(OD)",fontsize=30)
        plt.xlabel("time (h)",fontsize=30)
        plt.yticks(fontsize=25)
        plt.xticks(fontsize=25)
        plt.grid(True,color='grey')
        plt.ylim(ylim)

        plt.savefig("figures/data/standard/%s/%s"%(method,strain),bbox_inches='tight',dpi=150)
        plt.close()

## Paraquat 

In [11]:
def plot_paraquat(norm,method):
    """Save one figure per strain comparing standard and paraquat growth.

    Standard-condition wells are drawn in black and paraquat wells in green.
    Strains that lack rows for either condition are skipped. The y-range of
    each figure covers the strain's own readings together with those of the
    ``ura3`` strain.

    Parameters
    ----------
    norm : pandas.DataFrame
        Tidy readings; the columns ``standard``, ``paraquat``, ``Strain``,
        ``Experiment``, ``Well``, ``time`` and ``OD`` are read.
    method : str
        Name of the sub-directory of ``figures/data/paraquat/`` that receives
        the figures. It is created (with parents) when missing.

    Raises
    ------
    KeyError
        If the ``ura3`` strain is absent or lacks standard or paraquat rows.
    """
    out_dir = "figures/data/paraquat/"+method
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    def _plot_wells(rows,color):
        # one line per (Experiment, Well) replicate, drawn in time order
        for _,well_rows in rows.groupby(['Experiment','Well']):
            well_rows = well_rows.sort_values('time')
            plt.plot(well_rows.time,well_rows.OD,color,alpha=.6)

    # After this filter a row with standard==0 is necessarily a paraquat row.
    paraquat = norm.loc[(norm.standard==1) | (norm.paraquat==1),:]
    by_strain = paraquat.groupby('Strain')

    # Group by the scalar key so get_group(1)/get_group(0) receive matching scalar keys.
    ura3_by_condition = by_strain.get_group('ura3').groupby('standard')
    ura3_standard = ura3_by_condition.get_group(1)
    ura3_paraquat = ura3_by_condition.get_group(0)

    for strain,strain_rows in by_strain:
        by_condition = strain_rows.groupby('standard')

        try:
            _standard = by_condition.get_group(1)
            _paraquat = by_condition.get_group(0)
        except KeyError:
            # strain has no rows for one of the two conditions
            continue

        plt.figure(figsize=(8,6))

        ylim = (min(ura3_standard.OD.min(),ura3_paraquat.OD.min(),_standard.OD.min(),_paraquat.OD.min()),
                max(ura3_standard.OD.max(),ura3_paraquat.OD.max(),_standard.OD.max(),_paraquat.OD.max()))

        _plot_wells(_standard,'k')
        _plot_wells(_paraquat,'g')

        plt.ylabel("log(OD)",fontsize=30)
        plt.xlabel("time (h)",fontsize=30)
        plt.yticks(fontsize=25)
        plt.xticks(fontsize=25)
        plt.grid(True,color='grey')
        plt.ylim(ylim)

        plt.tight_layout()

        plt.savefig("figures/data/paraquat/%s/%s"%(method,strain),bbox_inches='tight',dpi=150)
        plt.close()

## Osmotic

In [12]:
def plot_osmotic(norm,method):
    """Save one figure per strain comparing standard and osmotic growth.

    Standard-condition wells are drawn in black and osmotic wells in green.
    Strains that lack rows for either condition are skipped. The y-range of
    each figure covers the strain's own readings together with those of the
    ``ura3`` strain.

    Parameters
    ----------
    norm : pandas.DataFrame
        Tidy readings; the columns ``standard``, ``osmotic``, ``Strain``,
        ``Experiment``, ``Well``, ``time`` and ``OD`` are read.
    method : str
        Name of the sub-directory of ``figures/data/osmotic/`` that receives
        the figures. It is created (with parents) when missing.

    Raises
    ------
    KeyError
        If the ``ura3`` strain is absent or lacks standard or osmotic rows.
    """
    out_dir = "figures/data/osmotic/"+method
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    def _plot_wells(rows,color):
        # one line per (Experiment, Well) replicate, drawn in time order
        for _,well_rows in rows.groupby(['Experiment','Well']):
            well_rows = well_rows.sort_values('time')
            plt.plot(well_rows.time,well_rows.OD,color,alpha=.6)

    # After this filter a row with standard==0 is necessarily an osmotic row.
    osmotic = norm.loc[(norm.standard==1) | (norm.osmotic==1),:]
    by_strain = osmotic.groupby('Strain')

    # Group by the scalar key so get_group(1)/get_group(0) receive matching scalar keys.
    ura3_by_condition = by_strain.get_group('ura3').groupby('standard')
    ura3_standard = ura3_by_condition.get_group(1)
    ura3_osmotic = ura3_by_condition.get_group(0)

    for strain,strain_rows in by_strain:
        by_condition = strain_rows.groupby('standard')

        try:
            _standard = by_condition.get_group(1)
            _osmotic = by_condition.get_group(0)
        except KeyError:
            # strain has no rows for one of the two conditions
            continue

        plt.figure(figsize=(8,6))

        ylim = (min(ura3_standard.OD.min(),ura3_osmotic.OD.min(),_standard.OD.min(),_osmotic.OD.min()),
                max(ura3_standard.OD.max(),ura3_osmotic.OD.max(),_standard.OD.max(),_osmotic.OD.max()))

        _plot_wells(_standard,'k')
        _plot_wells(_osmotic,'g')

        plt.ylabel("log(OD)",fontsize=30)
        plt.xlabel("time (h)",fontsize=30)
        plt.yticks(fontsize=25)
        plt.xticks(fontsize=25)
        plt.grid(True,color='grey')
        plt.ylim(ylim)

        plt.tight_layout()

        plt.savefig("figures/data/osmotic/%s/%s"%(method,strain),bbox_inches='tight',dpi=150)
        plt.close()

## Heatshock

In [13]:
def plot_heatshock(norm,method):
    """Save one titled growth-curve figure per strain for the heat-shock runs.

    Every (Experiment, Well) replicate is drawn as a green line of ``OD``
    against ``time``; a cyan vertical line is drawn at ``time == 16``.

    Parameters
    ----------
    norm : pandas.DataFrame
        Tidy readings; the columns ``heatshock``, ``Strain``, ``Experiment``,
        ``Well``, ``time`` and ``OD`` are read.
    method : str
        Name of the sub-directory of ``figures/data/heatshock/`` that receives
        the figures. It is created (with parents) when missing.
    """
    out_dir = "figures/data/heatshock/"+method
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    heatshock = norm[norm.heatshock==1]

    # The ura3 rows were looked up here but never drawn; the lookup is dropped
    # so a data set without heat-shock ura3 rows no longer raises.
    for strain,strain_rows in heatshock.groupby('Strain'):
        plt.figure(figsize=(8,6))
        # vertical marker at 16 h (the heat-shock condition label reads '54C shift at 16h')
        plt.plot([16,16],[-.2,1.8],'cyan',lw=3)

        for _,well_rows in strain_rows.groupby(['Experiment','Well']):
            # sort_values returns a new frame; keep it so the line is drawn in time order
            well_rows = well_rows.sort_values('time')
            plt.plot(well_rows.time,well_rows.OD,'g',alpha=.6)

        plt.title(strain,fontsize=35)

        plt.ylabel("log(OD)",fontsize=30)
        plt.xlabel("time (h)",fontsize=30)
        plt.yticks(fontsize=25)
        plt.xticks(fontsize=25)
        plt.grid(True,color='grey')

        plt.savefig("figures/data/heatshock/%s/%s"%(method,strain),bbox_inches='tight',dpi=150)
        plt.close()

# Normalize 

## Time > 4 hrs & < 48 hrs

In [14]:
# Drop heat-shock readings taken after 32 hours; every other row is kept.
norm = tidy.loc[~((tidy.heatshock == 1) & (tidy.time > 32))]

In [15]:
# one group per (Experiment, Well) pair
g = norm.groupby(['Experiment','Well'])

def time_remove(x):
    """Return the rows of ``x`` whose ``time`` lies strictly between 4 and 48.

    Both bounds are exclusive; the index of the surviving rows is unchanged.
    """
    inside_window = (x.time > 4) & (x.time < 48)
    return x[inside_window]

# apply the time-window filter to every (Experiment, Well) group
norm = g.apply(time_remove)

## Standard final OD threshold 

In [16]:
# Flag, per (Experiment, Well), the wells made up only of standard-condition
# rows whose OD range (max - min) is below 1.0.
g = norm.groupby(['Experiment','Well'])
select = g.apply(
    lambda well: (well.standard == 1).all() and (well.OD.max() - well.OD.min()) < 1.0
)

# NOTE(review): the flagged wells are kept in `temp`, but none of the visible
# cells below filters on it - confirm whether they were meant to be removed.
temp = select[select]

## Log2 

In [17]:
# one group per strain and combination of condition flags
g = norm.groupby(['Strain','standard','paraquat','peroxide','osmotic','heatshock'])

In [18]:
def normalize(x):
    """Return a copy of ``x`` with the ``OD`` column replaced by ``log2(OD)``.

    The input frame is not modified: functions handed to ``groupby.apply``
    should not mutate the group they receive.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with a numeric ``OD`` column.

    Returns
    -------
    pandas.DataFrame
        Same rows and index as ``x`` with log2-transformed ``OD``. Values that
        are not positive come out as ``-inf`` or ``NaN``, following
        ``numpy.log2``.
    """
    transformed = x.copy()
    transformed['OD'] = np.log2(transformed['OD'])
    return transformed

# apply normalize() to each strain/condition group
norm = g.apply(normalize)

In [19]:
# Save the per-strain figures of every condition under the 'log2' folders.
for plot_condition in (plot_standard, plot_paraquat, plot_osmotic, plot_heatshock):
    plot_condition(norm, 'log2')

## Subtract $t_0$ 

In [20]:
# one group per strain and combination of condition flags
g = norm.groupby(['Strain','standard','paraquat','peroxide','osmotic','heatshock'])

In [21]:
def normalize(x):
    """Return a copy of ``x`` with the earliest-time mean ``OD`` subtracted.

    The baseline is the mean ``OD`` over all rows of ``x`` whose ``time``
    equals the smallest ``time`` in ``x``. The input frame is not modified:
    functions handed to ``groupby.apply`` should not mutate the group they
    receive.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with ``time`` and ``OD`` columns.

    Returns
    -------
    pandas.DataFrame
        Same rows and index as ``x`` with the baseline removed from ``OD``.
    """
    baseline = x.OD[x.time==x.time.min()].mean()
    shifted = x.copy()
    shifted['OD'] = x.OD - baseline
    return shifted

# apply normalize() to each strain/condition group
norm = g.apply(normalize)

In [22]:
# Save the per-strain figures of every condition under the 'log2_st0' folders.
for plot_condition in (plot_standard, plot_paraquat, plot_osmotic, plot_heatshock):
    plot_condition(norm, 'log2_st0')

In [23]:
# write the current state of `norm` to CSV, leaving the index out
norm.to_csv("data/tidy_normalize_log_st0.csv",index=False)

In [24]:
def plotWells(x,color='b'):
    """Draw one ``OD``-versus-``time`` line per (Experiment, Well) group of ``x``.

    Lines are added to the current matplotlib figure with ``alpha=.6``.

    Parameters
    ----------
    x : pandas.DataFrame
        Needs the columns ``Experiment``, ``Well``, ``time`` and ``OD``.
    color : str, optional
        Colour passed to ``plt.plot``; defaults to ``'b'``.
    """
    for _,well_rows in x.groupby(['Experiment','Well']):
        # sort_values returns a new frame; keep it so the line is drawn in time order
        well_rows = well_rows.sort_values('time')
        plt.plot(well_rows.time,well_rows.OD,color=color,alpha=.6)

## Well correction 

In [25]:
# one group per strain and combination of condition flags
g = norm.groupby(['Strain','standard','paraquat','peroxide','osmotic','heatshock'])

In [26]:
def well_effect(x):
    """Remove each well's constant offset from the mean curve of ``x``.

    The mean ``OD`` at every time point is computed over all rows of ``x``.
    For each (``Experiment``, ``Well``) replicate, the average difference
    between its readings and that mean curve is then subtracted from all of
    the replicate's ``OD`` values.

    Parameters
    ----------
    x : pandas.DataFrame
        Needs the columns ``Experiment``, ``Well``, ``time`` and ``OD``.

    Returns
    -------
    pandas.DataFrame
        Offset-corrected rows; ``x`` itself is left unmodified.
    """
    # Mean OD per time point. Selecting the column before averaging avoids
    # reducing whole frames, which breaks on the non-numeric columns.
    time_od = x.groupby('time')['OD'].mean()

    def subtract_effect(y):
        # Look the reference value up by time, so neither the row order nor
        # repeated time points inside a well can misalign the subtraction.
        reference = y['time'].map(time_od).values
        offset = (y['OD'].values - reference).mean()
        # work on a copy: functions given to groupby.apply should not mutate their input
        corrected = y.copy()
        corrected['OD'] = y['OD'].values - offset
        return corrected

    # group_keys=False keeps the original row index on the result
    replicate_group = x.groupby(['Experiment','Well'],group_keys=False)
    return replicate_group.apply(subtract_effect)

In [27]:
# apply the per-well offset correction within each strain/condition group
norm = g.apply(well_effect)

In [28]:
# Save the per-strain figures of every condition under the 'log2_well' folders.
for plot_condition in (plot_standard, plot_paraquat, plot_osmotic, plot_heatshock):
    plot_condition(norm, 'log2_well')

# Save 

In [29]:
# write the fully processed `norm` table to CSV, leaving the index out
norm.to_csv("data/tidy_normalize_all.csv",index=False)