In [1]:
import os
import glob
import pandas as pd
from pathlib import Path
import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.palettes
from bokeh.transform import jitter
import seaborn as sns
import matplotlib
from bokeh.models import HoverTool, Range1d
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import dodge
from scipy import stats
import pandas as pd
import math

from bokeh.layouts import row
bokeh.io.output_notebook()

from tkinter import Tk
from tkinter.filedialog import askdirectory

In [2]:
#Functions  
def bootstrap_sampling(my_array, bootstrap_samples, bootstrap_replicates):
    bs_samples=np.zeros((bootstrap_replicates, bootstrap_samples))
    bs_rep=np.arange(bootstrap_replicates)
    for i in bs_rep:
        bs_samples[i, :]=np.random.choice(my_array, bootstrap_samples)
    return bs_samples

def bootstrap_stats(bs_samples):
    samples_shape=np.shape(bs_samples)
    bs_means=np.zeros((samples_shape[0], 1))
    bs_IC_means=np.zeros((1, 2))
    bs_medians=np.zeros((samples_shape[0], 1))
    bs_IC_medians=np.zeros((1, 2))
    for i in np.arange(samples_shape[0]):
        bs_means[i]=np.mean(bs_samples[i, :])
        bs_medians[i]=np.median(bs_samples[i, :])
    bs_IC_means[0, 0]=np.quantile(bs_means, .025)
    bs_IC_means[0, 1]=np.quantile(bs_means, .975)
    bs_IC_medians[0, 0]=np.quantile(bs_medians, .025)
    bs_IC_medians[0, 1]=np.quantile(bs_medians, .975)
    return np.mean(bs_means), np.median(bs_medians), bs_IC_means, bs_IC_medians, stats.sem(bs_means)

def getMeans(df_boot_stats, strain, parameter1, timepoints):
    biorep = len(df_boot_stats.TeckRep.unique())
    if isinstance(timepoints, list):
        m = [[df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp) & (df_boot_stats['TeckRep'] == i+1), [parameter1]].values[0][0] for i in range(biorep)] for tp in timepoints]
    else:
        m = [df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == timepoints) & (df_boot_stats['TeckRep'] == i+1), [parameter1]].values[0][0] for i in range(biorep)]
    return np.array(m)

def computeCI(my_array):
    array_size = np.shape(my_array)
    if len(array_size)> 1:
        CI=[None]*array_size[0]
        for i in range(array_size[0]):
            bs_samples=bootstrap_sampling(my_array[i,:], array_size[1], 100)
            CI[i]=bootstrap_stats(bs_samples)
    else:
        bs_samples=bootstrap_sampling(my_array, array_size[0], 100)
        CI=bootstrap_stats(bs_samples)
    return CI

def errobar(piliDim_std, p, x_loc):
    for i in range(len(piliDim_std)):
        pos=x_loc+i+0.5
        p.line(
            x=[pos,pos],
            y=[piliDim_std[i][0], piliDim_std[i][1]], 
            line_color = 'black',
            alpha=1
        )

        p.line(
            x=[pos-0.02,pos+0.02],
            y=[piliDim_std[i][1], piliDim_std[i][1]],
            line_color = 'black',
            alpha=1
        )
        p.line(
            x=[pos-0.02,pos+0.02],
            y=[piliDim_std[i][0], piliDim_std[i][0]],
            line_color = 'black',
            alpha=1
        )
        
def errobar_dots(pili_IC, p, x_loc, stat_mu, stat_CI):
    pos=x_loc
    if stat_CI != None:
        if np.size(pili_IC[stat_CI][0]) > 1:
            p.line(
                y=[pos,pos],
                x=pili_IC[stat_CI][0], 
                line_color = 'black',
                line_width = 3,
                alpha=1
            )
        else:
            p.line(
                y=[pos,pos],
                x=[pili_IC[stat_mu]-pili_IC[stat_CI][0], pili_IC[stat_mu]+pili_IC[stat_CI][0]], 
                line_color = 'black',
                line_width = 3,
                alpha=1
            )
    p.line(
            y=[pos-0.1,pos+0.1],
            x=np.ones(2)*pili_IC[stat_mu], 
            line_color = 'black',
            line_width = 3,
            alpha=1
        )

def getCI(df_boot_stats, strain, parameter1):
    timepoints=[0,3]
    CI=[None]*2
    for i, tp in enumerate(timepoints):
        sem=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter2]].values[0][0]
        sem=sem.replace('[','',1)
        sem=sem.replace(']','',1).rsplit(' ',100)
        condition=1
        while condition:
            if sem[len(sem)-1] == '':
                sem.pop()
            else:
                condition=0
        sem=[sem[0], sem[len(sem)-1]]
        CI[i]=sem
    return CI

In [3]:
app_root_dir = os.path.join(Path.home(), "Desktop\git\PhD_codes\Mechanosensation\Python_code\Motility_increase")
#os.chdir(app_root_dir + "Data\\")
os.chdir(app_root_dir + "\\newest_data\\")
df_tracks = pd.read_csv("TrackDataMSD.csv")
df_boot_stats = pd.read_csv("BootTrackData.csv")

Strains=list(df_tracks.Strain.unique())
TimePoints=list(df_tracks.TimePoint.unique())
Biological_Replicates=list(df_tracks.TeckRep.unique())
print(Strains)
print(TimePoints)
print(len(df_tracks))

means = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['RMSD'].mean().to_frame().reset_index()
stds = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['RMSD'].std().to_frame().reset_index()
medians = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['RMSD'].median().to_frame().reset_index()
sems = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['RMSD'].sem().to_frame().reset_index()

meansS = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['TRACK_MEAN_SPEED'].mean().to_frame().reset_index()
stdsS = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['TRACK_MEAN_SPEED'].std().to_frame().reset_index()
mediansS = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['TRACK_MEAN_SPEED'].median().to_frame().reset_index()
semsS = df_tracks.groupby(['Strain', 'TimePoint', 'TeckRep'])['TRACK_MEAN_SPEED'].sem().to_frame().reset_index()
meansS.head()

['cpdA-fliC-', 'cyaB-fliC-', 'fliC-', 'pilG-fliC-', 'pilH-fliC-', 'pilTU-fliC-']
[0, 1, 2, 3]
12027


Unnamed: 0,Strain,TimePoint,TeckRep,TRACK_MEAN_SPEED
0,cpdA-fliC-,0,1,0.891065
1,cpdA-fliC-,0,2,0.675618
2,cpdA-fliC-,0,3,0.568023
3,cpdA-fliC-,1,1,0.800024
4,cpdA-fliC-,2,1,0.645622


In [6]:
parameter1='medianSpeed'
parameter='TRACK_MEAN_SPEED'
fliC=getMeans(df_boot_stats, 'fliC-', parameter1, [0, 3])
pilTU=getMeans(df_boot_stats, 'pilTU-fliC-', parameter1, 0)
pilG=getMeans(df_boot_stats, 'pilG-fliC-', parameter1, 0)
cyaB=getMeans(df_boot_stats, 'cyaB-fliC-', parameter1, 0)
pilH=getMeans(df_boot_stats, 'pilH-fliC-', parameter1, 0)
cpdA=getMeans(df_boot_stats, 'cpdA-fliC-', parameter1, 0)

fliC_CI=computeCI(fliC)
pilTU_CI=computeCI(pilTU)
pilG_CI=computeCI(pilG)
pilH_CI=computeCI(pilH)
cyaB_CI=computeCI(cyaB)
cpdA_CI=computeCI(cpdA)

In [7]:
size_circles = 8
labels=['WT liq', 'WT sol', 'cpdA-', 'pilH-', 'cyaB-', 'pilG-', 'pilTU']
p_dots = bokeh.plotting.figure(
    width=600, 
    height=400,  
    x_axis_type='linear',
    x_range=(0, 1),
    y_axis_type='linear',
    y_range=(0, 4),
    x_axis_label = parameter + ' (µm)',
    title="Track mean speed on liquid vs. solid grown cells"
)
# 0=np.mean(bs_means), 1=np.median(bs_medians), 2=bs_IC_means, 3=bs_IC_medians, 4=stats.sem(bs_medians)
errobar_dots(fliC_CI[0], p_dots, 3.5, 0, None)
p_dots.circle(
    x = fliC[0,:],
    y = 3.5,
    color = None,
    line_color='grey',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(fliC_CI[1], p_dots, 3, 0, None)
p_dots.circle(
    x = fliC[1,:],
    y = 3,
    color = None,
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(cpdA_CI, p_dots, 2.5, 0, None)
p_dots.circle(
    x = cpdA,
    y = 2.5,
    color = None,
    line_color='orange',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(pilH_CI, p_dots, 2, 0, None)
p_dots.circle(
    x = pilH,
    y = 2,
    color = None,
    line_color='red',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(cyaB_CI, p_dots, 1.5, 0, None)
p_dots.circle(
    x = cyaB,
    y = 1.5,
    color = None,
    line_color='cyan',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(pilG_CI, p_dots, 1, 0, None)
p_dots.circle(
    x = pilG,
    y = 1,
    color = None,
    line_color='green',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(pilTU_CI, p_dots, 0.5, 0, None)
p_dots.circle(
    x = pilTU,
    y = 0.5,
    color = None,
    line_color='black',
    alpha=0.8,
    size=size_circles,
)

p_dots.xgrid.grid_line_color = None
p_dots.ygrid.grid_line_color = None
p_dots.xaxis.minor_tick_line_color = None
p_dots.yaxis.minor_tick_line_color = None
p_dots.yaxis.ticker = [0.5, 1, 1.5, 2, 2.5, 3, 3.5]
p_dots.yaxis.major_label_overrides = {0.5: 'pilTU-', 1: 'pilG-', 1.5: 'cyaB-', 2: 'pilH-', 2.5: 'cpdA-', 3: 'WT sol', 3.5: 'WT liq'}

p_dots.output_backend = 'svg'
bokeh.io.show(p_dots)

In [None]:
colors = bokeh.palettes.d3['Category10'][10]
parameter='TRACK_MEAN_SPEED'
parameter1='medianSpeed'
parameter2='IC_medianSpeed' #'medianSpeed_sem'
Strains=['fliC-', 'pilTU-fliC-', 'pilG-fliC-', 'cyaB-fliC-', 'pilH-fliC-', 'cpdA-fliC-']
pGood = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='linear',
    y_axis_type='linear',
    x_axis_label = 'Time (h)',
    y_axis_label = parameter + ' (µm/s)',
    title="Median tracks speed (bootstrap median, 95% CI)"
)

pGood.xgrid.visible = False
pGood.ygrid.visible = False
pGood.xaxis.minor_tick_line_color = None
pGood.yaxis.minor_tick_line_color = None
pGood.y_range=Range1d(0, 1.1)
pGood.xaxis.ticker = TimePoints

for s, strain in enumerate(Strains):
    for tp in TimePoints:
        b = [tp,tp]
        m=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter1]].values[0][0]
        sem=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter2]].values[0][0] #float(semsS[(semsS['Strain'] == strain ) & (semsS['TimePoint'] == tp)][parameter]),
        sem=sem.replace('[','',1)
        sem=sem.replace(']','',1).rsplit(' ',100)
        condition=1
        while condition:
            if sem[len(sem)-1] == '':
                sem.pop()
            else:
                condition=0
        sem=[sem[0], sem[len(sem)-1]]
        a=sem
        #if (m-sem < 0):
        #    a = [0, m+sem]
        #else: a = [m-sem, m+sem]
        pGood.line(
            x = b,
            y = a,
            color = 'black',
            alpha=0.8,
            line_width=3
        )
    pGood.line(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter1]],
        x = 'TimePoint',
        y = parameter1,
        line_color = colors[s],
        line_width=3,
        alpha=0.5
    )
    pGood.circle(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter1]],
        x = 'TimePoint',
        y = parameter1,
        line_color = colors[s],
        line_width=2,
        fill_color = 'white',
        alpha=0.5,
        size=10,
        #legend = 'Strain'
    )

pGood.output_backend = 'svg'
bokeh.io.show(pGood)

In [7]:
X=['liquid', 'solid']
fliC=getMeans(df_boot_stats, 'fliC-', parameter1)
pilTU=getMeans(df_boot_stats, 'pilTU-fliC-', parameter1)
pilG=getMeans(df_boot_stats, 'pilG-fliC-', parameter1)
cyaB=getMeans(df_boot_stats, 'cyaB-fliC-', parameter1)
pilH=getMeans(df_boot_stats, 'pilH-fliC-', parameter1)
cpdA=getMeans(df_boot_stats, 'cpdA-fliC-', parameter1)

fliC_CI=getCI(df_boot_stats, 'fliC-', parameter1)
pilTU_CI=getCI(df_boot_stats, 'pilTU-fliC-', parameter1)
pilG_CI=getCI(df_boot_stats, 'pilG-fliC-', parameter1)
cyaB_CI=getCI(df_boot_stats, 'cyaB-fliC-', parameter1)
pilH_CI=getCI(df_boot_stats, 'pilH-fliC-', parameter1)
cpdA_CI=getCI(df_boot_stats, 'cpdA-fliC-', parameter1)

data = {'Condition' : X,
        'fliC-'   : fliC,
        'pilTU-fliC-'   : pilTU,
        'pilG-fliC-'   : pilG,
        'cyaB-fliC-'   : cyaB,
        'pilH-fliC-'   : pilH,
        'cpdA-fliC-'   : cpdA}

source = ColumnDataSource(data=data)

p = figure(x_range=X, y_range=(0, 1.5), plot_height=400, plot_width=600, title="Track mean speed on liquid vs. solid grown cells")
w=0.09
p.vbar(x=dodge('Condition', -0.28, range=p.x_range), top='fliC-', width=w, source=source,
       color="bisque", legend_label='fliC-')

p.vbar(x=dodge('Condition',  -0.17,  range=p.x_range), top='pilTU-fliC-', width=w, source=source,
       color="darkgrey", legend_label="pilTU-fliC-")

p.vbar(x=dodge('Condition',  -0.06, range=p.x_range), top='pilG-fliC-', width=w, source=source,
       color="lime", legend_label="pilG-fliC-")

p.vbar(x=dodge('Condition', 0.06, range=p.x_range), top='cyaB-fliC-', width=w, source=source,
       color="royalblue", legend_label='cyaB-fliC-')

p.vbar(x=dodge('Condition',  0.17,  range=p.x_range), top='pilH-fliC-', width=w, source=source,
       color="red", legend_label="pilH-fliC-")

p.vbar(x=dodge('Condition',  0.28, range=p.x_range), top='cpdA-fliC-', width=w, source=source,
       color="green", legend_label="cpdA-fliC-")

errobar(fliC_CI, p, -0.28)
errobar(pilTU_CI, p, -0.17)
errobar(pilG_CI, p, -0.06)
errobar(cyaB_CI, p, 0.06)
errobar(pilH_CI, p, 0.17)
errobar(cpdA_CI, p, 0.28)

p.x_range.range_padding = 0.1
p.yaxis.axis_label = 'speed (µm/s)'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

p.output_backend = 'svg'
bokeh.io.show(p)

In [51]:
size_circles = 8
labels=['WT liq', 'WT sol', 'cpdA-', 'pilH-', 'cyaB-', 'pilG-', 'pilTU']
p_dots = bokeh.plotting.figure(
    width=600, 
    height=400,  
    x_axis_type='linear',
    x_range=(0, 1),
    y_axis_type='linear',
    y_range=(0, 4),
    x_axis_label = parameter + ' (µm)',
    title="Track mean speed on liquid vs. solid grown cells"
)

errobar_dots(fliC_CI[0], p_dots, 3.5)
p_dots.circle(
    x = fliC[0],
    y = 3.5,
    color = 'grey',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(fliC_CI[1], p_dots, 3)
p_dots.circle(
    x = fliC[1],
    y = 3,
    color = 'grey',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(cpdA_CI[0], p_dots, 2.5)
p_dots.circle(
    x = cpdA[0],
    y = 2.5,
    color = 'orange',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(pilH_CI[0], p_dots, 2)
p_dots.circle(
    x = pilH[0],
    y = 2,
    color = 'red',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(cyaB_CI[0], p_dots, 1.5)
p_dots.circle(
    x = cyaB[0],
    y = 1.5,
    color = 'cyan',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(pilG_CI[0], p_dots, 1)
p_dots.circle(
    x = pilG[0],
    y = 1,
    color = 'green',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)
errobar_dots(pilTU_CI[0], p_dots, 0.5)
p_dots.circle(
    x = pilTU[0],
    y = 0.5,
    color = 'black',
    line_color='black',
    alpha=0.8,
    size=size_circles,
)

p_dots.xgrid.grid_line_color = None
p_dots.ygrid.grid_line_color = None
p_dots.xaxis.minor_tick_line_color = None
p_dots.yaxis.minor_tick_line_color = None
p_dots.yaxis.ticker = [0.5, 1, 1.5, 2, 2.5, 3, 3.5]
p_dots.yaxis.major_label_overrides = {0.5: 'pilTU-', 1: 'pilG-', 1.5: 'cyaB-', 2: 'pilH-', 2.5: 'cpdA-', 3: 'WT sol', 3.5: 'WT liq'}

p_dots.output_backend = 'svg'
bokeh.io.show(p_dots)

In [39]:
p.line(
        y=[pos-0.02,pos+0.02],
        x=[pili_IC[1], pili_IC[1]],
        line_color = 'black',
        alpha=1
    )
    p.line(
        y=[pos-0.02,pos+0.02],
        x=[pili_IC[0], pili_IC[0]],
        line_color = 'black',
        alpha=1
    )

[['0.135', '0.164075'], ['0.705', '0.748']]

In [7]:
colors = bokeh.palettes.d3['Category10'][10]
parameter='meanRMSD'
parameter2='IC_meanRMSD'
parameter3='medianRMSD'
parameter4='IC_medianRMSD'


p = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='linear',
    y_axis_type='linear',
    x_axis_label = 'Time (h)',
    y_axis_label = parameter + ' (µm)',
    title="Root mean squared displacement (bootstrap mean, 95% CI)"
)

p1 = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='linear',
    y_axis_type='linear',
    x_axis_label = 'Time (h)',
    y_axis_label = parameter3 + ' (µm)',
    title="Root mean squared displacement (bootstrap median, 95% CI)"
)

In [8]:

for s, strain in enumerate(Strains):
    p.line(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter, parameter2, parameter3, parameter4]],
        x = 'TimePoint',
        y = parameter,
        color = colors[s],
        alpha=0.8,
        #legend = 'Strain'
    )
    p.circle(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter, parameter2, parameter3, parameter4]],
        x = 'TimePoint',
        y = parameter,
        color = colors[s],
        alpha=0.8,
        size=5,
    )

for s, strain in enumerate(Strains):
        for tp in TimePoints:
            b = [tp,tp]
            m=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter]].values[0][0]
            sem=list(df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter2]].values[0][0]) #float(semsS[(semsS['Strain'] == strain ) & (semsS['TimePoint'] == tp)][parameter]),
            a=sem
            #if (m-sem < 0):
            #    a = [0, m+sem]
            #else: a = [m-sem, m+sem]
            p.line(
                x = b,
                y = a,
                color = colors[s],
                alpha=0.5,
                line_width=3
            )

for s, strain in enumerate(Strains):
    p1.line(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter, parameter2, parameter3, parameter4]],
        x = 'TimePoint',
        y = parameter3,
        color = colors[s],
        alpha=0.8,
        legend = 'Strain'
    )
    p1.circle(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter, parameter2, parameter3, parameter4]],
        x = 'TimePoint',
        y = parameter3,
        color = colors[s],
        alpha=0.8,
        size=5,
    )

for s, strain in enumerate(Strains):
        for tp in TimePoints:
            b = [tp,tp]
            m=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter3]].values[0][0]
            sem=list(df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter4]].values[0][0]) #float(semsS[(semsS['Strain'] == strain ) & (semsS['TimePoint'] == tp)][parameter]),
            a=sem
            #if (m-sem < 0):
            #    a = [0, m+sem]
            #else: a = [m-sem, m+sem]
            p1.line(
                x = b,
                y = a,
                color = colors[s],
                alpha=0.5,
                line_width=3
            )

p.output_backend = 'svg'
p1.output_backend = 'svg'

bokeh.io.show(bokeh.layouts.row(p, p1))

ERROR:bokeh.core.validation.check:E-1006 (NON_MATCHING_DATA_SOURCES_ON_LEGEND_ITEM_RENDERERS): LegendItem.label is a field, but renderer data sources don't match: LegendItem(id='2001', ...)


In [9]:
colors = bokeh.palettes.d3['Category10'][10]
parameter1='meanRMSD'
parameter2='medianRMSD'
IC1='IC_meanRMSD'
IC2='IC_medianRMSD'
p2 = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='linear',
    y_axis_type='linear',
    x_axis_label = 'Time (h)',
    y_axis_label = parameter1 + ' (µm)',
    title="Root mean squared displacement (bootstrap mean, 95% IC)"
)

p3 = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='linear',
    y_axis_type='linear',
    x_axis_label = 'Time (h)',
    y_axis_label = parameter2 + ' (µm)',
    title="Root mean squared displacement (bootstrap median, 95% IC)"
)

In [10]:

for s, strain in enumerate(Strains):
    p2.line(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter1]],
        x = 'TimePoint',
        y = parameter1,
        color = colors[s],
        alpha=0.8,
        legend = 'Strain'
    )
    p2.circle(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter1]],
        x = 'TimePoint',
        y = parameter1,
        color = colors[s],
        alpha=0.8,
        size=5
    )


for s, strain in enumerate(Strains):
    p3.line(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter2]],
        x = 'TimePoint',
        y = parameter2,
        color = colors[s],
        alpha=0.8,
        legend = 'Strain'
    )
    p3.circle(
        source = df_boot_stats.loc[(df_boot_stats['Strain'] == strain), ['Strain', 'TimePoint', parameter2]],
        x = 'TimePoint',
        y = parameter2,
        color = colors[s],
        alpha=0.8,
        size=5
    )  

for s, strain in enumerate(Strains):
        for tp in TimePoints:
            b = [tp,tp]
            m=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter1]].values[0][0]
            ic=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [IC1]].values[0][0]
            if (m-ic[0] < 0):
                a = [0, m+ic[1]]
            else: a = [m-ic[0], m+ic[1]]           
            p2.line(
                x = b,
                y = ic,
                color = colors[s],
                alpha=0.5,
                line_width=3
            )
for s, strain in enumerate(Strains):
        for tp in TimePoints:
            b = [tp,tp]
            m=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [parameter2]].values[0][0]
            ic=df_boot_stats.loc[(df_boot_stats['Strain'] == strain) & (df_boot_stats['TimePoint'] == tp), [IC2]].values[0][0]
            if (m-ic[0] < 0):
                a = [0, m+ic[1]]
            else: a = [m-ic[0], m+ic[1]]            
            p3.line(
                x = b,
                y = ic,
                color = colors[s],
                alpha=0.5,
                line_width=3
            )

p2.output_backend = 'svg'
p3.output_backend = 'svg'

bokeh.io.show(bokeh.layouts.row(p2, p3))



UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U32'), dtype('<U32')) -> dtype('<U32')

In [14]:
colors = bokeh.palettes.d3['Category10'][10]
parameter1='RMSD'
parameter2='PaQa_RFP_ratio'
p5 = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='log',
    y_axis_type='log',
    x_axis_label = parameter1,
    y_axis_label = parameter2,
    title="PaQa/RFP ratio vs RMSD"
)
Strains2=['fliC-', 'pilH-fliC-']
for s, strain in enumerate(Strains):
    p5.circle(
        source = df_tracks.loc[(df_tracks['Strain'] == strain) & (df_tracks['TRACK_START'] == 0), [parameter1, parameter2]],
        x = parameter1,
        y = parameter2,
        color = colors[s],
        alpha=0.8,
        size=5,
        legend=strain
    )

bokeh.io.show(p5)

  and should_run_async(code)


KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['PaQa_RFP_ratio'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [15]:
for strain in Strains:
    X=np.array(df_tracks.loc[(df_tracks['Strain'] == strain) & (df_tracks['TRACK_START'] == 0), [parameter1]].values, dtype=np.float32)
    Y=np.array(df_tracks.loc[(df_tracks['Strain'] == strain) & (df_tracks['TRACK_START'] == 0), [parameter2]].values, dtype=np.float32)
    print(parameter1+' vs '+parameter2+' in '+strain+':')
    [spearman_r, spearman_p]=stats.spearmanr(X, Y)
    print('Spearman correlation = '+str(spearman_r)+', p-value = '+ str(spearman_p))
    [pearson_r, pearson_p]=stats.pearsonr(X, Y)
    print('Pearson correlation = '+str(pearson_r)+', p-value = '+str(pearson_p))
    print('')

  and should_run_async(code)


KeyError: "None of [Index(['PaQa_RFP_ratio'], dtype='object')] are in the [columns]"

In [16]:
colors = bokeh.palettes.d3['Category10'][10]
parameter1='TimePoint'
parameter2='PaQa_RFP_ratio'
p6 = bokeh.plotting.figure(
    width=600, 
    height=600,  
    x_axis_type='linear',
    y_axis_type='log',
    x_axis_label = parameter1,
    y_axis_label = parameter2,
    title="PaQa/RFP ratio vs timePoint"
)
Strains2=['fliC-', 'pilH-fliC-']
for s, strain in enumerate(Strains2):
    p6.circle(
        source = df_tracks.loc[(df_tracks['Strain'] == strain) & (df_tracks['TRACK_START'] == 0), [parameter1, parameter2]],
        x = parameter1,
        y = parameter2,
        color = colors[s],
        alpha=0.8,
        size=5,
        legend=strain
    )

bokeh.io.show(p6)

  and should_run_async(code)


KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['PaQa_RFP_ratio'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"