In [1]:
import os
import glob
import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.palettes
from bokeh.transform import jitter
import seaborn as sns
import matplotlib
from bokeh.models import HoverTool
from scipy import stats
import pandas as pd
import math

from bokeh.layouts import row
bokeh.io.output_notebook()

In [2]:
def bootstrap_sampling(my_array, bootstrap_samples, bootstrap_replicates, rng=None):
    """Draw bootstrap resamples (with replacement) from a 1-D set of observations.

    Parameters
    ----------
    my_array : 1-D array-like
        Observations to resample from.
    bootstrap_samples : int
        Number of values drawn for each replicate.
    bootstrap_replicates : int
        Number of replicates (rows of the result).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, as before. Pass e.g. ``np.random.default_rng(seed)`` to make
        the resampling reproducible.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(bootstrap_replicates, bootstrap_samples)``;
        each row is one resample.
    """
    sampler = np.random if rng is None else rng
    # A single vectorised draw replaces the per-replicate Python loop.
    draws = sampler.choice(my_array, size=(bootstrap_replicates, bootstrap_samples))
    # The result has always been a float array, whatever the input dtype.
    return np.asarray(draws, dtype=float)

def bootstrap_stats(bs_samples):
    """Summarise a matrix of bootstrap resamples (one replicate per row).

    Parameters
    ----------
    bs_samples : 2-D numpy.ndarray
        Resampled values, shape ``(replicates, samples)``.

    Returns
    -------
    tuple
        ``(mean of the replicate means, median of the replicate medians,
        interval of the means, interval of the medians)``. Each interval is
        a ``(1, 2)`` array holding the 2.5 % and 97.5 % quantiles of the
        corresponding per-replicate statistic.
    """
    n_replicates = np.shape(bs_samples)[0]
    replicate_ids = np.arange(n_replicates)

    def per_replicate(statistic):
        # Apply `statistic` to every row and keep the results as a column vector.
        values = [statistic(bs_samples[r, :]) for r in replicate_ids]
        return np.array(values, dtype=float).reshape(n_replicates, 1)

    def percentile_interval(values):
        # Central 95 % interval of `values`, laid out as a single row.
        return np.array([[np.quantile(values, .025), np.quantile(values, .975)]])

    replicate_means = per_replicate(np.mean)
    replicate_medians = per_replicate(np.median)
    interval_means = percentile_interval(replicate_means)
    interval_medians = percentile_interval(replicate_medians)
    return np.mean(replicate_means), np.median(replicate_medians), interval_means, interval_medians

In [3]:
# Root folder that is searched recursively for the per-acquisition CSV files.
path='D:/Pili_and_PaQa_counts_data/fliC-/solid/20200116'
# os.walk silently yields nothing for a missing folder, so this chdir also
# acts as an existence check on `path`.
os.chdir(path)
extension = 'csv'
# Keep only files whose name ENDS with ".csv" (a plain substring test would
# also pick up names such as "csv_notes.txt"). Sorting makes the row order of
# the combined table independent of the order in which the OS lists files.
list_csv = sorted(
    os.path.join(root, name)
    for root, _dirs, files in os.walk(path, topdown=False)
    for name in files
    if name.lower().endswith('.' + extension)
)
if not list_csv:
    raise FileNotFoundError('No .' + extension + ' file found under ' + path)

# The combined table is written to a separate analysis folder; the working
# directory is left there because the next cell reads the file by relative name.
os.chdir("C:/Users/tala/Desktop/Pole_analysis")
new_dir = "Data_PaQa\\"
os.makedirs(new_dir, exist_ok=True)
os.chdir(new_dir)
combined_csv = pd.concat([pd.read_csv(f) for f in list_csv], sort=False)
combined_csv.to_csv("Pili_PaQa_Data.csv", index=False, encoding='utf-8-sig')

In [4]:
# Load the combined table; '*' entries are read as NaN.
df = pd.read_csv("Pili_PaQa_Data.csv", sep=',', na_values='*')

# Derived columns: each pole's share of the whole-cell fluorescence, and the
# per-cell totals of pili and flagella summed over both poles.
df['PercentTotalFluoDim']=df['TotalFluorescencePoleDim']/df['CellTotalFluorescence']
df['PercentTotalFluoBright']=df['TotalFluorescencePoleBright']/df['CellTotalFluorescence']
df['TotalPili']=df['Nb_Pili_PoleDim']+df['Nb_Pili_PoleBright']
df['TotalFlagella']=df['Nb_Flagella_PoleDim']+df['Nb_Flagella_PoleBright']

# Columns compared in the rest of the notebook.
param1='CellTotalFluorescence'
param2='TotalPili'
poisson_lambda=df[param2].mean()

# Number of cells for each pili count 0 .. tot_pili_limit-1. Cells whose count
# is NaN or >= tot_pili_limit are not tallied in any bin.
tot_pili_limit=16
N=len(df)
pili_per_cell=df[param2]
# int64 rather than int16, so a large data set cannot overflow a bin.
Ncells_per_pili=np.array(
    [(pili_per_cell == k).sum() for k in range(tot_pili_limit)],
    dtype=np.int64,
)

# One-line summary: total number of cells followed by the size of every bin.
summary_fields=['Ncells='+str(N)]
summary_fields+=['N_'+str(k)+'pili='+str(count) for k, count in enumerate(Ncells_per_pili)]
print(', '.join(summary_fields))

N=92, N_0pili=12, N_1pili=21, N_2pili=16, N_3pili=19, N_4pili=11, N_5pili=4, N_6pili=4, N_7pili=2, N_8pili=0, N_9pili=1, N_10pili=1, N_11pili=0, N_12pili=1, N_13pili=0, N_14pili=0, N_15pili=0


In [5]:
# Bootstrap the fluorescence of the cells in each pili-count group.
n_bootstrap_replicates=1000

nb_pili=np.arange(tot_pili_limit, dtype=float).reshape(-1, 1)
# Start from NaN rather than 0: a pili count with no cells has no estimate,
# and a 0 would later be plotted as if the measured fluorescence were zero.
boot_mean=np.full((tot_pili_limit, 1), np.nan)
boot_median=np.full((tot_pili_limit, 1), np.nan)
boot_IC_mean=np.full((tot_pili_limit, 2), np.nan)
boot_IC_median=np.full((tot_pili_limit, 2), np.nan)
for n_pili in range(tot_pili_limit):
    # Values of the `param1` column for the cells having exactly n_pili pili.
    cell_fluorescence_array=df.loc[df[param2]==n_pili, param1].values
    if len(cell_fluorescence_array) == 0:
        continue
    # Each replicate resamples as many cells as the group contains.
    bs_cell_fluorescence_array=bootstrap_sampling(
        cell_fluorescence_array, len(cell_fluorescence_array), n_bootstrap_replicates
    )
    bs_means, bs_medians, IC_means, IC_medians=bootstrap_stats(bs_cell_fluorescence_array)
    boot_mean[n_pili]=bs_means
    boot_IC_mean[n_pili,:]=IC_means
    boot_median[n_pili]=bs_medians
    boot_IC_median[n_pili,:]=IC_medians

# One row per pili count: point estimates as numeric columns, and the interval
# bounds stored as one 2-element array per row.
names = [param2, 'bootMean','bootMedian']
data = np.concatenate((nb_pili, boot_mean, boot_median), axis=1)
df_boot_stats=pd.DataFrame(data=data, columns=names)
df_boot_stats['IC_mean']=list(boot_IC_mean)
df_boot_stats['IC_median']=list(boot_IC_median)

In [6]:
# Fraction of all cells found in each pili-count bin (array index = number of pili).
Prob_per_pili = np.true_divide(Ncells_per_pili, N)
# Shown as the cell output; the total is 1 only when every cell falls in one of the bins.
np.sum(Prob_per_pili)

1.0

In [8]:
# Empirical distribution of the number of pili per cell.
p0 = bokeh.plotting.figure(
    width=600,
    height=600,
    x_axis_label='# pili',
    y_axis_type='linear',
    y_axis_label ='P(# pili)',
    title="Probability of having # pili"
)

x_1 = list(range(tot_pili_limit))

p0.line(
    x=x_1,
    y=Prob_per_pili,
    line_color='blue',
    alpha=0.7,
    # `legend_label` replaces the deprecated `legend` glyph keyword.
    legend_label='Data'
)

bokeh.io.show(p0)

In [12]:
# Rank-based (Spearman) and linear (Pearson) correlation between the two
# selected columns, computed over all rows of df.
print(f'{param1} vs {param2}:')

spearman_r, spearman_p = stats.spearmanr(df[param1], df[param2])
print(f'Spearman correlation = {spearman_r}, p-value = {spearman_p}')

pearson_r, pearson_p = stats.pearsonr(df[param1], df[param2])
print(f'Pearson correlation = {pearson_r}, p-value = {pearson_p}')

CellTotalFluorescence vs TotalPili:
Spearman correlation = 0.4451217674457984, p-value = 8.756699194870884e-06
Pearson correlation = 0.40669993192523035, p-value = 5.747830022718905e-05


In [10]:
# Columns of df_boot_stats shown on each figure: point estimate and its interval.
parameter1, parameter2 = 'bootMean', 'bootMedian'
IC1, IC2 = 'IC_mean', 'IC_median'

# Both figures share size, axis types and axis labels; only the title differs.
_shared_figure_options = dict(
    width=600,
    height=600,
    x_axis_type='linear',
    y_axis_type='linear',
    x_axis_label='#Pili',
    y_axis_label='total fluorescence',
)

p3 = bokeh.plotting.figure(
    title="Mean cell total fluorescence per pili nb (bootstrap mean, 95% IC)",
    **_shared_figure_options
)

p4 = bokeh.plotting.figure(
    title="Median cell total fluorescence per pili nb (bootstrap median, 95% IC)",
    **_shared_figure_options
)

In [11]:
def _draw_bootstrap_panel(fig, stat_column, ci_column):
    """Draw the raw cells, the bootstrap intervals and the bootstrap estimates on `fig`.

    Parameters
    ----------
    fig : bokeh figure
        Figure that receives the glyphs.
    stat_column : str
        Column of df_boot_stats holding the point estimate for each pili count.
    ci_column : str
        Column of df_boot_stats holding, for each pili count, the pair
        (lower bound, upper bound) of the interval.
    """
    # Individual cells in the background.
    fig.circle(
        source=df,
        x=param2,
        y=param1,
        line_color='black',
        fill_color='gray',
        alpha=0.7,
    )
    # One vertical segment per pili count. The stored values already are the
    # interval bounds, so they are used directly as the y coordinates.
    for n_pili, ci_bounds in zip(df_boot_stats[param2], df_boot_stats[ci_column]):
        fig.line(
            x=[n_pili, n_pili],
            y=ci_bounds,
            color='black',
            alpha=0.5,
            line_width=3,
        )
    # Point estimates drawn last so that they sit on top of the segments.
    fig.circle(
        source=df_boot_stats.loc[:, [param2, stat_column]],
        x=param2,
        y=stat_column,
        line_color='black',
        fill_color='white',
        alpha=0.6,
        size=10,
    )
    fig.output_backend = 'svg'


# Glyphs are appended to the existing figures, so re-running this cell without
# re-creating p3 and p4 first draws everything a second time.
_draw_bootstrap_panel(p3, parameter1, IC1)
_draw_bootstrap_panel(p4, parameter2, IC2)

bokeh.io.show(row(p3, p4))