In [1]:
import os
import glob
import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.palettes
from bokeh.transform import jitter
import seaborn as sns
import matplotlib
from bokeh.models import HoverTool
from scipy import stats
import pandas as pd


from bokeh.layouts import row
bokeh.io.output_notebook()

In [2]:
#Functions
def bootstrap_sampling(my_array, bootstrap_samples, bootstrap_replicates):
    bs_samples=np.zeros((bootstrap_replicates, bootstrap_samples))
    bs_rep=np.arange(bootstrap_replicates)
    for i in bs_rep:
        bs_samples[i, :]=np.random.choice(my_array, bootstrap_samples)
    return bs_samples

def bootstrap_stats(bs_samples):
    samples_shape=np.shape(bs_samples)
    bs_means=np.zeros((samples_shape[0], 1))
    bs_IC_means=np.zeros((1, 2))
    bs_medians=np.zeros((samples_shape[0], 1))
    bs_IC_medians=np.zeros((1, 2))
    for i in np.arange(samples_shape[0]):
        bs_means[i]=np.mean(bs_samples[i, :])
        bs_medians[i]=np.median(bs_samples[i, :])
    bs_IC_means[0, 0]=np.quantile(bs_means, .025)
    bs_IC_means[0, 1]=np.quantile(bs_means, .975)
    bs_IC_medians[0, 0]=np.quantile(bs_medians, .025)
    bs_IC_medians[0, 1]=np.quantile(bs_medians, .975)
    return np.mean(bs_means), np.median(bs_medians), bs_IC_means, bs_IC_medians

In [3]:
#set root folder
path='D:/Pili_and_PaQa_counts_data/pilB_mNG/solid'
os.chdir(path)
extension = 'csv'
list_csv = []
for root, dirs, files in os.walk(path, topdown=False):
    for name in files:
        if extension in name:
            list_csv.append(os.path.join(root, name))
os.chdir("C:/users/tala/git/PhD_codes/Mechanosensation/Python_code/Pole_analysis/")
new_dir = "Data\\"
if not os.path.exists(new_dir):
    os.mkdir(new_dir)
os.chdir(new_dir)
combined_csv = pd.concat([pd.read_csv(f) for f in list_csv ], sort=False)
combined_csv['Strain'] = 'nan'
combined_csv['Growth'] = 'nan'

raw_names=list(combined_csv.Label.unique())
for file in raw_names:
    split_name=file.rsplit('_event',100)[0].rsplit('_',100)
    if 'Agarose' in split_name or 'sol' in split_name or 'Agar' in split_name:
        growth='Solid'
        if 'Agarose' in split_name:
            growthInd = split_name.index('Agarose')
        elif 'sol' in split_name:
            growthInd = split_name.index('sol')
        elif 'Agar' in split_name:
            growthInd = split_name.index('Agar')
    else:
        growth='Liquid'
        if 'liq' in split_name:
            growthInd = split_name.index('event*')
        else:
            growthInd = len(split_name)-3
    separator = '_'
    strain =  separator.join(split_name[0:growthInd])
    combined_csv.Strain.loc[(combined_csv['Label']==file)]=strain
    combined_csv.Growth.loc[(combined_csv['Label']==file)]=growth
combined_csv.to_csv( "PolesData.csv", index=False, encoding='utf-8-sig')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [4]:
df = pd.read_csv('PolesData.csv', sep=',', na_values='*')
df['PercentTotalFluoDim']=df['TotalFluorescencePoleDim']/df['CellTotalFluorescence']
df['PercentTotalFluoBright']=df['TotalFluorescencePoleBright']/df['CellTotalFluorescence']
df['TotalPili']=df['Nb_Pili_PoleDim']+df['Nb_Pili_PoleBright']
df['TotalFlagella']=df['Nb_Flagella_PoleDim']+df['Nb_Flagella_PoleBright']

y_param1='Nb_Pili_PoleDim'
y_param2='Nb_Pili_PoleBright'
tot_pili_limit=10
Ncells_per_pili_Dim=np.empty((tot_pili_limit), dtype='int16')
Ncells_per_pili_Bright=np.empty((tot_pili_limit), dtype='int16')
N=len(df)
print('Dim pole:')
print('NCells='+str(N), end=', ')
for i in range(tot_pili_limit):
    Ncells_per_pili_Dim[i]=len(df.loc[(df[y_param1]==i)])
    if (i<tot_pili_limit-1):
        print('N_'+str(i)+'pili='+str(Ncells_per_pili_Dim[i]), end=', ')
    else: print('N_'+str(i)+'pili='+str(Ncells_per_pili_Dim[i]))
print('Bright pole:')
print('Ncells='+str(N), end=', ')
for i in range(tot_pili_limit):
    Ncells_per_pili_Bright[i]=len(df.loc[(df[y_param2]==i)])
    if (i<tot_pili_limit-1):
        print('N_'+str(i)+'pili='+str(Ncells_per_pili_Bright[i]), end=', ')
    else: print('N_'+str(i)+'pili='+str(Ncells_per_pili_Bright[i]))

Dim pole:
NCells=232, N_0pili=190, N_1pili=28, N_2pili=11, N_3pili=2, N_4pili=1, N_5pili=0, N_6pili=0, N_7pili=0, N_8pili=0, N_9pili=0
Bright pole:
Ncells=232, N_0pili=178, N_1pili=35, N_2pili=8, N_3pili=7, N_4pili=0, N_5pili=2, N_6pili=0, N_7pili=2, N_8pili=0, N_9pili=0


In [5]:
sumPoles=(Ncells_per_pili_Dim+Ncells_per_pili_Bright)
Prob_Dim_if_nb_pili = [(Ncells_per_pili_Dim[i]/sumPoles[i]) for i in range(tot_pili_limit) if sumPoles[i]>0]
Prob_Bright_if_nb_pili=[(Ncells_per_pili_Bright[i]/sumPoles[i]) for i in range(tot_pili_limit) if sumPoles[i]>0]

Prob_per_pili_Dim= Ncells_per_pili_Dim/N
Prob_per_pili_Bright= Ncells_per_pili_Bright/N

In [6]:
p0 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# pili', 
    y_axis_type='linear',
    y_axis_label ='P(# pili)',
    title="Probability of having # pili"
)

x_1=range(tot_pili_limit)


p0.line(
    x=x_1,
    y=Prob_per_pili_Dim, 
    line_color = 'blue',
    #fill_color = 'blue',
    alpha=0.7,
    #legend = labelsAll[i]
    legend_label = 'Dim Pole'
)


p0.line(
    x=x_1,
    y=Prob_per_pili_Bright, 
    line_color = 'red',
    #fill_color = 'red',
    alpha=0.7,
    #legend = labelsAll[i]
    legend_label = 'Bright pole'
)


bokeh.io.show(p0)

In [7]:
p1 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# pili', 
    y_axis_type='linear',
    y_axis_label ='P(# pili|pole state)',
    title="Probability of having # pili if bright or dim pole"
)

x_1=range(len(Prob_Dim_if_nb_pili))


p1.line(
    x=x_1,
    y=Prob_Dim_if_nb_pili, 
    line_color = 'blue',
    #fill_color = 'blue',
    alpha=0.7,
    #legend = labelsAll[i]
    legend_label = 'Dim Pole'
)


p1.line(
    x=x_1,
    y=Prob_Bright_if_nb_pili, 
    line_color = 'red',
    #fill_color = 'red',
    alpha=0.7,
    #legend = labelsAll[i]
    legend_label = 'Bright pole'
)


bokeh.io.show(p1)

In [8]:
nb_pili=np.zeros((tot_pili_limit, 1))
boot_meanDim=np.zeros((tot_pili_limit, 1))
boot_medianDim=np.zeros((tot_pili_limit, 1))
boot_IC_meanDim=np.zeros((tot_pili_limit, 2))
boot_IC_medianDim=np.zeros((tot_pili_limit, 2))
boot_meanBright=np.zeros((tot_pili_limit, 1))
boot_medianBright=np.zeros((tot_pili_limit, 1))
boot_IC_meanBright=np.zeros((tot_pili_limit, 2))
boot_IC_medianBright=np.zeros((tot_pili_limit, 2))
for n_pili in range(tot_pili_limit):
    cell_fluorescence_arrayDim=df.TotalFluorescencePoleDim.loc[(df[y_param1]==n_pili)].values
    cell_fluorescence_arrayBright=df.TotalFluorescencePoleBright.loc[(df[y_param2]==n_pili)].values
    nb_pili[n_pili]=n_pili
    if (len(cell_fluorescence_arrayDim) > 0):
        bs_Dim_fluorescence_array=bootstrap_sampling(cell_fluorescence_arrayDim, len(cell_fluorescence_arrayDim), 1000)
        [bs_means, bs_medians, IC_means, IC_medians]=bootstrap_stats(bs_Dim_fluorescence_array)
        boot_meanDim[n_pili]=bs_means
        boot_IC_meanDim[n_pili,:]=IC_means
        boot_medianDim[n_pili]=bs_medians
        boot_IC_medianDim[n_pili,:]=IC_medians
    if (len(cell_fluorescence_arrayBright) > 0):
        bs_Bright_fluorescence_array=bootstrap_sampling(cell_fluorescence_arrayBright, len(cell_fluorescence_arrayBright), 1000)
        [bs_means, bs_medians, IC_means, IC_medians]=bootstrap_stats(bs_Bright_fluorescence_array)
        boot_meanBright[n_pili]=bs_means
        boot_IC_meanBright[n_pili,:]=IC_means
        boot_medianBright[n_pili]=bs_medians
        boot_IC_medianBright[n_pili,:]=IC_medians

names = ['TotalPili', 'bootMeanDim','bootMedianDim', 'bootMeanBright','bootMedianBright']
data = np.concatenate((nb_pili, boot_meanDim, boot_medianDim, boot_meanBright, boot_medianBright), axis=1)
df_boot_stats=pd.DataFrame(data=np.transpose(data), index=names).T
df_boot_stats['IC_meanDim']=list(boot_IC_meanDim)
df_boot_stats['IC_medianDim']=list(boot_IC_medianDim)
df_boot_stats['IC_meanBright']=list(boot_IC_meanBright)
df_boot_stats['IC_medianBright']=list(boot_IC_medianBright)
df_boot_stats

Unnamed: 0,TotalPili,bootMeanDim,bootMedianDim,bootMeanBright,bootMedianBright,IC_meanDim,IC_medianDim,IC_meanBright,IC_medianBright
0,0.0,8345.544821,7866.5,14790.579758,13860.0,"[7853.380131578947, 8821.478552631579]","[7309.237499999999, 8433.0]","[13805.492556179775, 15736.789606741571]","[12509.0, 14678.5]"
1,1.0,14238.189286,11316.5,18916.312,18117.0,"[11547.758035714285, 17353.189285714285]","[9257.0, 15953.0]","[16229.187142857143, 21936.960714285713]","[15057.0, 20669.649999999983]"
2,2.0,18475.577545,13593.0,21141.427375,20022.0,"[12569.593181818183, 25507.64090909091]","[10035.0, 26062.0]","[18964.5875, 23762.04375]","[18856.0, 25323.0]"
3,3.0,24199.2465,24194.5,21632.413857,18004.0,"[23763.0, 24626.0]","[23763.0, 24626.0]","[14938.25, 30422.035714285714]","[12441.0, 27419.0]"
4,4.0,39962.0,39962.0,0.0,0.0,"[39962.0, 39962.0]","[39962.0, 39962.0]","[0.0, 0.0]","[0.0, 0.0]"
5,5.0,0.0,0.0,43516.357,43676.0,"[0.0, 0.0]","[0.0, 0.0]","[29163.0, 58189.0]","[29163.0, 58189.0]"
6,6.0,0.0,0.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]"
7,7.0,0.0,0.0,33078.948,33285.0,"[0.0, 0.0]","[0.0, 0.0]","[23473.0, 43097.0]","[23473.0, 43097.0]"
8,8.0,0.0,0.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]"
9,9.0,0.0,0.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]"


In [9]:
x_param1='TotalFluorescencePoleDim'#'PercentTotalFluoDim'
y_param1='Nb_Pili_PoleDim'
x_param2='TotalFluorescencePoleBright' #'PercentTotalFluoBright'
y_param2='Nb_Pili_PoleBright'
param2='TotalPili'
IC1='IC_medianDim'
IC2='IC_medianBright'
param1='bootMedianDim'
param3='bootMedianBright'
p2 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# Pili', 
    y_axis_type='linear',
    y_axis_label ='Pole Total fluorescence',
    title="Pole fluorescence vs #Pili (bootstrap median, 95% CI)"
)

p2.circle(
    source=df,
    x=y_param1,
    y=x_param1, 
    line_color = 'blue',
    fill_color = 'blue',
    alpha=0.7,
    #legend = labelsAll[i]
    legend_label = 'Dim Pole'
)
p2.circle(
    source=df,
    x=y_param2,
    y=x_param2, 
    line_color = 'red',
    fill_color = 'red',
    alpha=0.7,
    #legend = labelsAll[i]
    legend_label = 'Bright Pole'
)

for n_pili in list(df_boot_stats[param2]):
    b = [n_pili,n_pili]
    ic=df_boot_stats.loc[(df_boot_stats[param2] == n_pili), [IC1]].values[0][0]
    p2.line(
        x = b,
        y = ic,
        color = 'black',
        alpha=0.7,
        line_width=3
    )
    
for n_pili in list(df_boot_stats['TotalPili']):
    b = [n_pili,n_pili]
    ic=df_boot_stats.loc[(df_boot_stats[param2] == n_pili), [IC2]].values[0][0]
    p2.line(
        x = b,
        y = ic,
        color = 'black',
        alpha=0.7,
        line_width=3
    )

p2.line(
    source = df_boot_stats.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'blue',
    alpha=0.6
)


p2.circle(
    source = df_boot_stats.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'black',
    fill_color = 'white',
    alpha=0.6,
    size=10
)

p2.line(
    source = df_boot_stats.loc[:, [param2, param3]],
    x = param2,
    y = param3,
    line_color = 'red',
    alpha=0.6
) 

p2.circle(
    source = df_boot_stats.loc[:, [param2, param3]],
    x = param2,
    y = param3,
    line_color = 'black',
    fill_color = 'white',
    alpha=0.6,
    size=10
)  

p2.output_backend = 'svg'

bokeh.io.show(p2)

In [10]:
x_param3='CellTotalFluorescence'
y_param3='PolarRatio'

print(x_param3+' vs '+y_param3+':')
[spearman_r, spearman_p]=stats.spearmanr(df[x_param3], df[y_param3])
print('Spearman correlation = '+str(spearman_r)+', p-value = '+ str(spearman_p))

[pearson_r, pearson_p]=stats.pearsonr(df[x_param3], df[y_param3])
print('Pearson correlation = '+str(pearson_r)+', p-value = '+str(pearson_p))

CellTotalFluorescence vs PolarRatio:
Spearman correlation = 0.46827340189779126, p-value = 4.793328422441306e-14
Pearson correlation = 0.41044029189547626, p-value = 7.674395724164643e-11


In [11]:
x_param3='CellTotalFluorescence'
y_param3='PolarRatio'

p1 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='Total fluorescence', 
    y_axis_type='linear',
    y_axis_label ='Polar Ratio',
    title="Polar Ratio vs Cell Total fluorescence"
)

p1.circle(
    source=df,
    x=x_param3,
    y=y_param3, 
    line_color = 'black',
    fill_color = 'gray',
    alpha=0.7,
    #legend = labelsAll[i]
)

bokeh.io.show(p1)

In [12]:
y_param1='Nb_Pili_PoleDim'
y_param2='Nb_Pili_PoleBright'
x_param1='TotalFluorescencePoleDim'
x_param2='TotalFluorescencePoleBright'
parameterX='TotalPili'
parameterY='PoleTotalFluorescence'
poles_pili_dim=list(df[y_param1])
poles_pili_bright=list(df[y_param2])
poles_pili=poles_pili_dim+poles_pili_bright
poles_pili=np.asarray(poles_pili).reshape(len(poles_pili),1)
poles_fluo_dim=list(df[x_param1])
poles_fluo_bright=list(df[x_param2])
poles_fluo=poles_fluo_dim+poles_fluo_bright
poles_fluo=np.asarray(poles_fluo).reshape(len(poles_fluo),1)

names = [parameterX, parameterY]
data = np.concatenate((poles_pili, poles_fluo), axis=1)
df_long=pd.DataFrame(data=np.transpose(data), index=names).T

In [13]:
x_param3=parameterX
y_param3=parameterY

print(x_param3+' vs '+y_param3+':')
[spearman_r, spearman_p]=stats.spearmanr(df_long[x_param3], df_long[y_param3])
print('Spearman correlation = '+str(spearman_r)+', p-value = '+ str(spearman_p))

[pearson_r, pearson_p]=stats.pearsonr(df_long[x_param3], df_long[y_param3])
print('Pearson correlation = '+str(pearson_r)+', p-value = '+str(pearson_p))

TotalPili vs PoleTotalFluorescence:
Spearman correlation = 0.3639797651036486, p-value = 5.556434129647949e-16
Pearson correlation = 0.4633425864931939, p-value = 4.5194878367714456e-26


In [14]:
nb_pili=np.zeros((tot_pili_limit, 1))
boot_mean=np.zeros((tot_pili_limit, 1))
boot_median=np.zeros((tot_pili_limit, 1))
boot_IC_mean=np.zeros((tot_pili_limit, 2))
boot_IC_median=np.zeros((tot_pili_limit, 2))

for n_pili in range(tot_pili_limit):
    Pole_fluorescence_array=df_long.PoleTotalFluorescence.loc[(df_long[parameterX]==n_pili)].values
    nb_pili[n_pili]=n_pili
    if (len(Pole_fluorescence_array) > 0):
        bs_fluorescence_array=bootstrap_sampling(Pole_fluorescence_array, len(Pole_fluorescence_array), 1000)
        [bs_means, bs_medians, IC_means, IC_medians]=bootstrap_stats(bs_fluorescence_array)
        boot_mean[n_pili]=bs_means
        boot_IC_mean[n_pili,:]=IC_means
        boot_median[n_pili]=bs_medians
        boot_IC_median[n_pili,:]=IC_medians

names = ['TotalPili', 'bootMean','bootMedian']
data = np.concatenate((nb_pili, boot_mean, boot_median), axis=1)
df_boot_stats_tot=pd.DataFrame(data=np.transpose(data), index=names).T
df_boot_stats_tot['IC_mean']=list(boot_IC_mean)
df_boot_stats_tot['IC_median']=list(boot_IC_median)
df_boot_stats_tot

Unnamed: 0,TotalPili,bootMean,bootMedian,IC_mean,IC_median
0,0.0,11477.683163,9704.0,"[10861.840557065218, 12116.75081521739]","[9292.987500000001, 10658.300000000001]"
1,1.0,16843.540698,15239.0,"[14830.825793650793, 18966.75119047619]","[12005.0, 18660.0]"
2,2.0,19640.080684,19031.0,"[15946.905263157894, 23268.527631578945]","[13593.0, 20455.0]"
3,3.0,22089.926444,20776.0,"[16209.577777777778, 29362.491666666665]","[12441.0, 24626.0]"
4,4.0,39962.0,39962.0,"[39962.0, 39962.0]","[39962.0, 39962.0]"
5,5.0,43588.922,43676.0,"[29163.0, 58189.0]","[29163.0, 58189.0]"
6,6.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]"
7,7.0,33520.488,33285.0,"[23473.0, 43097.0]","[23473.0, 43097.0]"
8,8.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]"
9,9.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]"


In [15]:
param2='TotalPili'
IC1='IC_median'
param1='bootMedian'

p3 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# pili', 
    y_axis_type='linear',
    y_axis_label ='pole fluorescence',
    title="Pole total fluorescence vs # pili (bootstrap median, 95% CI)"
)

p3.circle(
    source=df_long,
    x=parameterX,
    y=parameterY, 
    line_color = 'green',
    fill_color = 'green',
    alpha=0.4,
    #legend = labelsAll[i]
)

for n_pili in list(df_boot_stats_tot[param2]):
    b = [n_pili,n_pili]
    ic=df_boot_stats_tot.loc[(df_boot_stats_tot[param2] == n_pili), [IC1]].values[0][0]
    p3.line(
        x = b,
        y = ic,
        color = 'black',
        alpha=0.7,
        line_width=3
    )

p3.line(
    source = df_boot_stats_tot.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'black',
    alpha=0.6
)


p3.circle(
    source = df_boot_stats_tot.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'black',
    fill_color = 'white',
    alpha=0.6,
    size=10
)

p3.output_backend = 'svg'

bokeh.io.show(p3)