In [1]:
import os
import glob
import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.palettes
from bokeh.transform import jitter
import seaborn as sns
import matplotlib
from bokeh.models import HoverTool
from scipy import stats
import pandas as pd


from bokeh.layouts import row
bokeh.io.output_notebook()

In [2]:
#Functions
def bootstrap_sampling(my_array, bootstrap_samples, bootstrap_replicates):
    bs_samples=np.zeros((bootstrap_replicates, bootstrap_samples))
    bs_rep=np.arange(bootstrap_replicates)
    for i in bs_rep:
        bs_samples[i, :]=np.random.choice(my_array, bootstrap_samples)
    return bs_samples

def bootstrap_stats(bs_samples):
    samples_shape=np.shape(bs_samples)
    bs_means=np.zeros((samples_shape[0], 1))
    bs_IC_means=np.zeros((1, 2))
    bs_medians=np.zeros((samples_shape[0], 1))
    bs_IC_medians=np.zeros((1, 2))
    for i in np.arange(samples_shape[0]):
        bs_means[i]=np.mean(bs_samples[i, :])
        bs_medians[i]=np.median(bs_samples[i, :])
    bs_IC_means[0, 0]=np.quantile(bs_means, .025)
    bs_IC_means[0, 1]=np.quantile(bs_means, .975)
    bs_IC_medians[0, 0]=np.quantile(bs_medians, .025)
    bs_IC_medians[0, 1]=np.quantile(bs_medians, .975)
    return np.mean(bs_means), np.median(bs_medians), bs_IC_means, bs_IC_medians

In [4]:
#set root folder
path='E:/Sauvegardes PhD/Covid-19/Pili_and_PaQa_counts_data/Pili_fluo/pilB_mNG/'
#path = "C:/Users/tala/Desktop/git/PhD_codes/Mechanosensation/Python_code/Pole_analysis/combined_pilB/"
Combine_Data = False
os.chdir(path)
extension = 'csv'
list_csv = []
for root, dirs, files in os.walk(path, topdown=False):
    for name in files:
        if extension in name:
            list_csv.append(os.path.join(root, name))
#print(list_csv)
os.chdir("C:/users/tala/Desktop/git/PhD_codes/Mechanosensation/Python_code/Pole_analysis/")
new_dir = "Data_pilB_old\\"
saved_file = "PolesData_pilB_old.csv"
if not os.path.exists(new_dir):
    os.mkdir(new_dir)
os.chdir(new_dir)
combined_csv = pd.concat([pd.read_csv(f) for f in list_csv ], sort=False)


raw_names=list(combined_csv.Label.unique())
#print(raw_names)

if not Combine_Data:
    combined_csv['Strain'] = 'nan'
    combined_csv['Growth'] = 'nan'
    for file in raw_names:
        print(file)
        split_name=file.rsplit('_event',100)[0].rsplit('_',100)
        if 'Agarose' in split_name or 'sol' in split_name or 'Agar' in split_name:
            growth='Solid'
            if 'Agarose' in split_name:
                growthInd = split_name.index('Agarose')
            elif 'sol' in split_name:
                growthInd = split_name.index('sol')
            elif 'Agar' in split_name:
                growthInd = split_name.index('Agar')
        else:
            growth='Liquid'
            if 'liq' in split_name:
                growthInd = split_name.index('liq')
            else:
                growthInd = len(split_name)-3
        separator = '_'
        strain =  separator.join(split_name[0:growthInd])
        combined_csv.Strain.loc[(combined_csv['Label']==file)]=strain
        combined_csv.Growth.loc[(combined_csv['Label']==file)]=growth
combined_csv.to_csv( saved_file, index=False, encoding='utf-8-sig')
combined_csv.head()

pilB_mNG_fliC-_liq_BR=1_0_event1_tirf_RAW_Stack.tif
pilB_mNG_fliC-_liq_BR=1_0_event2_tirf_RAW_Stack.tif
nan


AttributeError: 'float' object has no attribute 'rsplit'

In [4]:
df = pd.read_csv(saved_file, sep=',', na_values='*')
df['PoleRange']=df['AreaPoleDim']/df['CellArea']
pole_range=df.PoleRange.mean()
print(pole_range)
ref_ratio = (2*pole_range)/(1-2*pole_range)
df['PercentTotalFluoDim']=df['TotalFluorescencePoleDim']/df['CellTotalFluorescence']
df['PercentTotalFluoBright']=df['TotalFluorescencePoleBright']/df['CellTotalFluorescence']
df['PercentTotalFluoCenter']=(df['CellTotalFluorescence']-df['TotalFluorescencePoleBright']-df['TotalFluorescencePoleDim'])/df['CellTotalFluorescence']

df['polar_ratio']=(df['TotalFluorescencePoleDim']+df['TotalFluorescencePoleBright'])/(df['CellTotalFluorescence']-(df['TotalFluorescencePoleDim']+df['TotalFluorescencePoleBright']))/((2*df['PoleRange']/(1-2*df['PoleRange'])))

df['TotalPili']=df['Nb_Pili_PoleDim']+df['Nb_Pili_PoleBright']
df['TotalFlagella']=df['Nb_Flagella_PoleDim']+df['Nb_Flagella_PoleBright']




y_param1='Nb_Pili_PoleDim'
y_param2='Nb_Pili_PoleBright'
tot_pili_limit=12
Ncells_per_pili_Dim=np.empty((tot_pili_limit), dtype='int16')
Ncells_per_pili_Bright=np.empty((tot_pili_limit), dtype='int16')
N=len(df)
print('Dim pole:')
print('NCells='+str(N), end=', ')
for i in range(tot_pili_limit):
    Ncells_per_pili_Dim[i]=len(df.loc[(df[y_param1]==i)])
    if (i<tot_pili_limit-1):
        print('N_'+str(i)+'pili='+str(Ncells_per_pili_Dim[i]), end=', ')
    else: print('N_'+str(i)+'pili='+str(Ncells_per_pili_Dim[i]))
print('Bright pole:')
print('Ncells='+str(N), end=', ')
for i in range(tot_pili_limit):
    Ncells_per_pili_Bright[i]=len(df.loc[(df[y_param2]==i)])
    if (i<tot_pili_limit-1):
        print('N_'+str(i)+'pili='+str(Ncells_per_pili_Bright[i]), end=', ')
    else: print('N_'+str(i)+'pili='+str(Ncells_per_pili_Bright[i]))
        
df.head()

0.19036081496072785
Dim pole:
NCells=122, N_0pili=85, N_1pili=28, N_2pili=6, N_3pili=2, N_4pili=0, N_5pili=1, N_6pili=0, N_7pili=0, N_8pili=0, N_9pili=0, N_10pili=0, N_11pili=0
Bright pole:
Ncells=122, N_0pili=68, N_1pili=38, N_2pili=8, N_3pili=4, N_4pili=0, N_5pili=2, N_6pili=1, N_7pili=0, N_8pili=1, N_9pili=0, N_10pili=0, N_11pili=0


Unnamed: 0,Unnamed: 1,Label,Area,Mean,Min,Max,X,Y,BiologicalReplicate,CellArea,...,PolarRatio,Strain,Growth,PoleRange,PercentTotalFluoDim,PercentTotalFluoBright,PercentTotalFluoCenter,polar_ratio,TotalPili,TotalFlagella
0,1,pilB_mNG_fliC-_liq_Gasket_0_event10_tirf_RAW_S...,623,255,255,255,29.6878,26.9366,1,708,...,0.88303,,,0.193503,0.174653,0.183289,0.642058,0.883034,2,0
1,2,pilB_mNG_fliC-_liq_Gasket_0_event10_tirf_RAW_S...,646,255,255,255,29.3483,29.6192,1,738,...,0.89502,,,0.185637,0.165632,0.180144,0.654224,0.895025,0,0
2,1,pilB_mNG_fliC-_liq_Gasket_0_event11_tirf_RAW_S...,519,255,255,255,18.57129,28.12813,1,594,...,1.02424,,,0.1633,0.135156,0.196733,0.668111,1.024241,1,0
3,2,pilB_mNG_fliC-_liq_Gasket_0_event11_tirf_RAW_S...,508,255,255,255,28.71654,11.49606,1,571,...,0.85662,,,0.273205,0.252007,0.255845,0.492148,0.856618,1,0
4,3,pilB_mNG_fliC-_liq_Gasket_0_event11_tirf_RAW_S...,795,255,255,255,33.67484,35.74906,1,898,...,0.78553,,,0.197105,0.164003,0.174259,0.661738,0.785531,0,0


In [5]:
sumPoles=(Ncells_per_pili_Dim+Ncells_per_pili_Bright)
Prob_Dim_if_nb_pili = [(Ncells_per_pili_Dim[i]/sumPoles[i]) for i in range(tot_pili_limit) if sumPoles[i]>0]
Prob_Bright_if_nb_pili=[(Ncells_per_pili_Bright[i]/sumPoles[i]) for i in range(tot_pili_limit) if sumPoles[i]>0]

Prob_per_pili_Dim= Ncells_per_pili_Dim/N
Prob_per_pili_Bright= Ncells_per_pili_Bright/N

In [6]:
p0 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# pili', 
    y_axis_type='linear',
    y_axis_label ='P(# pili)',
    title="Probability of having # pili"
)

x_1=range(tot_pili_limit)


p0.line(
    x=x_1,
    y=Prob_per_pili_Dim, 
    line_color = 'blue',
    #fill_color = 'blue',
    alpha=0.7,
    #legend = labelsAll[i]
    legend = 'Dim Pole'
)


p0.line(
    x=x_1,
    y=Prob_per_pili_Bright, 
    line_color = 'red',
    #fill_color = 'red',
    alpha=0.7,
    #legend = labelsAll[i]
    legend = 'Bright pole'
)


bokeh.io.show(p0)

In [7]:
p1 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# pili', 
    y_axis_type='linear',
    y_axis_label ='P(# pili|pole state)',
    title="Probability of having # pili if bright or dim pole"
)

x_1=range(len(Prob_Dim_if_nb_pili))


p1.line(
    x=x_1,
    y=Prob_Dim_if_nb_pili, 
    line_color = 'blue',
    #fill_color = 'blue',
    alpha=0.7,
    #legend = labelsAll[i]
    legend = 'Dim Pole'
)


p1.line(
    x=x_1,
    y=Prob_Bright_if_nb_pili, 
    line_color = 'red',
    #fill_color = 'red',
    alpha=0.7,
    #legend = labelsAll[i]
    legend = 'Bright pole'
)


bokeh.io.show(p1)

In [8]:
nb_pili=np.zeros((tot_pili_limit, 1))
boot_meanDim=np.zeros((tot_pili_limit, 1))
boot_medianDim=np.zeros((tot_pili_limit, 1))
boot_IC_meanDim=np.zeros((tot_pili_limit, 2))
boot_IC_medianDim=np.zeros((tot_pili_limit, 2))
boot_meanBright=np.zeros((tot_pili_limit, 1))
boot_medianBright=np.zeros((tot_pili_limit, 1))
boot_IC_meanBright=np.zeros((tot_pili_limit, 2))
boot_IC_medianBright=np.zeros((tot_pili_limit, 2))
for n_pili in range(tot_pili_limit):
    cell_fluorescence_arrayDim=df.TotalFluorescencePoleDim.loc[(df[y_param1]==n_pili)].values
    cell_fluorescence_arrayBright=df.TotalFluorescencePoleBright.loc[(df[y_param2]==n_pili)].values
    nb_pili[n_pili]=n_pili
    if (len(cell_fluorescence_arrayDim) > 0):
        bs_Dim_fluorescence_array=bootstrap_sampling(cell_fluorescence_arrayDim, len(cell_fluorescence_arrayDim), 1000)
        [bs_means, bs_medians, IC_means, IC_medians]=bootstrap_stats(bs_Dim_fluorescence_array)
        boot_meanDim[n_pili]=bs_means
        boot_IC_meanDim[n_pili,:]=IC_means
        boot_medianDim[n_pili]=bs_medians
        boot_IC_medianDim[n_pili,:]=IC_medians
    if (len(cell_fluorescence_arrayBright) > 0):
        bs_Bright_fluorescence_array=bootstrap_sampling(cell_fluorescence_arrayBright, len(cell_fluorescence_arrayBright), 1000)
        [bs_means, bs_medians, IC_means, IC_medians]=bootstrap_stats(bs_Bright_fluorescence_array)
        boot_meanBright[n_pili]=bs_means
        boot_IC_meanBright[n_pili,:]=IC_means
        boot_medianBright[n_pili]=bs_medians
        boot_IC_medianBright[n_pili,:]=IC_medians

names = ['TotalPili', 'bootMeanDim','bootMedianDim', 'bootMeanBright','bootMedianBright']
data = np.concatenate((nb_pili, boot_meanDim, boot_medianDim, boot_meanBright, boot_medianBright), axis=1)
df_boot_stats=pd.DataFrame(data=np.transpose(data), index=names).T
df_boot_stats['IC_meanDim']=list(boot_IC_meanDim)
df_boot_stats['IC_medianDim']=list(boot_IC_medianDim)
df_boot_stats['IC_meanBright']=list(boot_IC_meanBright)
df_boot_stats['IC_medianBright']=list(boot_IC_medianBright)
df_boot_stats

Unnamed: 0,TotalPili,bootMeanDim,bootMedianDim,bootMeanBright,bootMedianBright,IC_meanDim,IC_medianDim,IC_meanBright,IC_medianBright
0,0.0,14755.612,14598.0,17668.82025,16958.0,"[13677.658823529411, 15816.886764705881]","[13359.0, 15476.0]","[16296.442647058824, 19073.201102941177]","[15328.1, 19136.0]"
1,1.0,16553.8375,15466.0,18402.999816,18295.5,"[14935.963392857144, 18231.100892857143]","[14314.0, 18429.424999999996]","[16490.988815789475, 20605.43092105263]","[15504.0, 19781.0]"
2,2.0,22301.527833,23716.5,17326.97675,16509.5,"[15611.166666666666, 29520.462499999998]","[11765.5, 31806.0]","[13484.725, 21900.884375]","[12849.0, 19252.0]"
3,3.0,17828.908,17672.5,21976.807,20205.5,"[7897.0, 27448.0]","[7897.0, 27448.0]","[14851.5, 29830.25]","[12445.0, 34021.0]"
4,4.0,0.0,0.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]"
5,5.0,43952.0,43952.0,45327.68,44929.0,"[43952.0, 43952.0]","[43952.0, 43952.0]","[24995.0, 64863.0]","[24995.0, 64863.0]"
6,6.0,0.0,0.0,45942.0,45942.0,"[0.0, 0.0]","[0.0, 0.0]","[45942.0, 45942.0]","[45942.0, 45942.0]"
7,7.0,0.0,0.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]"
8,8.0,0.0,0.0,45073.0,45073.0,"[0.0, 0.0]","[0.0, 0.0]","[45073.0, 45073.0]","[45073.0, 45073.0]"
9,9.0,0.0,0.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]","[0.0, 0.0]"


In [9]:
x_param1='TotalFluorescencePoleDim'#'PercentTotalFluoDim'
y_param1='Nb_Pili_PoleDim'
x_param2='TotalFluorescencePoleBright' #'PercentTotalFluoBright'
y_param2='Nb_Pili_PoleBright'
param2='TotalPili'
IC1='IC_medianDim'
IC2='IC_medianBright'
param1='bootMedianDim'
param3='bootMedianBright'
p2 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# Pili', 
    y_axis_type='linear',
    y_axis_label ='Pole Total fluorescence',
    title="Pole fluorescence vs #Pili (bootstrap median, 95% CI)"
)

p2.circle(
    source=df,
    x=y_param1,
    y=x_param1, 
    line_color = 'blue',
    fill_color = 'blue',
    alpha=0.7,
    #legend = labelsAll[i]
    legend = 'Dim Pole'
)
p2.circle(
    source=df,
    x=y_param2,
    y=x_param2, 
    line_color = 'red',
    fill_color = 'red',
    alpha=0.7,
    #legend = labelsAll[i]
    legend = 'Bright Pole'
)

for n_pili in list(df_boot_stats[param2]):
    b = [n_pili,n_pili]
    ic=df_boot_stats.loc[(df_boot_stats[param2] == n_pili), [IC1]].values[0][0]
    p2.line(
        x = b,
        y = ic,
        color = 'black',
        alpha=0.7,
        line_width=3
    )
    
for n_pili in list(df_boot_stats['TotalPili']):
    b = [n_pili,n_pili]
    ic=df_boot_stats.loc[(df_boot_stats[param2] == n_pili), [IC2]].values[0][0]
    p2.line(
        x = b,
        y = ic,
        color = 'black',
        alpha=0.7,
        line_width=3
    )

p2.line(
    source = df_boot_stats.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'blue',
    alpha=0.6
)


p2.circle(
    source = df_boot_stats.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'black',
    fill_color = 'white',
    alpha=0.6,
    size=10
)

p2.line(
    source = df_boot_stats.loc[:, [param2, param3]],
    x = param2,
    y = param3,
    line_color = 'red',
    alpha=0.6
) 

p2.circle(
    source = df_boot_stats.loc[:, [param2, param3]],
    x = param2,
    y = param3,
    line_color = 'black',
    fill_color = 'white',
    alpha=0.6,
    size=10
)  

p2.output_backend = 'svg'

bokeh.io.show(p2)

In [10]:
x_param3='CellTotalFluorescence'
y_param3='polar_ratio'

print(x_param3+' vs '+y_param3+':')
[spearman_r, spearman_p]=stats.spearmanr(df[x_param3], df[y_param3])
print('Spearman correlation = '+str(spearman_r)+', p-value = '+ str(spearman_p))

[pearson_r, pearson_p]=stats.pearsonr(df[x_param3], df[y_param3])
print('Pearson correlation = '+str(pearson_r)+', p-value = '+str(pearson_p))

CellTotalFluorescence vs polar_ratio:
Spearman correlation = nan, p-value = nan
Pearson correlation = nan, p-value = 1.0


In [11]:
x_param3='CellTotalFluorescence'
y_param3='polar_ratio'

p1 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='Total fluorescence', 
    y_axis_type='linear',
    y_axis_label ='Polar Ratio',
    title="Polar Ratio vs Cell Total fluorescence"
)

p1.circle(
    source=df,
    x=x_param3,
    y=y_param3, 
    line_color = 'black',
    fill_color = 'gray',
    alpha=0.7,
    #legend = labelsAll[i]
)

bokeh.io.show(p1)

In [12]:
y_param1='Nb_Pili_PoleDim'
y_param2='Nb_Pili_PoleBright'
x_param1='TotalFluorescencePoleDim'
x_param2='TotalFluorescencePoleBright'
parameterX='TotalPili'
parameterY='PoleTotalFluorescence'
poles_pili_dim=list(df[y_param1])
poles_pili_bright=list(df[y_param2])
poles_pili=poles_pili_dim+poles_pili_bright
poles_pili=np.asarray(poles_pili).reshape(len(poles_pili),1)
poles_fluo_dim=list(df[x_param1])
poles_fluo_bright=list(df[x_param2])
poles_fluo=poles_fluo_dim+poles_fluo_bright
poles_fluo=np.asarray(poles_fluo).reshape(len(poles_fluo),1)

names = [parameterX, parameterY]
data = np.concatenate((poles_pili, poles_fluo), axis=1)
df_long=pd.DataFrame(data=np.transpose(data), index=names).T

In [13]:
x_param3=parameterX
y_param3=parameterY

print(x_param3+' vs '+y_param3+':')
[spearman_r, spearman_p]=stats.spearmanr(df_long[x_param3], df_long[y_param3])
print('Spearman correlation = '+str(spearman_r)+', p-value = '+ str(spearman_p))

[pearson_r, pearson_p]=stats.pearsonr(df_long[x_param3], df_long[y_param3])
print('Pearson correlation = '+str(pearson_r)+', p-value = '+str(pearson_p))

TotalPili vs PoleTotalFluorescence:
Spearman correlation = 0.19307373207167627, p-value = 0.0024538716063372394
Pearson correlation = 0.4913483804582763, p-value = 3.1096664762041554e-16


In [14]:
nb_pili=np.zeros((tot_pili_limit, 1))
boot_mean=np.zeros((tot_pili_limit, 1))
boot_median=np.zeros((tot_pili_limit, 1))
boot_IC_mean=np.zeros((tot_pili_limit, 2))
boot_IC_median=np.zeros((tot_pili_limit, 2))

for n_pili in range(tot_pili_limit):
    Pole_fluorescence_array=df_long.PoleTotalFluorescence.loc[(df_long[parameterX]==n_pili)].values
    nb_pili[n_pili]=n_pili
    if (len(Pole_fluorescence_array) > 0):
        bs_fluorescence_array=bootstrap_sampling(Pole_fluorescence_array, len(Pole_fluorescence_array), 1000)
        [bs_means, bs_medians, IC_means, IC_medians]=bootstrap_stats(bs_fluorescence_array)
        boot_mean[n_pili]=bs_means
        boot_IC_mean[n_pili,:]=IC_means
        boot_median[n_pili]=bs_medians
        boot_IC_median[n_pili,:]=IC_medians

names = ['TotalPili', 'bootMean','bootMedian']
data = np.concatenate((nb_pili, boot_mean, boot_median), axis=1)
df_boot_stats_tot=pd.DataFrame(data=np.transpose(data), index=names).T
df_boot_stats_tot['IC_mean']=list(boot_IC_mean)
df_boot_stats_tot['IC_median']=list(boot_IC_median)
df_boot_stats_tot

Unnamed: 0,TotalPili,bootMean,bootMedian,IC_mean,IC_median
0,0.0,16059.793536,15382.0,"[15167.997549019608, 16924.128104575164]","[14678.0, 16743.0]"
1,1.0,17571.032455,16978.5,"[16228.378030303029, 18835.024621212124]","[15383.0, 18843.5625]"
2,2.0,19406.273857,17665.0,"[15420.398214285715, 23988.535714285714]","[12849.0, 23732.0]"
3,3.0,20412.8685,20205.5,"[13291.333333333334, 27518.166666666668]","[10171.0, 30734.5]"
4,4.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]"
5,5.0,44937.520667,43952.0,"[24995.0, 64863.0]","[24995.0, 64863.0]"
6,6.0,45942.0,45942.0,"[45942.0, 45942.0]","[45942.0, 45942.0]"
7,7.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]"
8,8.0,45073.0,45073.0,"[45073.0, 45073.0]","[45073.0, 45073.0]"
9,9.0,0.0,0.0,"[0.0, 0.0]","[0.0, 0.0]"


In [15]:
param2='TotalPili'
IC1='IC_median'
param1='bootMedian'

p3 = bokeh.plotting.figure(
    width=600, 
    height=600, 
    x_axis_label='# pili', 
    y_axis_type='linear',
    y_axis_label ='pole fluorescence',
    title="Pole total fluorescence vs # pili (bootstrap median, 95% CI)"
)

p3.circle(
    source=df_long,
    x=parameterX,
    y=parameterY, 
    line_color = 'green',
    fill_color = 'white',
    alpha=0.4,
    #legend = labelsAll[i]
)

for n_pili in list(df_boot_stats_tot[param2]):
    b = [n_pili,n_pili]
    ic=df_boot_stats_tot.loc[(df_boot_stats_tot[param2] == n_pili), [IC1]].values[0][0]
    p3.line(
        x = b,
        y = ic,
        color = 'black',
        alpha=0.7,
        line_width=3
    )

p3.line(
    source = df_boot_stats_tot.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'black',
    alpha=0.6
)


p3.circle(
    source = df_boot_stats_tot.loc[:, [param2, param1]],
    x = param2,
    y = param1,
    line_color = 'black',
    fill_color = 'white',
    alpha=0.6,
    size=10
)

p3.output_backend = 'webgl'

bokeh.io.show(p3)

In [17]:
a=np.array(df.Nb_Pili_PoleDim)
b=np.array(df.Nb_Pili_PoleBright)
pili_nb_diff=(b-a)
c=np.zeros(len(a))
for i in range(len(a)):
    if(b[i] > a[i]):
        c[i]=1
    else:
        c[i]=0        
d=sum(c)
persentage_piliBright=d/len(a)
print(persentage_piliBright)
print(pili_nb_diff)

0.4098360655737705
[-2  0  1 -1  0  0  1  2  0  0  0  1 -1 -1  1 -2  0  1  1  1  0  1 -1  0
 -1  0  0  0  1  1  0  0  1  0  1  1  0  1  0 -2  0 -1  1  1 -1  1  0  0
  0 -2  1 -1  1  1  0  1  1  0  1  1  1  1 -1  0  0  0  0  1  0 -1  1  0
  2  1  0  0 -1  0  2  2  2  0  1  1  0  2 -1  0  0  0  0 -1  0  0  0 -1
 -1  1  0  0  0  0 -1  1 -2  0  0  5  3  3  1  1  3  1 -1 -3 -1  1  0  2
  3  2]
