In [1]:
import os
import glob
import pandas as pd

import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.palettes
from bokeh.transform import jitter
import seaborn as sns
import matplotlib
from bokeh.models import HoverTool
from scipy import stats
import bokeh_catplot
import pandas as pd
import math

from bokeh.layouts import row

# Configure Bokeh so that figures passed to bokeh.io.show() are displayed in the notebook.
bokeh.io.output_notebook()

In [2]:
# Raw string: the Windows path contains backslashes that must stay literal
# ('\S', '\C' and '\P' are invalid escape sequences in a normal string literal).
data_path = r'E:\Sauvegardes PhD\Covid-19\PaQa Levels\PaQaLevels'
working_path = "C:/Users/tala/Desktop/git/PhD_codes/Mechanosensation/Python_code/Motor_localization_analysis/"

# Later cells read 'SummaryPaQa.csv' with a relative path, so the working
# directory matters.
os.chdir(working_path)
new_dir = "Data_PaQa_levels\\"
working_data_path = working_path + new_dir
# exist_ok avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(new_dir, exist_ok=True)

os.chdir(data_path)
extension = 'csv'
csv_suffix = '.' + extension


def _find_csv_files(top):
    """Return the paths of all files below ``top`` whose name ends with ``csv_suffix``."""
    found = []
    for root, dirs, files in os.walk(top, topdown=False):
        for name in files:
            # Match the real extension, not any file name that merely contains "csv".
            if name.endswith(csv_suffix):
                found.append(os.path.join(root, name))
    return found


list_csv = _find_csv_files(data_path)

for file in list_csv:
    df_temp = pd.read_csv(file, sep=',', na_values='*')
    # Metadata is parsed from the text following 'Data_', split on underscores:
    # field 0 -> strain, field 2 -> growth condition, field 3 -> 'BR=<n>.<ext>'.
    split_name = file.rsplit('Data_', 2)[1].rsplit('_', 100)
    file_name = os.path.basename(file)
    df_temp['Strain'] = split_name[0]
    if 'liq' in split_name[2]:
        df_temp['Growth'] = 'Liquid'
    elif 'sol' in split_name[2]:
        df_temp['Growth'] = 'Solid'
    # NOTE(review): when neither 'liq' nor 'sol' is present no 'Growth' column is
    # written for this file (it becomes NaN after the concat below) - confirm intended.
    df_temp['Bio_Rep'] = int(split_name[3].rsplit('BR=', 2)[1].rsplit('.', 2)[0])
    df_temp.to_csv(working_data_path + 'enhanced_' + file_name, index=False, header=True)

# Collected outside the loop above so the name exists even when no input file was found.
list_enhanced_csv = _find_csv_files(working_data_path)

combined_csv = pd.concat([pd.read_csv(f) for f in list_enhanced_csv], sort=False)
os.chdir(working_path)
combined_csv.to_csv("SummaryPaQa.csv", index=False, encoding='utf-8-sig')

In [3]:
def plottingAllData(df, means, labels, indexes, param, colors):
    """Draw a horizontal jittered strip plot of ``param`` for every group.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-cell data. The columns ``param``, 'Labels', 'Strain', 'Growth'
        and 'Bio_Rep' are referenced by the glyphs and the hover tool.
    means : pandas.DataFrame
        Summary rows drawn as white markers on top of the points, under the
        legend entry "medians"; needs the ``param`` and 'Labels' columns.
    labels : list of str
        Factors of the categorical y range.
    indexes : sequence
        Row selectors accepted by ``df.loc``; one scatter glyph is drawn per entry.
    param : str
        Name of the column plotted along x.
    colors : sequence
        ``colors[i]`` is the colour of the glyph drawn for ``indexes[i]``.

    Returns
    -------
    The Bokeh figure.
    """
    p = bokeh.plotting.figure(
        width=600,
        height=600,
        y_range=labels,
        x_axis_type='linear',
        x_axis_label='Fluorescence intensity',
        title="Motor "+param+" on solid vs liquid environement"
    )
    for i, index in enumerate(indexes):
        p.circle(
            source=df.loc[index, :],
            x=param,
            # 'Labels' holds numeric positions (0.5, 1.5, ...) on the categorical axis.
            y=jitter('Labels', width=0.3, range=p.y_range),
            color=colors[i],
            alpha=0.3,
        )
    p.circle(
        source=means,
        x=param,
        y='Labels',
        size=10,
        line_color='black',
        fill_color='white',
        # `legend_label` replaces the deprecated `legend` glyph keyword.
        legend_label="medians"
    )
    p.add_tools(HoverTool(
        tooltips=[
            ('Strain', '@{Strain}'),
            ('Growth condition', '@{Growth}'),
            (param, '@{'+param+'}'),
            ('Biological replicate', '@{Bio_Rep}')
        ],
    ))
    return p

def _drop_strains(frame, strains):
    """Return ``frame`` without the rows whose 'Strain' equals any entry of ``strains``."""
    for strain in strains:
        frame = frame.drop(frame[frame['Strain'] == strain].index)
    return frame


def _place_group(points, medians, strain, growth_pattern, position):
    """Write ``position`` into 'Labels' for one strain/growth group, in both frames.

    ``growth_pattern`` is matched against 'Growth' with ``Series.str.match``.
    Returns the boolean row mask of the group within ``points``.
    """
    in_points = (points['Strain'] == strain) & points['Growth'].str.match(growth_pattern)
    in_medians = (medians['Strain'] == strain) & medians['Growth'].str.match(growth_pattern)
    points.loc[in_points, 'Labels'] = position
    medians.loc[in_medians, 'Labels'] = position
    return in_points


def _draw_strip_plot(points, medians, labels, masks, param, colors, scale, title):
    """Build the vertical jittered strip plot shared by both modes of plottingData."""
    p = bokeh.plotting.figure(
        width=600,
        height=600,
        x_range=labels,
        y_axis_type=scale,
        y_axis_label='Fluorescence intensity',
        title=title
    )
    p.xaxis.major_label_orientation = math.pi/4
    p.xaxis.major_label_text_font_size = "9pt"
    for i, mask in enumerate(masks):
        p.circle(
            source=points.loc[mask, :],
            # The categorical axis is x here, so the jitter refers to the x range.
            x=jitter('Labels', width=0.3, range=p.x_range),
            y=param,
            color=colors[i],
            alpha=0.3,
        )
    p.circle(
        source=medians,
        x='Labels',
        y=param,
        size=10,
        line_color='black',
        fill_color='white',
        # `legend_label` replaces the deprecated `legend` glyph keyword.
        legend_label="medians"
    )
    p.add_tools(HoverTool(
        tooltips=[
            ('Strain', '@{Strain}'),
            ('Growth condition', '@{Growth}'),
            (param, '@{'+param+'}'),
            ('Biological replicate', '@{Bio_Rep}')
        ],
    ))
    return p


def plottingData(df, means, wt, order, strain_to_remove, growth, param, colors, scale, GraphTitle):
    """Plot ``param`` per strain and growth condition as a jittered strip plot.

    The reference strain ``wt`` always occupies the first two x positions
    ('Liquid' at 0.5, 'Solid' at 1.5); the remaining groups follow from 2.5
    in steps of 1. Positions are written to a 'Labels' column of internal
    copies, so ``df`` and ``means`` themselves are not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-cell data with 'Strain', 'Growth', 'Bio_Rep' and ``param`` columns.
    means : pandas.DataFrame
        Summary rows (same key columns) drawn as white "medians" markers.
        Every row left after filtering is drawn; a row that receives no new
        position keeps whatever 'Labels' value it already had.
    wt : str
        Name of the reference strain.
    order : list of str
        If ``growth`` is a list: strain names, each plotted once per growth
        condition. Otherwise: suffixes, the plotted strain being
        ``wt + '_' + suffix`` under the single condition ``growth``.
    strain_to_remove : list of str
        Strains removed from both frames before plotting.
    growth : list of str or str
        Several conditions (list) or a single one (str). Anything other than
        'Liquid' is treated as 'Solid' when choosing the opposite condition.
    param : str
        Column plotted along y.
    colors : sequence
        ``colors[i]`` is the colour of the i-th plotted group.
    scale : str
        Passed to the figure as ``y_axis_type`` (the calls in this notebook
        use 'linear' and 'log').
    GraphTitle : str
        Prefix of the figure title.

    Returns
    -------
    The Bokeh figure.
    """
    # Always work on copies: the 'Labels' assignments below must not leak to the caller.
    points = _drop_strains(df, strain_to_remove).copy()
    medians = _drop_strains(means, strain_to_remove).copy()

    labels = [wt + ' ' + 'Liquid', wt + ' ' + 'Solid']
    position = 2.5

    if isinstance(growth, list):
        masks = [
            _place_group(points, medians, wt, 'Liquid', 0.5),
            _place_group(points, medians, wt, 'Solid', 1.5),
        ]
        # Lists grow with len(order) * len(growth) instead of assuming two conditions.
        for strain in order:
            for condition in growth:
                masks.append(_place_group(points, medians, strain, condition, position))
                labels.append(strain + ' ' + condition)
                position = position + 1
        title = GraphTitle+" "+param+" on "+" vs. ".join(growth)+" in "+scale+" scale"
    else:
        ungrowth = 'Solid' if growth == 'Liquid' else 'Liquid'
        # Remove the opposite growth condition for *every* entry of `order`
        # (the filter used to restart from the unfiltered frame on each pass,
        # so only the last entry was applied, and an empty `order` crashed).
        # NOTE(review): this compares 'Strain' with the bare `order` entries,
        # while the plotted groups below use wt + '_' + entry - confirm intended.
        opposite_points = points['Strain'].isin(order) & (points['Growth'] == ungrowth)
        opposite_medians = medians['Strain'].isin(order) & (medians['Growth'] == ungrowth)
        points = points.drop(points[opposite_points].index).copy()
        medians = medians.drop(medians[opposite_medians].index).copy()

        masks = [
            _place_group(points, medians, wt, 'Liquid', 0.5),
            # The 'Solid' reference group is the strain named wt + '_3h'.
            _place_group(points, medians, wt + '_3h', 'Solid', 1.5),
        ]
        for suffix in order:
            strain = wt + '_' + suffix
            masks.append(_place_group(points, medians, strain, growth, position))
            labels.append(strain + ' ' + growth)
            position = position + 1
        title = GraphTitle+" "+param+" on "+growth

    return _draw_strip_plot(points, medians, labels, masks, param, colors, scale, title)

def getDFStats(df, param):
    """Assign a plotting position to every strain/growth group and summarise ``param``.

    Groups are enumerated strain by strain, then growth condition by growth
    condition, in order of first appearance in ``df``; group number ``k``
    receives the position ``k + 0.5``.

    Side effect: ``df`` is modified in place - a float 'Labels' column holding
    the group position is added (or overwritten).

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Strain', 'Growth', 'Bio_Rep' and ``param``.
    param : str
        Name of the column to summarise.

    Returns
    -------
    means : pandas.DataFrame
        Median (despite the name) of ``param`` per
        ('Strain', 'Growth', 'Labels', 'Bio_Rep').
    stds : pandas.DataFrame
        Standard deviation of ``param`` for the same grouping.
    indexes : list of pandas.Series
        One boolean row mask per (strain, growth, replicate) combination, in
        enumeration order, including combinations without any row.
    labels : list of str
        '<strain> <growth>' for every group, in enumeration order.
    """
    strains = list(df.Strain.unique())
    growths = list(df.Growth.unique())
    reps = list(df.Bio_Rep.unique())

    group_masks = []
    labels = []
    indexes = []
    for strain in strains:
        for growth in growths:
            in_group = (df['Strain'] == strain) & (df['Growth'] == growth)
            group_masks.append(in_group)
            labels.append(strain+' '+growth)
            indexes.extend(in_group & (df['Bio_Rep'] == rep) for rep in reps)

    # Create the column as float from the start: the positions are x.5 values
    # and writing floats into an integer column is a deprecated implicit upcast.
    df['Labels'] = 0.0
    for slot, in_group in enumerate(group_masks):
        df.loc[in_group, 'Labels'] = slot + 0.5

    grouped = df.groupby(['Strain', 'Growth', 'Labels', 'Bio_Rep'])[param]
    means = grouped.median().to_frame().reset_index()
    stds = grouped.std().to_frame().reset_index()
    return means, stds, indexes, labels

In [4]:
# Load the combined table written earlier; '*' entries are read as missing values.
df_long = pd.read_csv('SummaryPaQa.csv', sep=',', na_values='*')
df_long['PaQa_ratio'] = df_long['MeanCellIntensity_PaQa'] / df_long['MeanCellIntensity_RFP']

# Median cell length per (strain, growth, replicate); the largest of these
# medians is the upper length limit for the rows kept in `df`.
lengths = (
    df_long
    .groupby(['Strain', 'Growth', 'Bio_Rep'])['CellLength']
    .median()
    .to_frame()
    .reset_index()
)
l_max = lengths['CellLength'].max()
df = df_long[df_long['CellLength'] <= l_max].copy()

# Independent copies of the rows whose strain name contains the given literal text.
df_pilT, df_pilU, df_pilB = (
    df[df['Strain'].str.contains(gene, regex=False)].copy()
    for gene in ('pilT', 'pilU', 'pilB')
)
df_long.head()

Unnamed: 0,Frame,CellID,Area,CellLength,CellSinuosity,CellWidth,MaxCellIntensity_RFP,MeanCellIntensity_RFP,MedianCellIntensity_RFP,MinCellIntensity_RFP,MaxCellIntensity_PaQa,MeanCellIntensity_PaQa,MedianCellIntensity_PaQa,MinCellIntensity_PaQa,Strain,Growth,Bio_Rep,PaQa_ratio
0,1,1,0.6422,2.171924,1.012087,0.411096,1276,937.0,979.5,287,12875,10150.585526,10896.0,2218,cpdA-fliC-,Liquid,1,10.833069
1,1,2,0.714025,2.452315,1.072699,0.473207,1753,1296.47929,1363.0,387,18139,13682.911243,14488.0,3412,cpdA-fliC-,Liquid,1,10.553899
2,1,3,0.7943,2.826543,1.049439,0.495025,1345,991.287234,1028.5,354,15702,11890.271277,12240.0,2825,cpdA-fliC-,Liquid,1,11.994779
3,1,4,0.747825,2.490391,1.067156,0.495025,1598,1122.960452,1181.0,412,12326,9029.966102,9502.0,2181,cpdA-fliC-,Liquid,1,8.041215
4,1,5,0.66755,2.450402,1.052882,0.495025,1699,1338.987342,1360.0,711,18226,13319.455696,14275.5,5389,cpdA-fliC-,Liquid,1,9.94741


In [5]:
# Column to analyse (commented-out alternative kept for reference).
#param='MeanCellIntensity_mNeonGreen'
param = 'PaQa_ratio'

# Per-group summary tables; note that getDFStats also writes a 'Labels'
# column into df_long.
means, stds, indexes, labels = getDFStats(df_long, param)

#df.to_csv( "DataSet.csv", index=False, encoding='utf-8-sig')

In [6]:
# Category20 palette repeated three times, so up to 60 groups can be coloured.
colors = bokeh.palettes.d3['Category20'][20]*3
growth = ['Liquid', 'Solid']

# Strain lists shared by the three figures below.
base_order = ['WT-Px2', 'WT-lolo', 'fliC-lolo', 'fliC-']
extra_strains = ['pilH-fliC-', 'cpdA-fliC-', 'pilG-fliC-', 'cyaB-fliC-', 'pilT-pilA-fliC-']
full_order = base_order + extra_strains

# p1 / p2: every strain, on a linear and a log axis; p3: only the first four
# strains, with the remaining ones removed from the data.
p1 = plottingData(df_long, means, 'WT-alex', full_order, [], growth, param, colors, 'linear', '')
p2 = plottingData(df_long, means, 'WT-alex', full_order, [], growth, param, colors, 'log', '')
p3 = plottingData(df_long, means, 'WT-alex', base_order, extra_strains, growth, param, colors, 'linear', '')

bokeh.io.show(row(p1, p2, p3))