## Imports and constants

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as st
import statsmodels.api as sm

import os
import time
import random
import itertools

from copy import copy
from cycler import cycler
from datetime import date
from scipy.optimize import curve_fit
from statannot import add_stat_annotation

pd.set_option('mode.chained_assignment',None)

In [2]:
import re
dateFormatExcel = re.compile('\d{2}/\d{2}/\d{4}')
dateFormatOk = re.compile('\d{2}-\d{2}-\d{2}')

In [3]:
import matplotlib
import matplotlib.pyplot as plt

%matplotlib widget 
# %matplotlib inline
matplotlib.rcParams.update({'figure.autolayout': True})


SMALLER_SIZE = 10
SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 20

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALLER_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# prop_cycle = plt.rcParams['axes.prop_cycle']
# colors = prop_cycle.by_key()['color']
new_color_list = color=['#ff7f0e', '#1f77b4', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
new_color_cycle = cycler(color=new_color_list)
plt.rcParams['axes.prop_cycle'] = new_color_cycle

In [4]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Range1d
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10
from bokeh.layouts import gridplot
output_notebook()

In [5]:
# mainDir = "C://Users//JosephVermeil//Desktop//ActinCortexAnalysis"
mainDir = "C://Users//josep//Desktop//ActinCortexAnalysis"
experimentalDataDir = os.path.join(mainDir, "ExperimentalData")
dataDir = os.path.join(mainDir, "DataAnalysis")
figDir = os.path.join(dataDir, "Figures")
todayFigDir = os.path.join(figDir, "Historique//" + str(date.today()))
timeSeriesDataDir = os.path.join(dataDir, "TimeSeriesData")

In [6]:
# Display all detected time Series Data Files.

# allTimeSeriesDataFiles = [f for f in os.listdir(timeSeriesDataDir) if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".csv"))]
# allTimeSeriesDataFiles

Some generic useful subfunctions:

In [7]:
def get_R2(Y1, Y2):
    meanY = np.mean(Y1)
    meanYarray = meanY*np.ones(len(Y1))
    SST = np.sum((Y1-meanYarray)**2)
    SSE = np.sum((Y2-meanYarray)**2)
    R2 = SSE/SST
    return(R2)

def getDictAggMean(df):
    dictAggMean = {}
    for c in df.columns:
#         t = df[c].dtype
#         print(c, t)
        try :
            if np.array_equal(df[c], df[c].astype(bool)):
                dictAggMean[c] = 'min'
            else:
                try:
                    np.mean(df[c])
                    dictAggMean[c] = 'mean'
                except:
                    dictAggMean[c] = 'first'
        except:
                dictAggMean[c] = 'first'
    return(dictAggMean)

def findFirst(x, A):
    idx = (A==x).view(bool).argmax()
    return(idx)

def fitLine(X, Y):
    X = sm.add_constant(X)
    model = sm.OLS(Y, X)
    results = model.fit()
    params = results.params # Y=a*X+b ; params[0] = b,  params[1] = a
#     print(dir(results))
#     R2 = results.rsquared
#     ci = results.conf_int(alpha=0.05)
#     CovM = results.cov_params()
#     p = results.pvalues

# This is how are computed conf_int:
#
#     bse = results.bse
#     dist = stats.t
#     alpha = 0.05
#     q = dist.ppf(1 - alpha / 2, results.df_resid)
#     params = results.params
#     lower = params - q * bse
#     upper = params + q * bse
#     print(lower, upper)
    
    return(results.params, results)

def archiveFig(fig, ax, name='auto', figDir = todayFigDir, figSubDir=''):
    if not os.path.exists(figDir):
        os.makedirs(figDir)
    
    saveDir = os.path.join(todayFigDir, figSubDir)
    if not os.path.exists(saveDir):
        os.makedirs(saveDir)
    
    if name != 'auto':
        fig.savefig(os.path.join(saveDir, name + '.png'))
    
    else:
        suptitle = fig._suptitle.get_text()
        if len(suptitle) > 0:
            name = suptitle
            fig.savefig(os.path.join(saveDir, name + '.png'))
        
        else:
            try:
                N = len(ax)
                ax = ax[0]
            except:
                N = 1
                ax = ax
                
            xlabel = ax.get_xlabel()
            ylabel = ax.get_ylabel()
            if len(xlabel) > 0 and len(ylabel) > 0:
                name = ylabel + ' Vs ' + xlabel
                if N > 1:
                    name = name + '___etc'
                fig.savefig(os.path.join(saveDir, name + '.png'))
            
            else:
                title = ax.get_title()
                if len(title) > 0:
                    if N > 1:
                        name = name + '___etc'
                    fig.savefig(os.path.join(saveDir, name + '.png'))
                
                else:
                    figNum = gcf().number
                    name = 'figure ' + str(figNum) 
                    fig.savefig(os.path.join(saveDir, name + '.png'))

Note: How to compute confidence invetervals of fitted parameters with (1-alpha) confidence:

    0) from scipy import stats
    1) df = nb_pts - nb_parms ; se = diag(cov)**0.5
    2) Student t coefficient : q = stat.t.ppf(1 - alpha / 2, df)
    3) ConfInt = [params - q*se, params + q*se]

In [8]:
# Test of the fitLine function

# Npts = 10
# seed = 10
# std = 2

# np.random.seed(seed)
# X = np.array([i for i in range(Npts)])
# Y = np.array([i for i in range(Npts)])
# Y = Y + np.random.normal(0, std, Npts)
# p, R = fitLine(X, Y)

# Ypred = p[1]*X+p[0]
# fig, ax = plt.subplots(1,1)
# ax.plot(X, Y, 'bo')
# ax.plot(X, Ypred, 'r-', label='best fit')
# ax.legend(loc='best')
# fig.show()

In [9]:
# Test of the lmfit library // <examples/doc_model_gaussian.py>

# import matplotlib.pyplot as plt
# from lmfit import Model

# np.random.seed(seed)

# x = np.array([i for i in range(Npts)])
# y = np.array([i for i in range(Npts)])
# y = y + np.random.normal(0, std, Npts)


# # def gaussian(x, amp, cen, wid):
# #     """1-d gaussian: gaussian(x, amp, cen, wid)"""
# #     return (amp / (sqrt(2*pi) * wid)) * exp(-(x-cen)**2 / (2*wid**2))

# def linear(x, a, b):
#     """linear function"""
#     return (a*x + b)

# # gmodel = Model(gaussian)
# # result = gmodel.fit(y, x=x, amp=5, cen=5, wid=1)

# # print(result.fit_report())

# # plt.plot(x, y, 'bo')
# # plt.plot(x, result.init_fit, 'k--', label='initial fit')
# # plt.plot(x, result.best_fit, 'r-', label='best fit')
# # plt.legend(loc='best')
# # plt.show()

# lmodel = Model(linear)
# result = lmodel.fit(y, x=x, a=1.5, b=-1)

# print(result.fit_report())

# fig, ax = plt.subplots(1,1)
# ax.plot(x, y, 'bo')
# # ax.plot(x, result.init_fit, 'k--', label='initial fit')
# ax.plot(x, result.best_fit, 'r-', label='best fit')
# ax.legend(loc='best')
# fig.show()

# # print(dir(result))

In [10]:
# Test of the curve_fit function and subsequent computation of confidence intervals

# np.random.seed(seed)

# x = np.array([i for i in range(Npts)])
# y = np.array([i for i in range(Npts)])
# y = y + np.random.normal(0, std, Npts)

# def linear(x, a, b):
#     """linear function"""
#     return (a*x + b)


# initialParameters = [1.5, -1]
# #     print(initialParameters)

# # bounds on parameters - initial parameters must be within these
# lowerBounds = (-np.Inf, -np.Inf)
# upperBounds = (np.Inf, np.Inf)
# parameterBounds = [lowerBounds, upperBounds]

# # fittedParameters, pcov = curve_fit(chadwickModel, hCompr, fCompr, initialParameters, bounds = parameterBounds)
# fittedParameters, pcov = curve_fit(linear, x, y, initialParameters, bounds = parameterBounds)

# A, B = fittedParameters
# print(A, B)
# ypredict = linear(x, A, B)

# varA = pcov[0,0]
# print(pcov)
# print(pcov[0,0]**0.5)
# SSR = np.sum((np.array(y)-np.array(ypredict))**2)
# seA = ((SSR/(len(x)-2))*varA)**0.5
# seA = (varA)**0.5
# confIntA = st.t.interval(alpha=0.95, df=len(x)-2, loc=A, scale=seA)
# confIntAHalfWidthBis = st.t.ppf(0.975, len(x)-2) * seA
# print(confIntAHalfWidthBis)
# # confIntA = (A + seA*st.t.interval(alpha=0.95, df=len(x), loc=0, scale=1)[0], A + seA*st.t.interval(alpha=0.95, df=len(x), loc=0, scale=1)[1])
# confIntAWidth = confIntA[1] - confIntA[0]
# confIntAHalfWidth = confIntAWidth/2
# R2 = get_R2(y,ypredict)

# print(str(A) + ' +/- ' + str(confIntAHalfWidth))

# fig, ax = plt.subplots(1,1)
# ax.plot(x, y, 'bo')
# ax.plot(x, ypredict, 'r-', label='best fit')
# ax.legend(loc='best')
# fig.show()

In [11]:
# Test of the get_R2 function.
# T = df['T']
# Y = df['D3']
# plt.plot(T,Y)
# p, residuals, rank, singular_values, rcond = np.polyfit(T, Y, deg=5, full=True)
# plt.plot(T, Y)
# Y2 = np.zeros(len(T))
# for i in range(len(T)):
#     deg = len(p)-1
#     for k in range(deg+1):
#         Y2[i] += p[k]*(T[i]**(deg-k))
# plt.plot(T,Y2)
# get_R2(Y, Y2)

## TimeSeries functions

In [12]:
def getCellTimeSeriesData(cellID):
    allTimeSeriesDataFiles = [f for f in os.listdir(timeSeriesDataDir) if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".csv"))]
    fileFound = False
    nFile = len(allTimeSeriesDataFiles)
    iFile = 0
    while (not fileFound) and (iFile < nFile):
        f = allTimeSeriesDataFiles[iFile]
        if f.startswith(cellID):
            timeSeriesDataFilePath = os.path.join(timeSeriesDataDir, f)
            timeSeriesDataFrame = pd.read_csv(timeSeriesDataFilePath, sep=';')
            fileFound = True
        iFile += 1
    if not fileFound:
        timeSeriesDataFrame = pd.DataFrame([])
    return(timeSeriesDataFrame)

def plotCellTimeSeriesData(cellID):
    X = 'T'
    Y = np.array(['B', 'F', 'dx', 'dy', 'dz', 'D2', 'D3'])
    units = np.array([' (mT)', ' (pN)', ' (µm)', ' (µm)', ' (µm)', ' (µm)', ' (µm)'])
    timeSeriesDataFrame = getCellTimeSeriesData(cellID)
    if not timeSeriesDataFrame.size == 0:
#         plt.tight_layout()
#         fig.show() # figsize=(20,20)
        axes = timeSeriesDataFrame.plot(x=X, y=Y, kind='line', ax=None, subplots=True, sharex=True, sharey=False, layout=None, \
                       figsize=(8,10), use_index=True, title = cellID + '- Time dependant data', grid=None, legend=False, style=None, logx=False, logy=False, \
                       loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, \
                       table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False)
        plt.gcf().tight_layout()
        for i in range(len(Y)):
            axes[i].set_ylabel(Y[i] + units[i])
        
    else:
        print('cell not found')
        
def addExcludedCell(cellID, motive):
    f = open(os.path.join(experimentalDataDir, 'ExcludedCells.txt'), 'r')
    lines = f.readlines()
    nLines = len(lines)
    excludedCellsList = []
    for iLine in range(nLines):
        line = lines[iLine]
        splitLine = line[:-1].split(',')
        excludedCellsList.append(splitLine[0])
    if cellID in excludedCellsList:
        newlines = copy(lines)
        iLineOfInterest = excludedCellsList.index(cellID)
        if motive not in newlines[iLineOfInterest][:-1].split(','):
            newlines[iLineOfInterest] = newlines[iLineOfInterest][:-1] + ',' + motive + '\n'            
    else:
        newlines = copy(lines)
        newlines.append('' + cellID + ',' + motive + '\n')
    f.close()
    f = open(os.path.join(experimentalDataDir, 'ExcludedCells.txt'), 'w')
    f.writelines(newlines)
    
def getExcludedCells():
    f = open(os.path.join(experimentalDataDir, 'ExcludedCells.txt'), 'r')
    lines = f.readlines()
    nLines = len(lines)
    excludedCellsDict = {}
    for iLine in range(nLines):
        line = lines[iLine]
        splitLine = line[:-1].split(',')
        excludedCellsDict[splitLine[0]] = splitLine[1:]
    return(excludedCellsDict)

In [13]:
# df = getCellTimeSeriesData('21-02-10_M1_P1_C1')
# plotCellTimeSeriesData('21-01-21_M1_P1_C11')
# plotCellTimeSeriesData('21-04-21_M1_P1_C5')

## GlobalTables functions

### Experimental conditions

In [14]:
def getExperimentalConditions(save = False):
    # Getting the table
    experimentalDataFile = 'ExperimentalConditions.csv'
    experimentalDataFilePath = os.path.join(experimentalDataDir, experimentalDataFile)
    expConditionsDF = pd.read_csv(experimentalDataFilePath, sep=';',header=0)
    print('Extracted a table with ' + str(expConditionsDF.shape[0]) + ' lines and ' + str(expConditionsDF.shape[1]) + ' columns.')
    
    # Cleaning the table
    try:
        for c in expConditionsDF.columns:
            if 'Unnamed' in c:
                expConditionsDF = expConditionsDF.drop([c], axis=1)
        expConditionsDF = expConditionsDF.convert_dtypes()

        listTextColumns = []
        for col in expConditionsDF.columns:
            try:
                if expConditionsDF[col].dtype == 'string':
                    listTextColumns.append(col)
            except:
                aaaa=0
                #Ok

        expConditionsDF[listTextColumns] = expConditionsDF[listTextColumns].apply(lambda x: x.str.replace(',','.'))

        expConditionsDF['scale pixel per um'] = expConditionsDF['scale pixel per um'].astype(float)
        try:
            expConditionsDF['optical index correction'] = \
                      expConditionsDF['optical index correction'].apply(lambda x: x.split('/')[0]).astype(float) \
                    / expConditionsDF['optical index correction'].apply(lambda x: x.split('/')[1]).astype(float)
        except:
            print('optical index correction already in ' + str(expConditionsDF['optical index correction'].dtype) + ' type.')

        expConditionsDF['magnetic field correction'] = expConditionsDF['magnetic field correction'].astype(float)
        expConditionsDF['with fluo images'] = expConditionsDF['with fluo images'].astype(bool)

        try:
            expConditionsDF['ramp field'] = \
            expConditionsDF['ramp field'].apply(lambda x: [x.split(';')[0], x.split(';')[1]] if not pd.isnull(x) else [])
        except:
            aaaa=0
            #Ok

        dateExemple = expConditionsDF.loc[expConditionsDF.index[1],'date']

        if re.match(dateFormatExcel, dateExemple):
            print('dates corrected')
            expConditionsDF.loc[1:,'date'] = expConditionsDF.loc[1:,'date'].apply(lambda x: x.split('/')[0] + '-' + x.split('/')[1] + '-' + x.split('/')[2][2:])        
        
    except:
        print('Unexpected bug with the cleaning step')

    if save:
        saveName = 'ExperimentalConditions.csv'
        savePath = os.path.join(experimentalDataDir, saveName)
        expConditionsDF.to_csv(savePath, sep=';')

    expConditionsDF['manipID'] = expConditionsDF['date'] + '_' + expConditionsDF['manip']
    
    return(expConditionsDF)

In [15]:
getExperimentalConditions(save=False).head()

Extracted a table with 34 lines and 21 columns.
optical index correction already in float64 type.


Unnamed: 0,date,manip,experimentType,drug,substrate,objective magnification,scale pixel per um,objective immersion,optical index correction,magnetic field correction,...,cell subtype,bead type,bead diameter,normal field,ramp field,compression duration,with fluo images,bacteria,comments,manipID
0,DEFAULT,DEFAULT,DEFAULT,DEFAULT,DEFAULT,100X,15.8,oil,0.875,1.15,...,DEFAULT,M450,4503,5,['3'. '40'],1s,False,,,DEFAULT_DEFAULT
1,20-08-04,M1,compressions and constant field,none,BSA coated glass,100X,15.8,oil,0.875,1.15,...,aSFL,M450,4503,10,['3'. '40'],1s,False,,,20-08-04_M1
2,20-08-04,M2,compressions and constant field,doxycyclin,BSA coated glass,100X,15.8,oil,0.875,1.15,...,aSFL,M450,4503,10,['3'. '40'],1s,False,,,20-08-04_M2
3,20-08-05,M1,compressions and constant field,doxycyclin,BSA coated glass,100X,15.8,oil,0.875,1.15,...,aSFL,M450,4503,10,['3'. '40'],1s,False,,,20-08-05_M1
4,20-08-05,M2,compressions and constant field,none,BSA coated glass,100X,15.8,oil,0.875,1.15,...,aSFL,M450,4503,10,['3'. '40'],1s,False,,,20-08-05_M2


### Constant Field

In [16]:
listColumnsCtField = ['date','cellName','cellID','manipID',\
                      'duration','medianRawB','medianThickness',\
                      '1stDThickness','9thDThickness','fluctuAmpli',\
                      'R2_polyFit','validated']

def analyseTimeSeries_ctField(tsDf):
    results = {}
    results['duration'] = np.max(tsDf['T'])
    results['medianRawB'] = np.median(tsDf.B)
    results['medianThickness'] = np.median(tsDf.D3)
    results['1stDThickness'] = np.percentile(tsDf.D3, 10)
    results['9thDThickness'] = np.percentile(tsDf.D3, 90)
    results['fluctuAmpli'] = results['9thDThickness'] - results['1stDThickness']
    results['validated'] = (results['1stDThickness'] > 0)
    X, Y = tsDf['T'], tsDf['D3']
    p, residuals, rank, singular_values, rcond = np.polyfit(X, Y, deg=5, full=True)
    Y2 = np.zeros(len(X))
    for i in range(len(X)):
        deg = len(p)-1
        for k in range(deg+1):
            Y2[i] += p[k]*(X[i]**(deg-k))
    results['R2_polyFit'] = get_R2(Y, Y2)
    return(results)



def createDataDict_ctField(list_ctFieldFiles):
    tableDict = {}
    tableDict['date'], tableDict['cellName'], tableDict['cellID'], tableDict['manipID'] = [], [], [], []
    tableDict['duration'], tableDict['medianRawB'], tableDict['medianThickness'] = [], [], []
    tableDict['1stDThickness'], tableDict['9thDThickness'], tableDict['fluctuAmpli'] = [], [], []
    tableDict['R2_polyFit'], tableDict['validated'] = [], []
    for f in list_ctFieldFiles:
        split_f = f.split('_')
        tableDict['date'].append(split_f[0])
        tableDict['cellName'].append(split_f[1] + '_' + split_f[2] + '_' + split_f[3])
        tableDict['cellID'].append(split_f[0] + '_' + split_f[1] + '_' + split_f[2] + '_' + split_f[3])
        tableDict['manipID'].append(split_f[0] + '_' + split_f[1])
        tS_DataFilePath = os.path.join(timeSeriesDataDir, f)
        current_tsDf = pd.read_csv(tS_DataFilePath, ';')
        current_resultDict = analyseTimeSeries_ctField(current_tsDf)
        for k in current_resultDict.keys():
            tableDict[k].append(current_resultDict[k])
    return(tableDict)



def computeGlobalTable_ctField(task = 'fromScratch', fileName = 'Global_CtFieldData', save = False):
    ctFieldTimeSeriesDataFiles = [f for f in os.listdir(timeSeriesDataDir) \
                                  if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".csv") \
                                      and ('thickness' in f))]
#     print(ctFieldTimeSeriesDataFiles)
    if task == 'fromScratch':
        # create a dict containing the data
        tableDict = createDataDict_ctField(ctFieldTimeSeriesDataFiles) # MAIN SUBFUNCTION
        # create the table
        CtField_DF = pd.DataFrame(tableDict)
        
    elif task == 'updateExisting':
        # get existing table
        try:
            savePath = os.path.join(dataDir, (fileName + '.csv'))
            existing_CtField_DF = pd.read_csv(savePath, sep=';')
            for c in existing_CtField_DF.columns:
                if 'Unnamed' in c:
                    existing_CtField_DF = existing_CtField_DF.drop([c], axis=1)
        except:
            print('No existing table found')
        # find which of the time series files are new
        new_ctFieldTimeSeriesDataFiles = []
        for f in ctFieldTimeSeriesDataFiles:
            split_f = f.split('_')
            currentCellID = split_f[0] + '_' + split_f[1] + '_' + split_f[2] + '_' + split_f[3]
            if currentCellID not in existing_CtField_DF.cellID.values:
                new_ctFieldTimeSeriesDataFiles.append(f)
        new_tableDict = createDataDict_ctField(new_ctFieldTimeSeriesDataFiles) # MAIN SUBFUNCTION
        # create the table with new data
        new_CtField_DF = pd.DataFrame(new_tableDict)
        # fuse the two
        new_CtField_DF.index += existing_CtField_DF.shape[0]
        CtField_DF = pd.concat([existing_CtField_DF, new_CtField_DF])
    
    dateExemple = CtField_DF.loc[CtField_DF.index[0],'date']
    if re.match(dateFormatExcel, dateExemple):
        CtField_DF.loc[:,'date'] = CtField_DF.loc[:,'date'].apply(lambda x: x.split('/')[0] + '-' + x.split('/')[1] + '-' + x.split('/')[2][2:])
    
    if save:
        saveName = fileName + '.csv'
        savePath = os.path.join(dataDir, saveName)
        CtField_DF.to_csv(savePath, sep=';')
        
    return(CtField_DF)



def getGlobalTable_ctField(fileName = 'Global_CtFieldData'):
    try:
        savePath = os.path.join(dataDir, (fileName + '.csv'))
        CtField_DF = pd.read_csv(savePath, sep=';')
        for c in CtField_DF.columns:
            if 'Unnamed' in c:
                CtField_DF = CtField_DF.drop([c], axis=1)
        print('Extracted a table with ' + str(CtField_DF.shape[0]) + ' lines and ' + str(CtField_DF.shape[1]) + ' columns.')
        
    except:
        print('No existing table found')
        
    dateExemple = CtField_DF.loc[CtField_DF.index[0],'date']
    if re.match(dateFormatExcel, dateExemple):
        print('dates corrected')
        CtField_DF.loc[:,'date'] = CtField_DF.loc[:,'date'].apply(lambda x: x.split('/')[0] + '-' + x.split('/')[1] + '-' + x.split('/')[2][2:])
#         mecaDF['ManipID'] = mecaDF['ExpDay'] + '_' + mecaDF['CellName'].apply(lambda x: x.split('_')[0])
    return(CtField_DF)

In [17]:
computeGlobalTable_ctField(task='updateExisting',save=False).head()

Unnamed: 0,date,cellName,cellID,manipID,duration,medianRawB,medianThickness,1stDThickness,9thDThickness,fluctuAmpli,R2_polyFit,validated
0,21-02-10,M1_P1_C1,21-02-10_M1_P1_C1,21-02-10_M1,393.55,6.0036,183.232388,103.105084,276.671473,173.566389,0.222994,1.0
1,21-02-10,M1_P1_C2,21-02-10_M1_P1_C2,21-02-10_M1,326.388,6.0036,-58.229968,-147.203982,41.303068,188.507051,0.228033,0.0
2,21-02-10,M1_P1_C3,21-02-10_M1_P1_C3,21-02-10_M1,327.997,6.0036,204.963636,123.738478,277.782516,154.044038,0.402168,1.0
3,21-02-10,M1_P1_C4,21-02-10_M1_P1_C4,21-02-10_M1,326.929,6.0036,203.321336,165.7697,282.439284,116.669584,0.261129,1.0
4,21-02-10,M1_P1_C5,21-02-10_M1_P1_C5,21-02-10_M1,260.26,6.0036,231.480125,169.220847,301.60177,132.380924,0.248455,1.0


In [18]:
getGlobalTable_ctField().head()

Extracted a table with 40 lines and 12 columns.


Unnamed: 0,date,cellName,cellID,manipID,duration,medianRawB,medianThickness,1stDThickness,9thDThickness,fluctuAmpli,R2_polyFit,validated
0,21-02-10,M1_P1_C1,21-02-10_M1_P1_C1,21-02-10_M1,393.55,6.0036,183.232388,103.105084,276.671473,173.566389,0.222994,1.0
1,21-02-10,M1_P1_C2,21-02-10_M1_P1_C2,21-02-10_M1,326.388,6.0036,-58.229968,-147.203982,41.303068,188.507051,0.228033,0.0
2,21-02-10,M1_P1_C3,21-02-10_M1_P1_C3,21-02-10_M1,327.997,6.0036,204.963636,123.738478,277.782516,154.044038,0.402168,1.0
3,21-02-10,M1_P1_C4,21-02-10_M1_P1_C4,21-02-10_M1,326.929,6.0036,203.321336,165.7697,282.439284,116.669584,0.261129,1.0
4,21-02-10,M1_P1_C5,21-02-10_M1_P1_C5,21-02-10_M1,260.26,6.0036,231.480125,169.220847,301.60177,132.380924,0.248455,1.0


### Mechanics

Workflow
* analyseTimeSeries_meca() analyse 1 file and return the dict (with the results of the analysis)
* createMecaDataDict() call the previous function on the given list of files and concatenate the results
* computeGlobalTable_meca() call the previous function and convert the dict to a DataFrame

In [19]:
listColumnsMeca = ['date','cellName','cellID','manipID',\
                   'compNum','compDuration','compStartTime',\
                   'initialThickness','minThickness','maxIndent','previousThickness','surroundingThickness',\
                   'validatedThickness',\
                   'ctFieldThickness','ctFieldFluctuAmpli',\
                   'H0Chadwick','EChadwick','R2Chadwick','EChadwick_CIWidth',\
                   'hysteresis',\
                   'critFit', 'validatedFit','comments'] # 'fitParams',

def compressionFitChadwick(hCompr, fCompr, DIAMETER):
    
    error = False
    
    def chadwickModel(h, E, H0):
        R = DIAMETER/2
        f = (np.pi*E*(R/2)*((H0-h)**2))/(3*H0)
        return(f)

    def inversedChadwickModel(f, E, H0):
        R = DIAMETER/2
        h = H0 - ((3*H0*f)/(np.pi*E*R))**0.5
        return(h)

    # some initial parameter values - must be within bounds
    initH0 = max(hCompr) # H0 ~ h_max
    initE = (3*max(hCompr)*max(fCompr))/(np.pi*(DIAMETER/2)*(max(hCompr)-min(hCompr))**2) # E ~ 3*H0*F_max / pi*R*(H0-h_min)²
#     initH0, initE = initH0*(initH0>0), initE*(initE>0)
    
    initialParameters = [initE, initH0]
#     print(initialParameters)

    # bounds on parameters - initial parameters must be within these
    lowerBounds = (0, 0)
    upperBounds = (np.Inf, np.Inf)
    parameterBounds = [lowerBounds, upperBounds]
    
#     testH = inversedChadwickModel(fCompr, initE, initH0)
#     fig, ax = plt.subplots(1,1)
#     ax.plot(hCompr,fCompr,'b-', linewidth = 0.8)
#     ax.plot(testH,fCompr,'kx', linewidth = 0.8)
#     ax.set_xlabel('h (nm)')
#     ax.set_ylabel('f (pN)')
#     fig.show()
#     print(initialParameters)

    try:
        params, covM = curve_fit(inversedChadwickModel, fCompr, hCompr, initialParameters, bounds = parameterBounds)

        # Previously I fitted with y=F and x=H, but it didn't work so well cause H(t) isn't monotonous:
        # params, covM = curve_fit(chadwickModel, hCompr, fCompr, initialParameters, bounds = parameterBounds)
        # Fitting with the 'inverse Chadwick model', with y=H and x=F is more convenient

        E, H0 = params
        hPredict = inversedChadwickModel(fCompr, E, H0)

        SSR = np.sum((hCompr-hPredict)**2)
        alpha = 0.975
        df = len(fCompr)-len(params)
        q = st.t.ppf(alpha, df) # Student coefficient
        R2 = get_R2(hCompr,hPredict)

        varE = covM[0,0]
        seE = (varE)**0.5
        E, seE = E*1e6, seE*1e6
        confIntE = [E-q*seE, E+q*seE]
        confIntEWidth = 2*q*seE

        varH0 = covM[1,1]
        seH0 = (varH0)**0.5
        confIntH0 = [H0-q*seH0, H0+q*seH0]
        confIntH0Width = 2*q*seH0
        
        
    except:
        error = True
        E, H0, hPredict, R2, confIntE, confIntH0 = -1, -1, np.ones(len(hCompr))*(-1), -1, [-1,-1], [-1,-1]
    
    return(E, H0, hPredict, R2, confIntE, confIntH0, error)



def analyseTimeSeries_meca(f, tsDF, expDf, listColumnsMeca, PLOT, PLOT_SHOW):
    
    split_f = f.split('_')
    tsDF.dx, tsDF.dy, tsDF.dz, tsDF.D2, tsDF.D3 = tsDF.dx*1000, tsDF.dy*1000, tsDF.dz*1000, tsDF.D2*1000, tsDF.D3*1000
    thisManipID = split_f[0] + '_' + split_f[1]
    expDf['manipID'] = expDf['date'] + '_' + expDf['manip']
    thisExpDf = expDf.loc[expDf['manipID'] == thisManipID]
    DIAMETER = thisExpDf.at[thisExpDf.index.values[0], 'bead diameter']
    
    results = {}
    for c in listColumnsMeca:
        results[c] = []
        
    Ncomp = max(tsDF['idxCompression'])
    NimgComp = np.sum((tsDF['idxCompression'] != 0))/Ncomp
    NimgCompTh = round(0.49999999999 + np.sum((tsDF['idxCompression'] != 0))/Ncomp)
    NimgBtwComp = np.sum((tsDF['idxCompression'] == 0))/Ncomp
    NimgBtwCompTh = round(0.4999999999 + np.sum((tsDF['idxCompression'] == 0))/Ncomp)
#     print('Ncomp : ' + str(Ncomp) + ' ; ' + 'NimgComp : ' + str(NimgComp) + '/' + str(NimgCompTh) + ' ; ' + 'NimgBtwComp : ' + str(NimgBtwComp) + '/' + str(NimgBtwCompTh))
#     if not NimgBtwComp%2 == 0:
#         print('Bug with the compressions sequence delimitation')


    # These values are computed once for the whole cell D3 time series, but since the table has 1 line per compression, 
    # that same value will be put in the table for each line corresponding to that cell
    ctFieldH = (tsDF.loc[tsDF['idxCompression'] == 0, 'D3'].values - DIAMETER)
    ctFieldThickness   = np.median(ctFieldH)
    ctFieldFluctuAmpli = np.percentile(ctFieldH,90) - np.percentile(ctFieldH,10)
    
    if PLOT:
        fig1, ax1 = plt.subplots(1,1,figsize=(tsDF.shape[0]*(1/100),3))
        fig2, ax2 = plt.subplots(1,Ncomp,figsize=(2*Ncomp,2))
        ax1.plot(tsDF['T'].values, tsDF['D3'].values-DIAMETER, 'b-', linewidth = 0.75)

    
    for i in range(1, Ncomp+1):#Ncomp+1):

        # (1) Identifiers
        results['date'].append(split_f[0])
        results['cellName'].append(split_f[1] + '_' + split_f[2] + '_' + split_f[3])
        results['cellID'].append(split_f[0] + '_' + split_f[1] + '_' + split_f[2] + '_' + split_f[3])
        results['manipID'].append(split_f[0] + '_' + split_f[1])
        
        # (2) Segment the compression n°i
        thisCompDf = tsDF.loc[tsDF['idxCompression'] == i,:]
        iStart = (findFirst(tsDF['idxCompression'], i))
        iStop = iStart+thisCompDf.shape[0]
        
        # Easy-to-get parameters
        results['compNum'].append(i)
        results['compDuration'].append(thisExpDf.at[thisExpDf.index.values[0], 'compression duration'])
        results['compStartTime'].append(thisCompDf['T'].values[0])
        
        # (3) Inside the compression n°i, delimit the compression and relaxation phases
        
        # Delimit the start of the increase of B (typically the moment when the field decrease from 5 to 3)
        # and the end of its decrease (typically when it goes back from 3 to 5)
        listB = thisCompDf.B.values
        offsetStart, offsetStop = 0, 0
        minB, maxB = min(listB), max(listB)
        thresholdB = (maxB-minB)/50
        
        k = 0
        while (listB[k] > minB+thresholdB) or (listB[-1-k] > minB+thresholdB):
            offsetStart += int(listB[k] > minB+thresholdB)
            offsetStop += int(listB[-1-k] > minB+thresholdB)
            k += 1
        jStart = offsetStart # Beginning of compression
        jMax = np.argmax(thisCompDf.B) # End of compression, beginning of relaxation
        jStop = thisCompDf.shape[0] - offsetStop # End of relaxation
        
        # Four arrays
        hCompr = (thisCompDf.D3.values[jStart:jMax+1] - DIAMETER)
        hRelax = (thisCompDf.D3.values[jMax+1:jStop] - DIAMETER)
        fCompr = (thisCompDf.F.values[jStart:jMax+1])
        fRelax = (thisCompDf.F.values[jMax+1:jStop])
        
        # Refinement of the compression delimitation.
        # Remove the 1-2 points at the begining where there is just the viscous relaxation of the cortex
        # because of the initial decrease of B and the cortex thickness increases.
        offsetStart2 = 0
        k = 0
        while (hCompr[k] < hCompr[k+1]) and k<len(hCompr):
            offsetStart2 += 1
            k += 1
        
        # Better compressions arrays
        hCompr = hCompr[offsetStart2:]
        fCompr = fCompr[offsetStart2:]
        
        # Get the points of constant field preceding and surrounding the current compression
        # Ex : if the labview code was set so that there is 6 points of ct field before and after each compression,
        # previousPoints will contains D3[iStart-12:iStart]
        # surroundingPoints will contains D3[iStart-6:iStart] and D3[iStop:iStop+6]
        previousPoints = (tsDF.D3.values[max(0,iStart-(NimgBtwCompTh)):iStart]) - DIAMETER
        surroundingPoints = np.concatenate([tsDF.D3.values[max(0,iStart-(NimgBtwCompTh//2)):iStart],tsDF.D3.values[iStop:iStop+(NimgBtwCompTh//2)]]) - DIAMETER
        
        # Parameters relative to the thickness ( = D3-DIAMETER)
        results['initialThickness'].append(np.mean(hCompr[0:3]))
        results['minThickness'].append(np.min(hCompr))
        results['maxIndent'].append(results['initialThickness'][-1] - results['minThickness'][-1])
        results['previousThickness'].append(np.median(previousPoints))
        results['surroundingThickness'].append(np.median(surroundingPoints))
        results['ctFieldThickness'].append(ctFieldThickness)
        results['ctFieldFluctuAmpli'].append(ctFieldFluctuAmpli)
        
        validatedThickness = np.min([results['initialThickness'],results['minThickness'],results['previousThickness'],\
                                    results['surroundingThickness'],results['ctFieldThickness']]) > 0
        results['validatedThickness'].append(validatedThickness)

        # (4) Fit with Chadwick model of the force-thickness curve
        
        E, H0, hPredict, R2, confIntE, confIntH0, fitError = compressionFitChadwick(hCompr, fCompr, DIAMETER) # IMPORTANT SUBFUNCTION
        
        R2CRITERION = 0.9
        critFit = 'R2 > ' + str(R2CRITERION)
        results['critFit'].append(critFit)
        validatedFit = (R2 > R2CRITERION)
        
        if PLOT:
            # fig1
            if not fitError:
                if validatedFit:
                    ax1.plot(thisCompDf['T'].values, thisCompDf['D3'].values-DIAMETER, color = 'chartreuse', linestyle = '-', linewidth = 1)
                else:
                    ax1.plot(thisCompDf['T'].values, thisCompDf['D3'].values-DIAMETER, color = 'gold', linestyle = '-', linewidth = 1)
            else:
                ax1.plot(thisCompDf['T'].values, thisCompDf['D3'].values-DIAMETER, color = 'crimson', linestyle = '-', linewidth = 1)
            ax1.set_xlabel('t (s)')
            ax1.set_ylabel('h (nm)')
            fig1.suptitle(results['cellID'][-1])
            
            # fig2
            ax2[i-1].plot(hCompr,fCompr,'b-', linewidth = 0.8)
            ax2[i-1].plot(hRelax,fRelax,'r-', linewidth = 0.8)
            ax2[i-1].title.set_text(results['cellID'][-1] + '__c' + str(i))
            ax2[i-1].set_xlabel('h (nm)')
            ax2[i-1].set_ylabel('f (pN)')
            if not fitError:
                ax2[i-1].plot(hPredict,fCompr,'k--', linewidth = 0.8)
            for item in ([ax2[i-1].title, ax2[i-1].xaxis.label, ax2[i-1].yaxis.label] + ax2[i-1].get_xticklabels() + ax2[i-1].get_yticklabels()):
                item.set_fontsize(6)
                

        
        if not fitError:
            confIntEWidth = abs(confIntE[0] - confIntE[1])

            results['H0Chadwick'].append(H0)
            results['EChadwick'].append(E)
            results['R2Chadwick'].append(R2)
            results['EChadwick_CIWidth'].append(confIntEWidth)
            

            results['validatedFit'].append(validatedFit)
            if validatedFit:
                results['comments'].append('ok')
            else:
                results['comments'].append('R2 < ' + str(R2CRITERION))
                
        if fitError:
            validatedFit = False
            results['H0Chadwick'].append(np.nan)
            results['EChadwick'].append(np.nan)
            results['R2Chadwick'].append(np.nan)
            results['EChadwick_CIWidth'].append(np.nan)
            results['validatedFit'].append(validatedFit)
            results['comments'].append('fitFailure')
            
        
        # (5) hysteresis (its definition may change)
        results['hysteresis'].append(hCompr[0] - hRelax[-1])
    
    if PLOT:
        archiveFig(fig1, ax1, name=results['cellID'][-1] + '_h(t)', figSubDir = 'MecaAnalysis_allCells')
        archiveFig(fig2, ax2, name=results['cellID'][-1] + '_F(h)', figSubDir = 'MecaAnalysis_allCells')
        if PLOT_SHOW:
            fig1.show()
            fig2.tight_layout()
            fig2.show()
        else:
            plt.close('all')
    
    return(results)



def createDataDict_meca(list_mecaFiles, listColumnsMeca, PLOT):
    expDf = getExperimentalConditions()
    tableDict = {}
    Nfiles = len(list_mecaFiles)
    PLOT_SHOW = (Nfiles<11)
    if not PLOT_SHOW:
        plt.ioff()
    for c in listColumnsMeca:
        tableDict[c] = []
    for f in list_mecaFiles: #[36:37]:
#         print(f)
        tS_DataFilePath = os.path.join(timeSeriesDataDir, f)
        current_tsDF = pd.read_csv(tS_DataFilePath, ';')
        current_resultDict = analyseTimeSeries_meca(f, current_tsDF, expDf, listColumnsMeca, PLOT, PLOT_SHOW) # MAIN SUBFUNCTION
        for k in current_resultDict.keys():
            tableDict[k] += current_resultDict[k]
    plt.ion()
    return(tableDict)



def computeGlobalTable_meca(task = 'fromScratch', fileName = 'Global_MecaData', save = False, PLOT = True, \
                            listColumnsMeca=listColumnsMeca):
    """
    Compute the GlobalTable_meca from the time series data files.
    Option task='fromScratch' will analyse all the time series data files and construct a new GlobalTable from them regardless of the existing GlobalTable.
    Option task='updateExisting' will open the existing GlobalTable and determine which of the time series data files are new ones, and will append the existing GlobalTable with the data analysed from those new fils.
    listColumnsMeca have to contain all the fields of the table that will be constructed.
    """
    top = time.time()
    list_mecaFiles = [f for f in os.listdir(timeSeriesDataDir) \
                      if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".csv") \
                      and ('R40' in f))] # Change to allow different formats in the future
    # print(mecaTimeSeriesDataFiles)
    
    if task == 'fromScratch':
        # create a dict containing the data
        tableDict = createDataDict_meca(list_mecaFiles, listColumnsMeca, PLOT) # MAIN SUBFUNCTION
        # create the dataframe from it
        meca_DF = pd.DataFrame(tableDict)
        
    elif task == 'updateExisting':
        # get existing table
        try:
            savePath = os.path.join(dataDir, (fileName + '.csv'))
            existing_meca_DF = pd.read_csv(savePath, sep=';')
        except:
            print('No existing table found')
            
        # find which of the time series files are new
        new_mecaTimeSeriesDataFiles = []
        for f in mecaTimeSeriesDataFiles:
            split_f = f.split('_')
            currentCellID = split_f[0] + '_' + split_f[1] + '_' + split_f[2] + '_' + split_f[3]
            if currentCellID not in existing_meca_DF.cellID:
                new_mecaTimeSeriesDataFiles.append(f)
                
        # create the dict with new data
        new_tableDict = createMecaDataDict(new_mecaTimeSeriesDataFiles) # MAIN SUBFUNCTION
        # create the dataframe from it
        new_meca_DF = pd.dataframe(tableDict)
        # fuse the existing table with the new one
        meca_DF = pd.concat([existing_meca_DF, new_meca_DF])
        
    if save:
        saveName = fileName + '.csv'
        savePath = os.path.join(dataDir, saveName)
        meca_DF.to_csv(savePath, sep=';')
    
    delta = time.time() - top
    print(delta)
    
    return(meca_DF)
            

    
def getGlobalTable_meca(fileName = 'Global_mecaData'):
    try:
        savePath = os.path.join(dataDir, (fileName + '.csv'))
        meca_DF = pd.read_csv(savePath, sep=';')
        print('Extracted a table with ' + str(meca_DF.shape[0]) + ' lines and ' + str(meca_DF.shape[1]) + ' columns.')
    except:
        print('No existing table found')
    for c in meca_DF.columns:
            if 'Unnamed' in c:
                meca_DF = meca_DF.drop([c], axis=1)
    if not ('manipID' in meca_DF.columns):
        meca_DF['manipID'] = meca_DF['ExpDay'] + '_' + meca_DF['CellID'].apply(lambda x: x.split('_')[0])
    dateExemple = meca_DF.loc[meca_DF.index[0],'ExpDay']
    if re.match(dateFormatExcel, dateExemple):
        print('bad date')
    return(meca_DF)

In [96]:
A = computeGlobalTable_meca(PLOT = True)
A.head()

Extracted a table with 34 lines and 21 columns.
optical index correction already in float64 type.
195.11612057685852


Unnamed: 0,date,cellName,cellID,manipID,compNum,compDuration,compStartTime,initialThickness,minThickness,maxIndent,...,ctFieldThickness,ctFieldFluctuAmpli,H0Chadwick,EChadwick,R2Chadwick,EChadwick_CIWidth,hysteresis,critFit,validatedFit,comments
0,20-08-04,M1_P1_C5,20-08-04_M1_P1_C5,20-08-04_M1,1,1s,5.036,79.012161,52.924711,26.08745,...,49.050637,76.991876,84.169269,40937.261808,0.573634,42613.398547,-16.043723,R2 > 0.9,False,R2 < 0.9
1,20-08-04,M1_P1_C5,20-08-04_M1_P1_C5,20-08-04_M1,2,1s,20.249,54.007032,10.1788,43.828233,...,49.050637,76.991876,70.212093,7635.904488,0.858945,2882.156895,-18.140355,R2 > 0.9,False,R2 < 0.9
2,20-08-04,M1_P1_C5,20-08-04_M1_P1_C5,20-08-04_M1,3,1s,34.662,46.928798,15.165123,31.763675,...,49.050637,76.991876,56.367992,16173.698405,0.818831,7613.312129,-13.535354,R2 > 0.9,False,R2 < 0.9
3,20-08-04,M1_P1_C5,20-08-04_M1_P1_C5,20-08-04_M1,4,1s,49.076,88.940281,51.951057,36.989223,...,49.050637,76.991876,102.936509,15696.980499,0.90227,5849.28078,-11.761414,R2 > 0.9,True,ok
4,20-08-04,M1_P1_C5,20-08-04_M1_P1_C5,20-08-04_M1,5,1s,63.487,52.169706,19.679992,32.489714,...,49.050637,76.991876,66.07099,13980.256787,0.93604,3626.052994,11.261056,R2 > 0.9,True,ok


In [21]:
getGlobalTable_meca().head()

Extracted a table with 5630 lines and 20 columns.


Unnamed: 0,ExpType,ExpDay,CellID,CellName,CompNum,TpsComp,MaxIndent,MinThickness,Hysteresis,CompTime,...,SurroundingThickness,PreviousThickness,H0Chadwick,EChadwick,CiEChadwick,R2Chadwick,FitParams,Validated,Comments,manipID
0,DictyAx2-Comp,20-02-13,M1_P1_C1,20-02-13_M1_P1_C1,1,2s,172.944935,290.89664,0.46807,5.600158,...,172.291238,138.904119,531.398678,3352.518532,638.664233,0.975972,Strain100-R20.9,1,Succes!,20-02-13_M1
1,DictyAx2-Comp,20-02-13,M1_P1_C1,20-02-13_M1_P1_C1,2,4s,325.048555,340.247874,0.464737,20.967051,...,231.767587,227.523183,792.775299,1185.299024,59.05605,0.996431,Strain100-R20.9,0,Disaligned vertically,20-02-13_M1
2,DictyAx2-Comp,20-02-13,M1_P1_C1,20-02-13_M1_P1_C1,3,8s,563.627073,90.15585,0.901448,40.704944,...,192.202443,262.920701,725.884048,676.576117,74.418428,0.960774,Strain100-R20.9,0,Disaligned vertically & Has large jumps,20-02-13_M1
3,DictyAx2-Comp,20-02-13,M1_P1_C1,20-02-13_M1_P1_C1,4,2s,500.630991,321.764384,0.729998,66.117053,...,167.247232,184.655264,955.873791,685.530417,109.03,0.983909,Strain100-R20.9,0,Disaligned vertically,20-02-13_M1
4,DictyAx2-Comp,20-02-13,M1_P1_C1,20-02-13_M1_P1_C1,5,4s,,136.139589,-0.100595,81.47925,...,161.661792,162.303075,10500.987861,12.447549,33.721328,0.041727,Strain100-R20.9,0,Disaligned vertically & Has too big CI on E & ...,20-02-13_M1


In [22]:
# Comparison of values obtained with the python code vs matlab

# Abis = A.loc[A['cellID'] == '21-01-18_M2_P1_C3'][['compNum','H0Chadwick','EChadwick','R2Chadwick','EChadwick_CIWidth']]
# Abis.head()

# B = getGlobalTable_meca()
# Bbis = B.loc[B['CellName'] == '21-01-18_M2_P1_C3'][['CompNum','H0Chadwick','EChadwick','R2Chadwick','CiEChadwick']]
# Bbis.index = [i for i in range(Bbis.shape[0])]
# Bbis = Bbis.rename(columns={'CompNum': 'compNum', 'CiEChadwick': 'EChadwick_CIWidth'})
# Bbis.head()

# C = (Abis-Bbis)/Abis
# C.head()

### Fluorescence

In [23]:
def getFluoData(save = False):
    # Getting the table
    fluoDataFile = 'FluoQuantification.csv'
    fluoDataFilePath = os.path.join(dataDir, fluoDataFile)
    fluoDF = pd.read_csv(fluoDataFilePath, sep=';',header=0)
    print('Extracted a table with ' + str(fluoDF.shape[0]) + ' lines and ' + str(fluoDF.shape[1]) + ' columns.')
    # Cleaning the table
    try:
        for c in fluoDF.columns:
            if 'Unnamed' in c:
                fluoDF = fluoDF.drop([c], axis=1)
        
    except:
        print('Unexpected bug with the cleaning step')

    if save:
        saveName = 'FluoQuantification.csv'
        savePath = os.path.join(dataDir, saveName)
        fluoDF.to_csv(savePath, sep=';')

    
    return(fluoDF)

In [24]:
getFluoData().head()

Extracted a table with 58 lines and 2 columns.


Unnamed: 0,cellID,meanFluoPeakAmplitude
0,21-01-18_M1_P1_C5,1798.786253
1,21-01-18_M1_P1_C6,768.192944
2,21-01-18_M1_P1_C7,686.701203
3,21-01-18_M1_P1_C8,1655.53462
4,21-01-18_M1_P1_C9,187.560043


### Data import & DataFrame formatting

In [64]:
pd.set_option('max_columns', None)
pd.reset_option('max_columns')
pd.set_option('max_rows', None)
pd.reset_option('max_rows')

In [26]:
# getExperimentalConditions().head()
# getGlobalTable_ctField().head()
# getGlobalTable_meca().head()
# getFluoData().head()

In [27]:
# GlobalTable_ctField

GlobalTable_ctField = getGlobalTable_ctField()
table_ExpConditions = getExperimentalConditions()
table_fluo = getFluoData()
GlobalTable_ctField = pd.merge(table_ExpConditions, GlobalTable_ctField, how="inner", on='manipID',
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
GlobalTable_ctField = pd.merge(GlobalTable_ctField, table_fluo, how="left", on='cellID',
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
print('Merged table has ' + str(GlobalTable_ctField.shape[0]) + ' lines and ' + str(GlobalTable_ctField.shape[1]) + ' columns.')

GlobalTable_ctField.head()

Extracted a table with 40 lines and 12 columns.
Extracted a table with 34 lines and 21 columns.
optical index correction already in float64 type.
Extracted a table with 58 lines and 2 columns.
Merged table has 40 lines and 33 columns.


Unnamed: 0,date_x,manip,experimentType,drug,substrate,objective magnification,scale pixel per um,objective immersion,optical index correction,magnetic field correction,cell type,cell subtype,bead type,bead diameter,normal field,ramp field,compression duration,with fluo images,bacteria,comments,manipID,date_y,cellName,cellID,duration,medianRawB,medianThickness,1stDThickness,9thDThickness,fluctuAmpli,R2_polyFit,validated,meanFluoPeakAmplitude
0,21-02-10,M1,constant field,none,20um fibronectin discs,100X,15.8,oil,0.875,1.2,3T3,aSFL,M450,4503,5,[],,True,,,21-02-10_M1,21-02-10,M1_P1_C1,21-02-10_M1_P1_C1,393.55,6.0036,183.232388,103.105084,276.671473,173.566389,0.222994,1.0,
1,21-02-10,M1,constant field,none,20um fibronectin discs,100X,15.8,oil,0.875,1.2,3T3,aSFL,M450,4503,5,[],,True,,,21-02-10_M1,21-02-10,M1_P1_C2,21-02-10_M1_P1_C2,326.388,6.0036,-58.229968,-147.203982,41.303068,188.507051,0.228033,0.0,
2,21-02-10,M1,constant field,none,20um fibronectin discs,100X,15.8,oil,0.875,1.2,3T3,aSFL,M450,4503,5,[],,True,,,21-02-10_M1,21-02-10,M1_P1_C3,21-02-10_M1_P1_C3,327.997,6.0036,204.963636,123.738478,277.782516,154.044038,0.402168,1.0,
3,21-02-10,M1,constant field,none,20um fibronectin discs,100X,15.8,oil,0.875,1.2,3T3,aSFL,M450,4503,5,[],,True,,,21-02-10_M1,21-02-10,M1_P1_C4,21-02-10_M1_P1_C4,326.929,6.0036,203.321336,165.7697,282.439284,116.669584,0.261129,1.0,
4,21-02-10,M1,constant field,none,20um fibronectin discs,100X,15.8,oil,0.875,1.2,3T3,aSFL,M450,4503,5,[],,True,,,21-02-10_M1,21-02-10,M1_P1_C5,21-02-10_M1_P1_C5,260.26,6.0036,231.480125,169.220847,301.60177,132.380924,0.248455,1.0,


In [28]:
# GlobalTable_meca

GlobalTable_meca = getGlobalTable_meca()
table_ExpConditions = getExperimentalConditions()
table_fluo = getFluoData()
GlobalTable_meca = pd.merge(GlobalTable_meca, table_ExpConditions, how="inner", on='manipID',
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
GlobalTable_meca = pd.merge(GlobalTable_meca, table_fluo, how="left", left_on='CellName', right_on='cellID'
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
print('Merged table has ' + str(GlobalTable_meca.shape[0]) + ' lines and ' + str(GlobalTable_meca.shape[1]) + ' columns.')

GlobalTable_meca.tail()

Extracted a table with 5630 lines and 20 columns.
Extracted a table with 34 lines and 21 columns.
optical index correction already in float64 type.
Extracted a table with 58 lines and 2 columns.
Merged table has 1569 lines and 43 columns.


Unnamed: 0,ExpType,ExpDay,CellID,CellName,CompNum,TpsComp,MaxIndent,MinThickness,Hysteresis,CompTime,InitialThickness,SurroundingThickness,PreviousThickness,H0Chadwick,EChadwick,CiEChadwick,R2Chadwick,FitParams,Validated,Comments,manipID,date,manip,experimentType,drug,substrate,objective magnification,scale pixel per um,objective immersion,optical index correction,magnetic field correction,cell type,cell subtype,bead type,bead diameter,normal field,ramp field,compression duration,with fluo images,bacteria,comments,cellID,meanFluoPeakAmplitude
1564,3T3aSFL-6FP_nodrug,21-04-28,M2_P1_C13-2,21-04-28_M2_P1_C13-2,9,1s,70.808738,203.024049,0.584341,111.490545,273.832787,281.184191,281.184191,301.276825,13357.717764,681.53694,0.997929,Strain100-R20.9,1,Succes!,21-04-28_M2,21-04-28,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.15,3T3,aSFL-6FP,M450,4503,5,['3'. '40'],1s,False,,,,
1565,3T3aSFL-6FP_nodrug,21-04-28,M2_P1_C13-2,21-04-28_M2_P1_C13-2,10,1s,133.594515,262.368033,0.984246,4.342545,395.962548,284.477982,,436.440808,5447.209967,956.131869,0.974657,Strain100-R20.9,0,Has large jumps,21-04-28_M2,21-04-28,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.15,3T3,aSFL-6FP,M450,4503,5,['3'. '40'],1s,False,,,,
1566,3T3aSFL-6FP_nodrug,21-04-28,M2_P1_C14,21-04-28_M2_P1_C14,1,1s,7.349549,2.101403,-0.6763,16.563636,9.450952,0.795231,0.795231,23.237686,21431.797804,22577.976147,0.333494,Strain100-R20.9,0,Bad R2 on fit,21-04-28_M2,21-04-28,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.15,3T3,aSFL-6FP,M450,4503,5,['3'. '40'],1s,False,,,,
1567,3T3aSFL-6FP_nodrug,21-04-28,M2_P1_C14,21-04-28_M2_P1_C14,2,1s,,,,,,,,,,,,Strain100-R20.9,0,Couldn't be fitted,21-04-28_M2,21-04-28,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.15,3T3,aSFL-6FP,M450,4503,5,['3'. '40'],1s,False,,,,
1568,3T3aSFL-6FP_nodrug,21-04-28,M2_P1_C14,21-04-28_M2_P1_C14,3,1s,5.615079,5.206309,-2.229363,4.361564,10.821388,4.503973,,21.247305,34875.49594,29856.452437,0.498364,Strain100-R20.9,0,Bad R2 on fit,21-04-28_M2,21-04-28,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.15,3T3,aSFL-6FP,M450,4503,5,['3'. '40'],1s,False,,,,


In [65]:
# GlobalTable_mecaBis

GlobalTable_mecaBis = computeGlobalTable_meca(PLOT = False)

table_ExpConditions = getExperimentalConditions()
table_fluo = getFluoData()
GlobalTable_mecaBis = pd.merge(GlobalTable_mecaBis, table_ExpConditions, how="inner", on='manipID',
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
GlobalTable_mecaBis = pd.merge(GlobalTable_mecaBis, table_fluo, how="left", left_on='cellID', right_on='cellID'
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
print('Merged table has ' + str(GlobalTable_mecaBis.shape[0]) + ' lines and ' + str(GlobalTable_mecaBis.shape[1]) + ' columns.')

Extracted a table with 34 lines and 21 columns.
optical index correction already in float64 type.
23.479490280151367
Extracted a table with 34 lines and 21 columns.
optical index correction already in float64 type.
Extracted a table with 108 lines and 2 columns.
Merged table has 2089 lines and 44 columns.


In [33]:
GlobalTable_mecaBis.tail()

Unnamed: 0,date_x,cellName,cellID,manipID,compNum,compDuration,compStartTime,initialThickness,minThickness,maxIndent,previousThickness,surroundingThickness,validatedThickness,ctFieldThickness,ctFieldFluctuAmpli,H0Chadwick,EChadwick,R2Chadwick,EChadwick_CIWidth,hysteresis,critFit,validatedFit,comments_x,date_y,manip,experimentType,drug,substrate,objective magnification,scale pixel per um,objective immersion,optical index correction,magnetic field correction,cell type,cell subtype,bead type,bead diameter,normal field,ramp field,compression duration,with fluo images,bacteria,comments_y,meanFluoPeakAmplitude
2084,21-06-17,M2_P1_C9,21-06-17_M2_P1_C9,21-06-17_M2,6,1s,63.627,180.848673,109.731896,71.116777,193.817041,188.368332,True,197.210345,56.444054,194.280385,9006.941616,0.869367,3053.504221,14.161594,R2 > 0.9,False,R2 < 0.9,21-06-17,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,77.71709
2085,21-06-17,M2_P1_C9,21-06-17_M2_P1_C9,21-06-17_M2,7,1s,75.543,234.485517,118.49684,115.988677,197.122107,212.652351,True,197.210345,56.444054,263.142485,4534.507073,0.951283,862.463086,30.461031,R2 > 0.9,True,ok,21-06-17,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,77.71709
2086,21-06-17,M2_P1_C9,21-06-17_M2_P1_C9,21-06-17_M2,8,1s,87.473,214.686948,122.724912,91.962036,214.328291,212.498396,True,197.210345,56.444054,247.024077,5978.063469,0.956488,1100.280729,7.261926,R2 > 0.9,True,ok,21-06-17,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,77.71709
2087,21-06-17,M2_P1_C9,21-06-17_M2_P1_C9,21-06-17_M2,9,1s,99.377,197.260596,114.63195,82.628646,196.664406,182.698845,True,197.210345,56.444054,220.871034,7009.702181,0.913374,1878.68581,25.721146,R2 > 0.9,True,ok,21-06-17,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,77.71709
2088,21-06-17,M2_P1_C9,21-06-17_M2_P1_C9,21-06-17_M2,10,1s,111.308,162.626076,115.017081,47.608995,167.01194,161.706524,True,197.210345,56.444054,176.849983,17769.009781,0.980165,2270.752159,2.821165,R2 > 0.9,True,ok,21-06-17,M2,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,77.71709


In [31]:
# Experiment counter - Matlab table

cellID = 'cellID'
GlobalTable_ctField_CountCell = GlobalTable_ctField.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_ctField_CountCell = GlobalTable_ctField_CountCell.loc[:, [cellID]].rename(columns={cellID : 'Count cells - ctField'})

cellID = 'CellName'
GlobalTable_meca_CountComp = GlobalTable_meca.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_meca_CountComp = GlobalTable_meca_CountComp.loc[:, [cellID]].rename(columns={cellID : 'Count compressions'})

cellID = 'CellName'
group = GlobalTable_meca.groupby(cellID)
dictAggMean = getDictAggMean(GlobalTable_meca)
GlobalTable_meca_perCell = group.agg(dictAggMean)
GlobalTable_meca_CountCell = GlobalTable_meca_perCell.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_meca_CountCell = GlobalTable_meca_CountCell.loc[:, [cellID]].rename(columns={cellID : 'Count cells - meca'})


# Fuse all the previous tables
GlobalTable_CountAll = pd.concat([GlobalTable_ctField_CountCell, GlobalTable_meca_CountCell, GlobalTable_meca_CountComp], axis=1)
GlobalTable_CountAll = GlobalTable_CountAll.fillna(0)
GlobalTable_CountAll = GlobalTable_CountAll.loc[:,:].astype(int)
GlobalTable_CountAll

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Count cells - ctField,Count cells - meca,Count compressions
cell type,cell subtype,bead type,drug,substrate,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3T3,aSFL,M450,doxycyclin,20um fibronectin discs,20,28,352
3T3,aSFL,M450,doxycyclin,BSA coated glass,0,16,161
3T3,aSFL,M450,none,20um fibronectin discs,20,32,392
3T3,aSFL,M450,none,BSA coated glass,0,13,117
3T3,aSFL-6FP,M450,doxycyclin,20um fibronectin discs,0,26,254
3T3,aSFL-6FP,M450,none,20um fibronectin discs,0,29,293


In [38]:
# Experiment counter - Python table

cellID = 'cellID'
GlobalTable_ctField_CountCell = GlobalTable_ctField.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_ctField_CountCell = GlobalTable_ctField_CountCell.loc[:, [cellID]].rename(columns={cellID : 'Count cells - ctField'})

cellID = 'cellID'
GlobalTable_meca_CountComp = GlobalTable_mecaBis.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_meca_CountComp = GlobalTable_meca_CountComp.loc[:, [cellID]].rename(columns={cellID : 'Count compressions'})

cellID = 'cellID'
group = GlobalTable_mecaBis.groupby(cellID)
dictAggMean = getDictAggMean(GlobalTable_mecaBis)
GlobalTable_meca_perCell = group.agg(dictAggMean)
GlobalTable_meca_CountCell = GlobalTable_meca_perCell.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_meca_CountCell = GlobalTable_meca_CountCell.loc[:, [cellID]].rename(columns={cellID : 'Count cells - meca'})


# Fuse all the previous tables
GlobalTable_CountAll = pd.concat([GlobalTable_ctField_CountCell, GlobalTable_meca_CountCell, GlobalTable_meca_CountComp], axis=1)
GlobalTable_CountAll = GlobalTable_CountAll.fillna(0)
GlobalTable_CountAll = GlobalTable_CountAll.loc[:,:].astype(int)
GlobalTable_CountAll

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Count cells - ctField,Count cells - meca,Count compressions
cell type,cell subtype,bead type,drug,substrate,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3T3,aSFL,M450,doxycyclin,20um fibronectin discs,20,28,352
3T3,aSFL,M450,doxycyclin,BSA coated glass,0,16,161
3T3,aSFL,M450,none,20um fibronectin discs,20,32,392
3T3,aSFL,M450,none,BSA coated glass,0,13,117
3T3,aSFL-6FP,M450,doxycyclin,20um fibronectin discs,0,26,254
3T3,aSFL-6FP,M450,none,20um fibronectin discs,0,29,293
3T3,aSFL-A8,M450,doxycyclin,20um fibronectin discs,0,25,260
3T3,aSFL-A8,M450,none,20um fibronectin discs,0,23,260


In [51]:
Filters = [(GlobalTable_mecaBis['validatedFit'] == True), (GlobalTable_mecaBis['validatedThickness'] == True), (GlobalTable_mecaBis['cell subtype'] == 'aSFL-A8')]
GlobalTable_mecaBisF = GlobalTable_mecaBis
for fltr in Filters:
        GlobalTable_mecaBisF = GlobalTable_mecaBisF.loc[fltr]
# GlobalTable_mecaBis
cellID = 'cellID'
group = GlobalTable_mecaBisF.groupby(cellID)
dictAggMean = getDictAggMean(GlobalTable_mecaBisF)
GlobalTable_mecaBisF_perCell = group.agg(dictAggMean)
GlobalTable_mecaBisF_perCell

Unnamed: 0_level_0,date_x,cellName,cellID,manipID,compNum,compDuration,compStartTime,initialThickness,minThickness,maxIndent,previousThickness,surroundingThickness,validatedThickness,ctFieldThickness,ctFieldFluctuAmpli,H0Chadwick,EChadwick,R2Chadwick,EChadwick_CIWidth,hysteresis,critFit,validatedFit,comments_x,date_y,manip,experimentType,drug,substrate,objective magnification,scale pixel per um,objective immersion,optical index correction,magnetic field correction,cell type,cell subtype,bead type,bead diameter,normal field,ramp field,compression duration,with fluo images,bacteria,comments_y,meanFluoPeakAmplitude
cellID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
21-06-16_M1_P1_C1,21-06-16,M1_P1_C1,21-06-16_M1_P1_C1,21-06-16_M1,6.2,1s,65.3942,214.154738,150.43376,63.720979,186.4361,182.337429,True,175.496972,70.140305,233.610688,20220.567547,0.971144,3486.463787,18.059388,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,116.527305
21-06-16_M1_P1_C11,21-06-16,M1_P1_C11,21-06-16_M1_P1_C11,21-06-16_M1,6.166667,1s,65.375667,330.912682,255.755614,75.157068,291.097714,289.953241,True,301.316427,41.611739,353.664131,13530.492104,0.957228,2609.448056,22.664335,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,222.577455
21-06-16_M1_P1_C12,21-06-16,M1_P1_C12,21-06-16_M1_P1_C12,21-06-16_M1,7.0,1s,75.213923,256.305341,150.455642,105.849699,241.46073,240.569161,True,233.663297,145.690252,294.747022,6937.305995,0.982486,856.805738,28.29152,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,97.137882
21-06-16_M1_P1_C2,21-06-16,M1_P1_C2,21-06-16_M1_P1_C2,21-06-16_M1,5.5,1s,57.1911,665.788381,363.873163,301.915219,639.131271,640.364191,True,637.052966,89.075566,772.401701,1768.790171,0.980047,226.003848,112.164311,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,106.45892
21-06-16_M1_P1_C3,21-06-16,M1_P1_C3,21-06-16_M1_P1_C3,21-06-16_M1,6.25,1s,66.32375,619.42816,189.645403,429.782757,513.226584,503.271172,True,497.5514,187.765627,757.967084,976.07169,0.986311,78.596533,164.627723,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,41.632854
21-06-16_M1_P1_C5,21-06-16,M1_P1_C5,21-06-16_M1_P1_C5,21-06-16_M1,10.5,1s,117.44555,286.301872,216.570755,69.731117,288.092834,284.942573,True,276.746029,68.915277,313.001372,14561.002759,0.968773,2389.692439,20.850384,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,88.509068
21-06-16_M1_P1_C6,21-06-16,M1_P1_C6,21-06-16_M1_P1_C6,21-06-16_M1,5.5,1s,57.5092,288.916561,205.416349,83.500211,279.021791,273.820353,True,263.762371,96.541019,316.852451,13378.60531,0.97958,1808.561019,30.351865,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,140.83862
21-06-16_M1_P1_C7,21-06-16,M1_P1_C7,21-06-16_M1_P1_C7,21-06-16_M1,6.0,1s,63.83525,226.791056,119.359401,107.431655,204.039574,205.265248,True,200.983356,62.938155,259.52926,6989.575079,0.97429,943.011094,40.736882,R2 > 0.9,True,ok,21-06-16,M1,compressions,none,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,44.053323
21-06-16_M2_P1_C1,21-06-16,M2_P1_C1,21-06-16_M2_P1_C1,21-06-16_M2,5.5,1s,57.9373,380.438451,243.357217,137.081233,364.502468,359.086836,True,354.270089,107.133218,434.304633,5526.232931,0.963089,949.187651,48.773043,R2 > 0.9,True,ok,21-06-16,M2,compressions,doxycyclin,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,117.426902
21-06-16_M2_P1_C10,21-06-16,M2_P1_C10,21-06-16_M2_P1_C10,21-06-16_M2,5.714286,1s,60.471429,1049.40485,522.474595,526.930256,1010.150825,942.944462,True,927.548985,724.785228,1218.65352,974.608919,0.941391,226.225615,209.550253,R2 > 0.9,True,ok,21-06-16,M2,compressions,doxycyclin,20um fibronectin discs,100X,15.8,oil,0.875,1.1,3T3,aSFL-A8,M450,4503,5,['3'. '40'],1s,True,,,606.113726


## Plots

### Functions

In [56]:
Styles = {''} # Project of automatic formatting according to the type of data

These functions use matplotlib.pyplot and seaborn libraries to display 1D categorical or 2D plots

In [57]:
def D1Plot(data, CondCol=[],Parameters=[],Filters=[],Boxplot=True,AvgPerCell=False,cellID='cellID', co_order=[],\
           stats=True,statMethod='Mann-Whitney',box_pairs=[],figSizeFactor = 1,markerSizeFactor=1):
    data_filtered = data
    for fltr in Filters:
        data_filtered = data_filtered.loc[fltr]
        
    NCond = len(CondCol)    
    if NCond == 1:
        CondCol = CondCol[0]
    elif NCond > 1:
        newColName = ''
        for i in range(NCond):
            newColName += CondCol[i]
            newColName += ' & '
        newColName = newColName[:-3]
        data_filtered[newColName] = ''
        for i in range(NCond):
            data_filtered[newColName] += data_filtered[CondCol[i]].astype(str)
            data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x + ' & ')
        data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x[:-3])
        CondCol = newColName
    
    if AvgPerCell:
        group = data_filtered.groupby(cellID)
        dictAggMean = getDictAggMean(data_filtered)
        data_filtered = group.agg(dictAggMean)
        
    data_filtered.sort_values(CondCol, axis=0, ascending=True, inplace=True)
    
    NPlots = len(Parameters)
    Conditions = list(data_filtered[CondCol].unique())
    if len(co_order) == 0:
        co_order = Conditions
        
    fig, ax = plt.subplots(1, NPlots, figsize = (5*NPlots*NCond*figSizeFactor,5))
    markerSize = 5*markerSizeFactor
    
    if NPlots > 1:
        for k in range(NPlots):
            
            if Parameters[k] == 'EChadwick':
                ax[k].set_yscale('log')
                

            if Boxplot:
                sns.boxplot(x=CondCol, y=Parameters[k], data=data_filtered, ax=ax[k], 
                            color='w', linewidth = 2, width = 0.5, showfliers = False, order= co_order)
                # data_filtered.boxplot(column=Parameters[k], by = CondCol, ax=ax[k],showfliers = False) # linewidth = 2, width = 0.5
            
            if stats:
                if len(box_pairs) == 0:
                    box_pairs = makeBoxPairs(co_order)
                addStat(ax[k], data_filtered, box_pairs, Parameters[k], CondCol, test = statMethod)
#                 add_stat_annotation(ax[k], x=CondCol, y=Parameters[k], data=data_filtered,box_pairs = box_pairs,test=statMethod, text_format='star',loc='inside', verbose=2)
                    
            sns.swarmplot(x=CondCol, y=Parameters[k], data=data_filtered, ax=ax[k], 
                          size=markerSize, edgecolor='k',linewidth = 1*markerSizeFactor, order= co_order)
            
            ax[k].set_xlabel('')
            ax[k].set_ylabel(Parameters[k])
            ax[k].tick_params(axis='x', labelrotation = 10)
            ax[k].yaxis.grid(True)
#             if Parameters[k] == 'EChadwick_log':
                
            
            
    else:
#         vals = []
#         for i in range(len(Conditions)):
#             vals.append(data_filtered[data_filtered[CondCol] == Conditions[i]][Parameters[0]])
        sns.swarmplot(x=CondCol, y=Parameters[0], data=data_filtered, ax=ax, 
                          size=markerSize, edgecolor='k',linewidth = 0.5, order= co_order)
        if Boxplot:
            sns.boxplot(x=CondCol, y=Parameters[0], data=data_filtered, ax=ax, 
                        color='w', linewidth = 2, width = 0.5, showfliers = False, order= co_order)
        if stats:
            if len(box_pairs) == 0:
                box_pairs = makeBoxPairs(co_order)
            add_stat_annotation(ax, x=CondCol, y=Parameters[0], data=data_filtered,
                               box_pairs = box_pairs,
                               test=statMethod, text_format='star',
                               loc='inside', verbose=statVerbose)
        ax.set_xlabel('')
        ax.set_ylabel(Parameters[0])
        ax.tick_params(axis='x', labelrotation = 10)
        ax.yaxis.grid(True)
    return(fig, ax)

In [58]:
def D2Plot(data, XCol='',YCol='',CondCol='',Filters=[], cellID='cellID', AvgPerCell=False, modelFit=False, modelType='y=ax+b'):
    data_filtered = data
    for fltr in Filters:
        data_filtered = data_filtered.loc[fltr]
    
    NCond = len(CondCol)    
    if NCond == 1:
        CondCol = CondCol[0]
    elif NCond > 1:
        newColName = ''
        for i in range(NCond):
            newColName += CondCol[i]
            newColName += ' & '
        newColName = newColName[:-3]
        data_filtered[newColName] = ''
        for i in range(NCond):
            data_filtered[newColName] += data_filtered[CondCol[i]].astype(str)
            data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x + ' & ')
        data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x[:-3])
        CondCol = newColName
    
    if AvgPerCell:
        group = data_filtered.groupby(cellID)
        dictAggMean = getDictAggMean(data_filtered)
        data_filtered = group.agg(dictAggMean.pop(cellID)) #.reset_index(level=0, inplace=True)
        data_filtered.reset_index(level=0, inplace=True)
        
    Conditions = list(data_filtered[CondCol].unique())
    fig, ax = plt.subplots(1, 1, figsize = (8,5))
    markerSize = 5
    
    if modelFit:
        # Tweak the style cycle to plot for each condition: the points ('o') and then the fit ('-') with the same color.
        ncustom_color_list = list(np.array([new_color_list, new_color_list]).flatten(order='F'))
        # if new_color_list was ['red', 'green', blue'], custom_color_list is now ['red', 'red', 'green', 'green', blue', blue']
        cc = cycler(color=ncustom_color_list)
        ax.set_prop_cycle(cc)
    
    for c in Conditions:
        Xraw = data_filtered[data_filtered[CondCol] == c][XCol].values
        Yraw = data_filtered[data_filtered[CondCol] == c][YCol].values
        XYraw = np.array([Xraw,Yraw]).T
        XY = XYraw[~np.isnan(XYraw).any(axis=1), :]
        X, Y = XY[:,0], XY[:,1]
        if len(X) > 0:
            eqnText = ''

            if modelFit:
                print('Fitting condition: ' + c + ' with model ' + modelType)
                if modelType == 'y=ax+b':
                    params, results = fitLine(X, Y) # Y=a*X+b ; params[0] = b,  params[1] = a
                    pval = results.pvalues[1] # pvalue on the param 'a'
                    eqnText += " ; Y = {:.1f} X + {:.1f}".format(params[1], params[0])
                    eqnText += " ; p-val = {:.2e}".format(pval)
                    print("Y = {:.5} X + {:.5}".format(params[1], params[0]))
                    print("p-value on the 'a' coefficient: {:.4e}".format(pval))
                    print('\n')
                    fitY = params[1]*X + params[0]
                    imin = np.argmin(X)
                    imax = np.argmax(X)
                    ax.plot([X[imin],X[imax]], [fitY[imin],fitY[imax]], '--', lw = '1')

                elif modelType == 'y=A*exp(kx)':
                    params, results = fitLine(X, np.log(Y)) # Y=a*X+b ; params[0] = b,  params[1] = a
                    pval = results.pvalues[1] # pvalue on the param 'a'
                    eqnText += " ; Y = {:.1f}*exp({:.1f}*X)".format(params[0], params[1])
                    eqnText += " ; p-val = {:.2e}".format(pval)
                    print("Y = {:.5}*exp({:.5}*X)".format(np.exp(params[0]), params[1]))
                    print("p-value on the 'k' coefficient: {:.4e}".format(pval))
                    print('\n')
                    fitY = np.exp(params[0])*np.exp(params[1]*X)
                    imin = np.argmin(X)
                    imax = np.argmax(X)
                    ax.plot([X[imin],X[imax]], [fitY[imin],fitY[imax]], '--', lw = '1')
                    
            ax.plot(X, Y, 'o', markersize = markerSize, markeredgecolor='k', markeredgewidth = 1, label=c + eqnText)
            
            
    ax.set_xlabel(XCol)
    ax.set_xlim([min(0,1.1*np.min(data_filtered[XCol])), 1.1*np.max(data_filtered[XCol])])
    ax.set_ylabel(YCol)
    ax.set_ylim([min(0,1.1*np.min(data_filtered[YCol])), 1.1*np.max(data_filtered[YCol])])
    ax.legend(loc='upper left')
    return(fig, ax)

These functions use the Bokeh library to display 1D categorical or 2D plots with interactive plots. They are less flexible but can be nice to explore the data set since you can display the cellID which is the source of each point by passing your pointer over it.

In [59]:
def D1PlotInteractive(data, CondCol='',Parameters=[],Filters=[],AvgPerCell=False,cellID='cellID'):
    data_filtered = data
    for fltr in Filters:
        data_filtered = data_filtered.loc[fltr]
        
#     print(data_filtered[cellID])
    if AvgPerCell:
        group = data_filtered.groupby(cellID)
        dictAggMean = getDictAggMean(data_filtered)
        data_filtered = group.agg(dictAggMean.pop(cellID)) #.reset_index(level=0, inplace=True)
        data_filtered.reset_index(level=0, inplace=True)
    
#     return(data_filtered)
    
    NPlots = len(Parameters)
    Conditions = list(data_filtered[CondCol].unique())
    if NPlots > 1:
        plots = []
        NCond = len(Conditions)
        data_filtered['X'] = 0
        data_filtered['X_jitter'] = 0.
        dictTicks = {}
        for i in range(NCond):
            mask = data_filtered[CondCol] == Conditions[i]
            data_filtered.loc[mask, 'X'] = i+1
            dictTicks[i+1] = Conditions[i]
        for i in data_filtered.index:
            data_filtered.loc[i, 'X_jitter'] = data_filtered.loc[i, 'X'] + 0.4*(np.random.rand(1)[0]-0.5)
        source = ColumnDataSource(
            data=data_filtered[[cellID]+[CondCol]+Parameters+['X','X_jitter']]
        )        
        
        for k in range(NPlots):
            hover = HoverTool(
                tooltips=[
                    ('Cell ID', "@"+cellID),
                    (Parameters[k], "@"+Parameters[k]),
                ]
            )
            index_cmap = factor_cmap(CondCol, palette=Category10[10], factors=sorted(data_filtered[CondCol].unique()), end=1)
            p = figure(plot_width=450, plot_height=500, tools=[hover], title="InteractivePlot") # 
            p.circle('X_jitter', Parameters[k], size=8, alpha = 0.6, source=source,fill_color=index_cmap,line_color='black')
            # Format
            p.x_range = Range1d(0, NCond+1)
            p.y_range = Range1d(min(0,1.1*np.min(data_filtered[Parameters[0]])), 1.1*np.max(data_filtered[Parameters[k]]))
            p.xaxis.ticker = [i for i in range(1,NCond+1)]
            p.xaxis.major_label_overrides = dictTicks
            p.xaxis.axis_label = CondCol
            p.xaxis.axis_label_text_font_size = '18pt'
            p.xaxis.major_label_text_font_size = '16pt'
            p.yaxis.axis_label = Parameters[k]
            p.yaxis.axis_label_text_font_size = '18pt'
            p.yaxis.major_label_text_font_size = '16pt'
            
            plots.append(p)
            
        p = gridplot(plots, ncols=2, toolbar_location=None)
        
        
    else:
        hover = HoverTool(
            tooltips=[
                ('Cell ID', "@"+cellID),
                (Parameters[0], "@"+Parameters[0]),
            ]
        )
        
        NCond = len(Conditions)
        data_filtered['X'] = 0
        data_filtered['X_jitter'] = 0.
        dictTicks = {}
        for i in range(NCond):
            mask = data_filtered[CondCol] == Conditions[i]
            data_filtered.loc[mask, 'X'] = i+1
            dictTicks[i+1] = Conditions[i]
        for i in data_filtered.index:
            data_filtered.loc[i, 'X_jitter'] = data_filtered.loc[i, 'X'] + 0.4*(np.random.rand(1)[0]-0.5)
        source = ColumnDataSource(
            data=data_filtered[[cellID]+[CondCol]+Parameters+['X','X_jitter']]
        )
        index_cmap = factor_cmap(CondCol, palette=Category10[10], factors=sorted(data_filtered[CondCol].unique()), end=1)
        TOOLS = "hover,pan,box_zoom,wheel_zoom,reset,save,help"
        p = figure(plot_width=500, plot_height=500, tools=TOOLS, title="InteractivePlot") # 
        p.circle('X_jitter', Parameters[0], size=8, alpha = 0.6, source=source,fill_color=index_cmap,line_color='black')
        # Format
        p.x_range = Range1d(0, NCond+1)
        p.y_range = Range1d(min(0,1.1*np.min(data_filtered[Parameters[0]])), 1.1*np.max(data_filtered[Parameters[0]]))
        p.xaxis.ticker = [i for i in range(1,NCond+1)]
        p.xaxis.major_label_overrides = dictTicks
        p.xaxis.axis_label = CondCol
        p.xaxis.axis_label_text_font_size = '18pt'
        p.xaxis.major_label_text_font_size = '16pt'
        p.yaxis.axis_label = Parameters[0]
        p.yaxis.axis_label_text_font_size = '18pt'
        p.yaxis.major_label_text_font_size = '16pt'
    return(p)

In [60]:
def D2PlotInteractive(data, XCol='',YCol='',CondCol='',Filters=[], cellID='cellID',AvgPerCell=False):
    
    data_filtered = data
    for fltr in Filters:
        data_filtered = data_filtered.loc[fltr]
        
    if AvgPerCell:
        group = data_filtered.groupby(cellID)
        dictAggMean = getDictAggMean(data_filtered)
        data_filtered = group.agg(dictAggMean.pop(cellID)) #.reset_index(level=0, inplace=True)
        data_filtered.reset_index(level=0, inplace=True)
    
    Conditions = list(data_filtered[CondCol].unique())

    NCond = len(Conditions)
    dictTicks = {}
    for i in range(NCond):
        dictTicks[i+1] = Conditions[i]
    
    source = ColumnDataSource(
        data=data_filtered[[cellID,CondCol,XCol,YCol]]
    )
    
    hover = HoverTool(
        tooltips=[
            ('Cell ID', "@"+cellID),
            (XCol, "@"+XCol),
            (YCol, "@"+YCol),
            (CondCol, "@"+CondCol),
        ]
    )
    
    index_cmap = factor_cmap(CondCol, palette=Category10[10], factors=sorted(data_filtered[CondCol].unique()), end=1)
    TOOLS = "pan,box_zoom,wheel_zoom,reset,save,help"
    p = figure(plot_width=900, plot_height=500, tools=TOOLS, title="InteractivePlot",toolbar_location="below") # 
    p.circle(XCol, YCol, size=8, alpha = 0.6, source=source,fill_color=index_cmap,line_color='black')
    p.add_tools(hover)
    # Format
    p.x_range = Range1d(0, 1.1*np.max(data_filtered[XCol]))
    p.y_range = Range1d(0, 1.1*np.max(data_filtered[YCol]))
    p.xaxis.axis_label = XCol
    p.xaxis.axis_label_text_font_size = '18pt'
    p.xaxis.major_label_text_font_size = '16pt'
    p.yaxis.axis_label = YCol
    p.yaxis.axis_label_text_font_size = '18pt'
    p.yaxis.major_label_text_font_size = '16pt'
    return(p)

Other subfunctions useful to plot

In [61]:
def makeOrder(*args):
    order = []
    listeTuple = list(itertools.product(*args, repeat=1))
    for tup in listeTuple:
        tmpText = ''
        for word in tup:
            tmpText += word
            tmpText += ' & '
        tmpText = tmpText[:-3]
        order.append(tmpText)
    return(order)


def makeBoxPairs(O):
    return(list(itertools.combinations(O, 2)))


def renameAxes(axes, rD):
    try:
        N = len(axes)
    except:
        axes = [axes]
        N = 1
    for i in range(N):
        # set xticks
        xticksTextObject = axes[i].get_xticklabels()
        xticksList = [xticksTextObject[j].get_text() for j in range(len(xticksTextObject))]
        test_hasXLabels = (len(''.join(xticksList)) > 0)
        if test_hasXLabels:
            newXticksList = [rD.get(k, k) for k in xticksList]
            axes[i].set_xticklabels(newXticksList)
        
        # set xlabel
        xlabel = axes[i].get_xlabel()
        newXlabel = rD.get(xlabel, xlabel)
        axes[i].set_xlabel(newXlabel)
        # set ylabel
        ylabel = axes[i].get_ylabel()
        newYlabel = rD.get(ylabel, ylabel)
        axes[i].set_ylabel(newYlabel)
        

def addStat(ax, data, box_pairs, param, cond, test = 'MannWhitney', percentHeight = 98):
    refHeight = np.percentile(data[param].values, percentHeight)
    currentHeight = refHeight
    scale = ax.get_yscale()
    xTicks = ax.get_xticklabels()
    dictXTicks = {xTicks[i].get_text() : xTicks[i].get_position()[0] for i in range(len(xTicks))}
    for bp in box_pairs:
        c1 = data[data[cond] == bp[0]][param].values
        c2 = data[data[cond] == bp[1]][param].values
        if test=='Mann-Whitney':
            statistic, pval = st.mannwhitneyu(c1,c2)
        elif test=='t-test':
            statistic, pval = st.ttest_ind(c1,c2)
        text = 'ns'
        if pval < 0.05 and pval > 0.01:
            text = '*'
        elif pval < 0.01 and pval > 0.001:
            text = '**'
        elif pval < 0.001 and pval < 0.001:
            text = '***'
        elif pval < 0.0001:
            text = '****'
        ax.plot([bp[0], bp[1]], [currentHeight, currentHeight], 'k-', lw = 1)
        XposText = (dictXTicks[bp[0]]+dictXTicks[bp[1]])/2
        if scale == 'log':
            power = 0.006 * (text=='ns') + 0.000 * (text!='ns')
            YposText = currentHeight*(refHeight**power)
        else:
            factor = 0.025 * (text=='ns') + 0.000 * (text!='ns')
            YposText = currentHeight + factor*refHeight
        ax.text(XposText, YposText, text, ha = 'center')
#         if text=='ns':
#             ax.text(posText, currentHeight + 0.025*refHeight, text, ha = 'center')
#         else:
#             ax.text(posText, currentHeight, text, ha = 'center')
        if scale == 'log':
            currentHeight = currentHeight*(refHeight**0.05)
        else:
            currentHeight =  currentHeight + 0.15*refHeight
    ax.set_ylim([ax.get_ylim()[0], currentHeight])
    

In [None]:
# Test of the averaging per cell routine

# data = GlobalTable_meca
# CondCol='drug'
# Parameters=['SurroundingThickness','EChadwick']
# Filters = [(GlobalTable_meca['Validated'] == 1)]
# AvgPerCell=True
# cellID='CellName'

# data_filtered = data
# for fltr in Filters:
#     data_filtered = data_filtered.loc[fltr]

# group = data_filtered.groupby(cellID)
# dictAggMean = getDictAggMean(data_filtered)
# data_filtered = group.agg(dictAggMean.pop(cellID)) #.reset_index(level=0, inplace=True)
# data_filtered.reset_index(level=0, inplace=True)
# data_filtered=data_filtered[[cellID]+[CondCol]+Parameters]
# print(data_filtered)

In [None]:
# Test of a routine to remove points of a list of XY positions where at least 1 of the coordinates is 'nan'

# XYraw = np.array([[np.nan, 2, 3, np.nan, 5], [10,20,30,40,50]])
# XYraw = XYraw.T
# XY = XYraw[~np.isnan(XYraw).any(axis=1), :]
# X, Y = XY[:,0], XY[:,1]
# X, Y

In [None]:
# Test of a routine to double each element in a list ; example [1, 2, 3] -> [1, 1, 2, 2, 3, 3]

# newnew_color_list = np.array([new_color_list, new_color_list])
# custom_color_list = list(np.array([new_color_list, new_color_list]).flatten(order='F'))
# custom_color_list

In [None]:
# Test of makeOrder function

# print(makeOrder(['none','doxycyclin'],['BSA coated glass','20um fibronectin discs']))
# print(makeOrder(['A','B']))
# print(makeOrder(['A','B'], ['C','D']))
# print(makeOrder(['A','B'], ['C','D'], ['E','F']))

In [68]:
# Test of makeBoxPairs function

O = makeOrder(['none','doxycyclin'],['BSA coated glass','20um fibronectin discs'])
makeBoxPairs(O)

[('none & BSA coated glass', 'none & 20um fibronectin discs'),
 ('none & BSA coated glass', 'doxycyclin & BSA coated glass'),
 ('none & BSA coated glass', 'doxycyclin & 20um fibronectin discs'),
 ('none & 20um fibronectin discs', 'doxycyclin & BSA coated glass'),
 ('none & 20um fibronectin discs', 'doxycyclin & 20um fibronectin discs'),
 ('doxycyclin & BSA coated glass', 'doxycyclin & 20um fibronectin discs')]

### Exploratory Graphs

#### Classic

In [None]:
rawMecaTable = getGlobalTable_meca()
# rawMecaTable.head()

In [None]:
# rawMecaTable.loc[]
# Agréger tous les M450 WT sous un même nom

In [None]:
Filters = [(rawMecaTable['Validated'] == 1), ((rawMecaTable['ExpType'] == 'DictyDB_M270') | (rawMecaTable['ExpType'] == 'DictyDB_M450')), (rawMecaTable['TpsComp'] == '1s')]
# Filters = [(rawMecaTable['Validated'] == 1), ((rawMecaTable['ExpType'] == 'DictyDB_M450')), (rawMecaTable['TpsComp'] == '1s')]
fig, ax = D1Plot(rawMecaTable, CondCol=['ExpType'],Parameters=['SurroundingThickness','EChadwick'],\
                 Filters=Filters,AvgPerCell=True,cellID='CellName')
fig.suptitle('M450 vs M270 for tps comp = 1s')
fig.show()
# rawMecaTable[Filters[0] & Filters[1] & Filters[2]]

In [None]:
Filters = [(rawMecaTable['Validated'] == 1), ((rawMecaTable['ExpType'] == 'DictyDB_M450'))] #  | (rawMecaTable['ExpType'] == 'DictyDB_M450-Multi')
fig, ax00 = D1Plot(rawMecaTable, CondCol=['TpsComp'],Parameters=['SurroundingThickness','EChadwick'],\
                   Filters=Filters,AvgPerCell=False,cellID='CellName',stats=False,figSizeFactor = 1.8,markerSizeFactor=0.5)
fig.suptitle('M450, various rates')
fig.show()

In [None]:
Filters = [(rawMecaTable['Validated'] == 1), ((rawMecaTable['ExpType'] == 'DictyDB_M270'))]
fig, ax01 = D1Plot(rawMecaTable, CondCol=['TpsComp'],Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,\
                   AvgPerCell=False,cellID='CellName',stats=False,figSizeFactor = 1.8,markerSizeFactor=0.5)
fig.suptitle('M270, various rates')
fig.show()

In [None]:
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_ctField['medianThickness'] <= 1000)]
fig, ax = D1Plot(GlobalTable_ctField, CondCol=['drug','substrate'],Parameters=['medianThickness','fluctuAmpli'],Filters=Filters)
fig.suptitle('3T3aSFL on patterns: Ct Field')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
co_order = makeOrder([['none','doxycyclin'],['BSA coated glass','20um fibronectin discs']])
fig, ax = D1Plot(GlobalTable_meca, CondCol=['drug','substrate'],Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,AvgPerCell=True,cellID='CellName', co_order=co_order)
fig.suptitle('3T3aSFL on diverse substrates: Compressions')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL-6FP')]
fig, ax = D1Plot(GlobalTable_meca, CondCol=['drug','substrate'],Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,AvgPerCell=True,cellID='CellName')
fig.suptitle('3T3aSFL on patterns: Compressions')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['substrate'] == '20um fibronectin discs')]
co_order = makeOrder([['aSFL','aSFL-6FP'],['none','doxycyclin']])
fig, ax = D1Plot(GlobalTable_meca, CondCol=['cell subtype','drug'],Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,AvgPerCell=True,cellID='CellName',co_order=co_order)
fig.suptitle('3T3aSFL on diverse substrates: Compressions')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['substrate'] == '20um fibronectin discs')]
co_order = makeOrder([['aSFL','aSFL-6FP'],['none','doxycyclin']])
fig, ax = D1Plot(GlobalTable_meca, CondCol=['cell subtype','drug'],Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,AvgPerCell=True,cellID='CellName',co_order=co_order)
fig.suptitle('3T3aSFL SHORT vs LONG linker: Compressions')
fig.show()

In [None]:
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_ctField['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_ctField, XCol='medianThickness',YCol='fluctuAmpli',CondCol = ['drug'], Filters=Filters, modelFit=True)
fig.suptitle('3T3aSFL - Dh = f(H)')
archiveFig(fig, ax, name='aSFL_Dh(h)_drug', figDir = todayFigDir, figSubDir='ThicknessPlots')
fig.show()

In [None]:
# Same as above without the 2 thickest cells
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_ctField['cell subtype'] == 'aSFL'), (GlobalTable_ctField['medianThickness'] <= 700)]
fig, ax = D2Plot(GlobalTable_ctField, XCol='medianThickness',YCol='fluctuAmpli',CondCol = ['drug'], Filters=Filters, modelFit=True)
fig.suptitle('3T3aSFL - Dh = f(H)')
archiveFig(fig, ax, name='aSFL_Dh(h)_drug_wo2LastPoints', figDir = todayFigDir, figSubDir='ThicknessPlots')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_meca, XCol='SurroundingThickness',YCol='EChadwick',CondCol = ['substrate','drug'],\
           Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=True, modelType='y=A*exp(kx)')
fig.suptitle('3T3aSFL: E(h)')
archiveFig(fig, ax, name='aSFL_E(h)_drug&substrate', figDir = todayFigDir, figSubDir='')
fig.show()

In [None]:
# xticksTextObject = ax.get_xticklabels()
# xticksList = [xticksTextObject[j].get_text() for j in range(len(xticksTextObject))]
# newXticksList = [renameDict1.get(k, k) for k in xticksList]
# newXticksList

In [92]:
Filters = [(GlobalTable_mecaBis['validatedFit'] == True), (GlobalTable_mecaBis['validatedThickness'] == True), (GlobalTable_mecaBis['substrate'] == '20um fibronectin discs')]
co_order = makeOrder(['aSFL','aSFL-A8','aSFL-6FP'],['none','doxycyclin'])
box_pairs=[('aSFL & none', 'aSFL & doxycyclin'),
 ('aSFL-A8 & none', 'aSFL-A8 & doxycyclin'),
 ('aSFL-6FP & none', 'aSFL-6FP & doxycyclin'),
 ('aSFL & none', 'aSFL-A8 & none'),
 ('aSFL & none', 'aSFL-6FP & none')]
fig, ax = D1Plot(GlobalTable_mecaBis, CondCol=['cell subtype','drug'],Parameters=['surroundingThickness','EChadwick'],\
                 Filters=Filters,AvgPerCell=False,cellID='cellID',co_order=co_order,box_pairs=box_pairs,stats=True)
renameAxes(ax,renameDict1)
fig.suptitle('3T3aSFL - All linker types: All Compressions')
fig.show()
# fig.savefig(todayFigDir + '//' + 'compressionsShortvsLongLinker_woStats.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



#### Interactive

In [None]:
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]

p = D1PlotInteractive(GlobalTable_ctField, CondCol='drug',Parameters=['medianThickness','fluctuAmpli'],Filters=Filters)
p.children[0][0].title.text = '3T3aSFL on patterns: Ct Field'
p.children[0][0].title.text_font_size = '16pt'
p.children[1][0].title.text = ''
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
p = D1PlotInteractive(GlobalTable_meca, CondCol='drug',Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,AvgPerCell=True,cellID='CellName')
p.children[0][0].title.text = '3T3aSFL on patterns: Compressions'
p.children[0][0].title.text_font_size = '14pt'
p.children[1][0].title.text = ''
show(p)

In [None]:
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
p = D2PlotInteractive(GlobalTable_ctField, XCol='medianThickness',YCol='fluctuAmpli',CondCol = 'drug', Filters=Filters)
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
p = D2PlotInteractive(GlobalTable_meca, XCol='SurroundingThickness',YCol='EChadwick',CondCol = 'drug', Filters=Filters, cellID = 'CellName')
show(p)

In [None]:
Filters = [(GlobalTable_ctField['validated'] == True)]
p = D2PlotInteractive(GlobalTable_ctField, XCol='meanFluoPeakAmplitude',YCol='medianThickness',CondCol = 'drug', Filters=Filters, cellID = 'cellID',AvgPerCell=True)
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True)]
p = D2PlotInteractive(GlobalTable_meca, XCol='meanFluoPeakAmplitude',YCol='SurroundingThickness',CondCol = 'drug', Filters=Filters, cellID = 'CellName',AvgPerCell=True)
p.title.text = '3T3aSFL expressing linker: H(fluo)'
p.title.text_font_size = '16pt'
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
p = D2PlotInteractive(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = 'drug', Filters=Filters, cellID = 'CellName',AvgPerCell=True)
p.title.text = 'aSFL expressing linker: E(fluo)'
p.title.text_font_size = '16pt'
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
p = D2PlotInteractive(GlobalTable_meca, XCol='meanFluoPeakAmplitude',YCol='SurroundingThickness',CondCol = 'drug', Filters=Filters, cellID = 'CellName',AvgPerCell=True)
p.title.text = 'aSFL expressing linker: H(fluo)'
p.title.text_font_size = '16pt'
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL-6FP')]
p = D2PlotInteractive(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = 'drug', Filters=Filters, cellID = 'CellName',AvgPerCell=True)
p.title.text = 'aSFL-6FP expressing long linker: E(fluo)'
p.title.text_font_size = '16pt'
show(p)

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL-6FP')]
p = D2PlotInteractive(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='SurroundingThickness', CondCol = 'drug', Filters=Filters, cellID = 'CellName',AvgPerCell=True)
p.title.text = 'aSFL-6FP expressing long linker: H(fluo)'
p.title.text_font_size = '16pt'
show(p)

### Clean Graphs

In [93]:
renameDict1 = {'SurroundingThickness':'Surrounding Thickness (nm)',\
               'surroundingThickness':'Surrounding Thickness (nm)',\
               'EChadwick': 'E Chadwick (Pa)',\
               'medianThickness': 'Median Thickness (nm)',\
               'fluctuAmpli': 'Fluctuations Amplitude (nm)',\
               'meanFluoPeakAmplitude' : 'Fluo Intensity (a.u.)',\
               'none & BSA coated glass':'control & non adherent',\
               'doxycyclin & BSA coated glass':'iMC & non adherent',\
               'none & 20um fibronectin discs':'control & adherent on fibro',\
               'doxycyclin & 20um fibronectin discs':'iMC & adherent on fibro',\
               'BSA coated glass & none':'control & non adherent',\
               'BSA coated glass & doxycyclin':'iMC & non adherent',\
               '20um fibronectin discs & none':'control & adherent on fibro',\
               '20um fibronectin discs & doxycyclin':'iMC & adherent on fibro',\
               'aSFL & none':'aSFL control',\
               'aSFL & doxycyclin':'aSFL iMC',\
               'aSFL-6FP & none':'aSFL-6FP control',\
               'aSFL-6FP & doxycyclin':'aSFL-6FP long-iMC',\
               'aSFL-A8 & none':'aSFL-A8 control',\
               'aSFL-A8 & doxycyclin':'aSFL-A8 iMC'}

figSubDir = 'CleanPlots'

#### Thickness and module

In [None]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
co_order = makeOrder(['BSA coated glass','20um fibronectin discs'],['none','doxycyclin'])
fig, ax = D1Plot(GlobalTable_meca, CondCol=['substrate','drug'],\
                 Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,\
                 AvgPerCell=True, cellID='CellName', co_order=co_order)
renameAxes(ax,renameDict1)
fig.suptitle('3T3aSFL on diverse substrates: Compressions')
archiveFig(fig, ax, name='3T3aSFL_substrate&drug_SurroundingThickness&EChadwick', figSubDir = figSubDir)
fig.show()


In [None]:
Filters = [(GlobalTable_mecaBis['validatedFit'] == True), (GlobalTable_mecaBis['validatedThickness'] == True), (GlobalTable_mecaBis['cell subtype'] == 'aSFL')]
co_order = makeOrder(['BSA coated glass','20um fibronectin discs'],['none','doxycyclin'])
fig, ax = D1Plot(GlobalTable_mecaBis, CondCol=['substrate','drug'],\
                 Parameters=['surroundingThickness','EChadwick'],Filters=Filters,\
                 AvgPerCell=True, cellID='cellID', co_order=co_order)
renameAxes(ax,renameDict1)
fig.suptitle('3T3aSFL on diverse substrates: Compressions')
archiveFig(fig, ax, name='3T3aSFL_substrate&drug_SurroundingThickness&EChadwick_NEWTABLE', figSubDir = figSubDir)
fig.show()


In [None]:
# Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
# co_order = makeOrder(['BSA coated glass','20um fibronectin discs'],['none','doxycyclin'])
# fig, ax = D1Plot(GlobalTable_meca, CondCol=['substrate','drug'],\
#                  Parameters=['EChadwick'],Filters=Filters,\
#                  AvgPerCell=True, cellID='CellName', co_order=co_order,statMethod = 'Mann-Whitney', statVerbose = 2)
# renameAxes(ax,renameDict1)
# fig.suptitle('3T3aSFL on diverse substrates: Compressions')
# fig.show()

# # test value should be one of the following: t-test_ind, t-test_welch, t-test_paired, Mann-Whitney, Mann-Whitney-gt, Mann-Whitney-ls, Levene, Wilcoxon, Kruskal.

In [None]:
# Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
# co_order = makeOrder(['BSA coated glass','20um fibronectin discs'],['none','doxycyclin'])
# fig, ax = D1Plot(GlobalTable_meca, CondCol=['substrate','drug'],\
#                  Parameters=['SurroundingThickness','EChadwick'],Filters=Filters,\
#                  AvgPerCell=True, cellID='CellName', co_order=co_order, stats=False)
# renameAxes(ax,renameDict1)
# fig.suptitle('3T3aSFL on diverse substrates: Compressions')
# fig.show()
# # fig.savefig(todayFigDir + '//' + 'compressionsBSAvsFibro_woStats.png')

In [98]:
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_ctField['medianThickness'] <= 1000)]
co_order = makeOrder(['20um fibronectin discs'],['none','doxycyclin'])
fig, ax = D1Plot(GlobalTable_ctField, CondCol=['substrate','drug'],Parameters=['medianThickness','fluctuAmpli'],\
                 Filters=Filters,stats=True,co_order=co_order,figSizeFactor=0.5)
renameAxes(ax,renameDict1)
fig.suptitle('3T3aSFL on patterns: Constant Field')
archiveFig(fig, ax, name='3T3aSFL_drug_medianThickness', figSubDir = figSubDir)

fig.show()
# fig.savefig(todayFigDir + '//' + 'constantFieldFibro_woStats.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [91]:
Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['substrate'] == '20um fibronectin discs')]
co_order = makeOrder(['aSFL','aSFL-6FP'],['none','doxycyclin'])
fig, ax = D1Plot(GlobalTable_meca, CondCol=['cell subtype','drug'],Parameters=['SurroundingThickness','EChadwick'],\
                 Filters=Filters,AvgPerCell=True,cellID='CellName',co_order=co_order,stats=True)
renameAxes(ax,renameDict1)
fig.suptitle('3T3aSFL short vs long linker: Compressions')
archiveFig(fig, ax, name='3T3aSFL_likerType&drug_SurroundingThickness&EChadwick', figSubDir = figSubDir)
fig.show()
# fig.savefig(todayFigDir + '//' + 'compressionsShortvsLongLinker_woStats.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [94]:
Filters = [(GlobalTable_mecaBis['validatedFit'] == True), (GlobalTable_mecaBis['validatedThickness'] == True), (GlobalTable_mecaBis['substrate'] == '20um fibronectin discs')]
co_order = makeOrder(['aSFL','aSFL-A8','aSFL-6FP'],['none','doxycyclin'])
box_pairs=[('aSFL & none', 'aSFL & doxycyclin'),
 ('aSFL-A8 & none', 'aSFL-A8 & doxycyclin'),
 ('aSFL-6FP & none', 'aSFL-6FP & doxycyclin'),
 ('aSFL & none', 'aSFL-A8 & none'),
 ('aSFL & none', 'aSFL-6FP & none')]
fig, ax = D1Plot(GlobalTable_mecaBis, CondCol=['cell subtype','drug'],Parameters=['surroundingThickness','EChadwick'],\
                 Filters=Filters,AvgPerCell=True,cellID='cellID',co_order=co_order,box_pairs=box_pairs,stats=True)
renameAxes(ax,renameDict1)
fig.suptitle('3T3aSFL - All linker types: Compressions')
archiveFig(fig, ax, name='3T3aSFL_likerType&drug_SurroundingThickness&EChadwick_NEWTABLE', figSubDir = figSubDir)
fig.show()
# fig.savefig(todayFigDir + '//' + 'compressionsShortvsLongLinker_woStats.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [97]:
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_ctField['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_ctField, XCol='medianThickness',YCol='fluctuAmpli',CondCol = ['drug'], Filters=Filters, modelFit=True)
fig.suptitle('3T3aSFL - Dh = f(H)')
archiveFig(fig, ax, name='aSFL_Dh(h)_drug', figDir = todayFigDir, figSubDir='ThicknessPlots')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: none with model y=ax+b
Y = 0.72424 X + -8.023
p-value on the 'a' coefficient: 2.4087e-08


Fitting condition: doxycyclin with model y=ax+b
Y = 1.8259 X + -154.51
p-value on the 'a' coefficient: 2.9304e-09




In [99]:
# Same as above without the 2 thickest cells
Filters = [(GlobalTable_ctField['validated'] == True), (GlobalTable_ctField['cell subtype'] == 'aSFL'), (GlobalTable_ctField['medianThickness'] <= 700)]
fig, ax = D2Plot(GlobalTable_ctField, XCol='medianThickness',YCol='fluctuAmpli',CondCol = ['drug'], Filters=Filters, modelFit=True)
fig.suptitle('3T3aSFL - Dh = f(H)')
archiveFig(fig, ax, name='aSFL_Dh(h)_drug_wo2LastPoints', figDir = todayFigDir, figSubDir='ThicknessPlots')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: none with model y=ax+b
Y = 0.57405 X + 25.221
p-value on the 'a' coefficient: 4.3807e-04


Fitting condition: doxycyclin with model y=ax+b
Y = 1.3657 X + -73.332
p-value on the 'a' coefficient: 7.3580e-03




In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_meca, XCol='SurroundingThickness',YCol='EChadwick',CondCol = ['substrate','drug'],\
           Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=False, modelType='y=A*exp(kx)')
fig.suptitle('3T3aSFL: E(h)')
archiveFig(fig, ax, name='aSFL_E(h)_drug&substrate', figDir = todayFigDir, figSubDir='')
fig.show()

#### Correlation with fluorescence

In [73]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['drug'], \
                 Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL expressing linker: E(fluo)')
archiveFig(fig, ax, name='aSFL_iMC_E(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: doxycyclin with model y=ax+b
Y = 60.964 X + -3209.4
p-value on the 'a' coefficient: 1.3686e-05




In [76]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['drug'], \
                 Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL expressing linker: E(fluo)')
archiveFig(fig, ax, name='aSFL_iMC_E(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: doxycyclin with model y=ax+b
Y = 60.964 X + -3209.4
p-value on the 'a' coefficient: 1.3686e-05




In [95]:
#Same as above without the lonely point
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL'), (GlobalTable_meca['EChadwick'] <= 80000)]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['drug'], \
                 Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL expressing linker: E(fluo)')
archiveFig(fig, ax, name='aSFL_iMC_E(fluo)_woLastPoint', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: doxycyclin with model y=ax+b
Y = 28.067 X + 7441.9
p-value on the 'a' coefficient: 2.3306e-03




In [84]:
Filters = [(GlobalTable_mecaBis['validatedFit'] == True), ((GlobalTable_mecaBis['cell subtype'] == 'aSFL') | (GlobalTable_mecaBis['cell subtype'] == 'aSFL-A8')), (GlobalTable_mecaBis['drug'] == 'doxycyclin')]
fig, ax = D2Plot(GlobalTable_mecaBis, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['cell subtype'], \
                 Filters=Filters, cellID = 'cellID', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL & aSFL-A8 expressing linker: E(fluo)')
archiveFig(fig, ax, name='aSFL&A8_iMC_E(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: aSFL with model y=ax+b
Y = 34.971 X + 6183.4
p-value on the 'a' coefficient: 4.4968e-04


Fitting condition: aSFL-A8 with model y=ax+b
Y = -0.88389 X + 1.0349e+04
p-value on the 'a' coefficient: 7.5809e-01




In [89]:
#Same as above without the lonely point
Filters = [(GlobalTable_mecaBis['validatedFit'] == True), (GlobalTable_mecaBis['drug'] == 'doxycyclin'), \
           ((GlobalTable_mecaBis['cell subtype'] == 'aSFL') | (GlobalTable_mecaBis['cell subtype'] == 'aSFL-A8')), \
           (GlobalTable_mecaBis['meanFluoPeakAmplitude'] <= 1300)]
fig, ax = D2Plot(GlobalTable_mecaBis, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['cell subtype'], \
                 Filters=Filters, cellID = 'cellID', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL & aSFL-A8 expressing linker: E(fluo)')
archiveFig(fig, ax, name='aSFL&A8_iMC_E(fluo)_woLastPoint', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Fitting condition: aSFL with model y=ax+b
Y = 35.713 X + 5943.1
p-value on the 'a' coefficient: 8.9267e-03


Fitting condition: aSFL-A8 with model y=ax+b
Y = -2.7511 X + 1.0983e+04
p-value on the 'a' coefficient: 6.3244e-01




In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL')]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude',YCol='SurroundingThickness',CondCol = ['drug'], Filters=Filters, cellID = 'CellName',AvgPerCell=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL expressing linker: H(fluo)')
archiveFig(fig, ax, name='aSFL_iMC_H(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

In [None]:
Filters = [(GlobalTable_ctField['validated'] == True)]
fig, ax = D2Plot(GlobalTable_ctField, XCol='meanFluoPeakAmplitude',YCol='medianThickness',CondCol = ['drug'],\
                 Filters=Filters, cellID = 'cellID', AvgPerCell=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL expressing linker: medianH(fluo)')
archiveFig(fig, ax, name='aSFL_iMC_medianH(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL-6FP')]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['drug'], \
                 Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL-6FP expressing long linker: E(fluo)')
archiveFig(fig, ax, name='aSFL-6FP_iMC_E(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

In [None]:
#Same as above without the lonely point
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL-6FP'), (GlobalTable_meca['meanFluoPeakAmplitude'] <= 1000)]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='EChadwick', CondCol = ['drug'], \
                 Filters=Filters, cellID = 'CellName', AvgPerCell=True, modelFit=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL-6FP expressing long linker: E(fluo)')
archiveFig(fig, ax, name='aSFL-6FP_iMC_E(fluo)_woLastPoint', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

In [None]:
Filters = [(GlobalTable_meca['Validated'] == True), (GlobalTable_meca['cell subtype'] == 'aSFL-6FP')]
fig, ax = D2Plot(GlobalTable_meca, XCol='meanFluoPeakAmplitude', YCol='SurroundingThickness', CondCol = ['drug'],\
                 Filters=Filters, cellID = 'CellName',AvgPerCell=True)
renameAxes(ax,renameDict1)
fig.suptitle('aSFL-6FP expressing long linker: H(fluo)')
archiveFig(fig, ax, name='aSFL-6FP_iMC_H(fluo)', figDir = todayFigDir, figSubDir='FluoPlots')
fig.show()

## Utility scripts

### Create a test table to test different fits

In [None]:
# Script !




testDir = os.path.join(dataDir, 'TestDataSet')
testFileName = 'testFitCompression.txt'
testFilePath = os.path.join(testDir, testFileName)

list_mecaFiles = [f for f in os.listdir(timeSeriesDataDir) \
                  if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".csv") \
                  and ('R40' in f))] # Change to allow different formats in the future
expDf = getExperimentalConditions()
tableDictTest = {}

for f in list_mecaFiles:
    print(f)
    tS_DataFilePath = os.path.join(timeSeriesDataDir, f)
    tsDF = pd.read_csv(tS_DataFilePath, ';')
    
    split_f = f.split('_')
    tsDF.dx, tsDF.dy, tsDF.dz, tsDF.D2, tsDF.D3 = tsDF.dx*1000, tsDF.dy*1000, tsDF.dz*1000, tsDF.D2*1000, tsDF.D3*1000
    thisManipID = split_f[0] + '_' + split_f[1]
    expDf['manipID'] = expDf['date'] + '_' + expDf['manip']
    thisExpDf = expDf.loc[expDf['manipID'] == thisManipID]
    DIAMETER = thisExpDf.at[thisExpDf.index.values[0], 'bead diameter']
    thisCellID = split_f[0] + '_' + split_f[1] + '_' + split_f[2] + '_' + split_f[3]
    Ncomp = max(tsDF['idxCompression'])
    
    for i in range(1, Ncomp+1): #Ncomp+1):
        thisCompHiD = thisCellID + '__' + str(i) + '__h'
        thisCompFiD = thisCellID + '__' + str(i) + '__f'
        print(thisCompHiD)
        
        thisCompDf = tsDF.loc[tsDF['idxCompression'] == i, :]
        iStart = (findFirst(tsDF['idxCompression'], i))
        iStop = iStart + thisCompDf.shape[0]
        
        # Delimit the start of the increase of B (typically the moment when the field decrease from 5 to 3)
        # and the end of its decrease (typically when it goes back from 3 to 5)
        
        listB = thisCompDf.B.values
        offsetStart, offsetStop = 0, 0
        minB, maxB = min(listB), max(listB)
        thresholdB = (maxB-minB)/50
        k = 0
        
        while (listB[k] > minB+thresholdB) or (listB[-1-k] > minB+thresholdB):
            offsetStart += int(listB[k] > minB+thresholdB)
            k += 1
        
        jStart = offsetStart
        jMax = np.argmax(thisCompDf.B)
        
        hCompr = (thisCompDf.D3.values[jStart:jMax+1] - DIAMETER)
        fCompr = (thisCompDf.F.values[jStart:jMax+1])
        
        tableDictTest[thisCompHiD] = hCompr
        tableDictTest[thisCompFiD] = fCompr
        
saveFile = open(testFilePath, 'w')
for k in tableDictTest.keys():
    saveFile.write(k)
    for i in range(len(tableDictTest[k])):
        saveFile.write(';')
        saveFile.write(str(tableDictTest[k][i]))
    saveFile.write('\n')
saveFile.close()

### Create a test table to try statistical tests

In [None]:
# Script !

# Create a test table to try statistical tests

testDir = os.path.join(dataDir, 'TestDataSet')
GlobalTable_meca = getGlobalTable_meca()
table_ExpConditions = getExperimentalConditions()
table_fluo = getFluoData()
GlobalTable_meca = pd.merge(GlobalTable_meca, table_ExpConditions, how="inner", on='manipID',
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
GlobalTable_meca = pd.merge(GlobalTable_meca, table_fluo, how="left", left_on='CellName', right_on='cellID'
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)
print('Merged table has ' + str(GlobalTable_meca.shape[0]) + ' lines and ' + str(GlobalTable_meca.shape[1]) + ' columns.')

In [None]:
# Table 1

testFileName = 'testStats01_allComp.csv'
testFilePath = os.path.join(testDir, testFileName)

Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
co_order = makeOrder([['BSA coated glass','20um fibronectin discs'],['none','doxycyclin']])
data = GlobalTable_meca
CondCol=['substrate','drug']
Parameters=['SurroundingThickness','EChadwick']
AvgPerCell=False
cellID='CellName'

data_filtered = data
for fltr in Filters:
    data_filtered = data_filtered.loc[fltr]

NCond = len(CondCol)    
if NCond == 1:
    CondCol = CondCol[0]
elif NCond > 1:
    newColName = ''
    for i in range(NCond):
        newColName += CondCol[i]
        newColName += ' & '
    newColName = newColName[:-3]
    data_filtered[newColName] = ''
    for i in range(NCond):
        data_filtered[newColName] += data_filtered[CondCol[i]].astype(str)
        data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x + ' & ')
    data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x[:-3])
    CondCol = newColName
    
if AvgPerCell:
    group = data_filtered.groupby(cellID)
    dictAggMean = getDictAggMean(data_filtered)
    data_filtered = group.agg(dictAggMean)

data_filtered.sort_values(CondCol, axis=0, ascending=True, inplace=True)

df_output = data_filtered[[cellID, 'CompNum', newColName] + Parameters]

# df_output.to_csv(testFilePath)

df_output

In [None]:
# Table 2

testFileName = 'testStats02_avgPerCell.csv'
testFilePath = os.path.join(testDir, testFileName)

Filters = [(GlobalTable_meca['Validated'] == 1), (GlobalTable_meca['cell subtype'] == 'aSFL')]
co_order = makeOrder([['BSA coated glass','20um fibronectin discs'],['none','doxycyclin']])
data = GlobalTable_meca
CondCol=['substrate','drug']
Parameters=['SurroundingThickness','EChadwick']
AvgPerCell=True
cellID='CellName'

data_filtered = data
for fltr in Filters:
    data_filtered = data_filtered.loc[fltr]

NCond = len(CondCol)    
if NCond == 1:
    CondCol = CondCol[0]
elif NCond > 1:
    newColName = ''
    for i in range(NCond):
        newColName += CondCol[i]
        newColName += ' & '
    newColName = newColName[:-3]
    data_filtered[newColName] = ''
    for i in range(NCond):
        data_filtered[newColName] += data_filtered[CondCol[i]].astype(str)
        data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x + ' & ')
    data_filtered[newColName] = data_filtered[newColName].apply(lambda x : x[:-3])
    CondCol = newColName
    
if AvgPerCell:
    group = data_filtered.groupby(cellID)
    dictAggMean = getDictAggMean(data_filtered)
    data_filtered = group.agg(dictAggMean)

data_filtered.sort_values(CondCol, axis=0, ascending=True, inplace=True)

df_output = data_filtered[[cellID, 'CompNum', newColName] + Parameters]

# df_output.to_csv(testFilePath)

df_output

In [None]:
# Table 3
# Fake data

testDir = os.path.join(dataDir, 'TestDataSet')

testFileName = 'testStats03_FakeData.csv'
testFilePath = os.path.join(testDir, testFileName)

Npop = 10
Npoints = 30
minAvg = 100
maxAvg = 600
step = (maxAvg - minAvg)/(Npop-1)
std = 250
dictFakeData = {}
np.random.seed(11)

for i in [1, 2, 3, 10]:
    dictFakeData['Distribution_' + str(i)] = np.random.normal(loc=minAvg + step*(i-1), scale=std, size=Npoints)

dfFakeData = pd.DataFrame(dictFakeData)

# dfFakeData.to_csv(testFilePath)

dfFakeData

In [None]:
# Table 4
# Fake data 2

testDir = os.path.join(dataDir, 'TestDataSet')

testFileName = 'testStats04_FakeDataLarge.csv'
testFilePath = os.path.join(testDir, testFileName)

Npop = 10
Npoints = 300
minAvg = 100
maxAvg = 600
step = (maxAvg - minAvg)/(Npop-1)
std = 250
dictFakeData = {}
np.random.seed(11)

for i in [1, 2, 3, 10]:
    dictFakeData['Distribution_' + str(i)] = np.random.normal(loc=minAvg + step*(i-1), scale=std, size=Npoints)

dfFakeData = pd.DataFrame(dictFakeData)

# dfFakeData.to_csv(testFilePath)

dfFakeData

### Comparison of stat tests

#### With the fake data

In [None]:
testDir = os.path.join(dataDir, 'TestDataSet')

# Table 3
# Fake data
testFileName = 'testStats03_FakeData.csv'
testFilePath = os.path.join(testDir, testFileName)

dfFakeData = pd.read_csv(testFilePath)
dfFakeData = dfFakeData.drop(columns=['Unnamed: 0'])

Ncol = len(dfFakeData.columns)

refCol = dfFakeData[dfFakeData.columns[0]]
boxPlotMatrix = []

for i in range(0,Ncol):
    boxPlotMatrix.append(dfFakeData[dfFakeData.columns[i]].values)
    if i > 0:
        print('Comparison between distribution 1 and ' + str(i+1))
        tTest = st.ttest_ind(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
        print('tTest : ' + str(tTest.pvalue))
        wilcox = st.wilcoxon(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
        print('wilcox : ' + str(wilcox.pvalue))
        mannwhitneyu = st.mannwhitneyu(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
        print('mannwhitneyu : ' + str(mannwhitneyu.pvalue))
        print('')

fig, ax = plt.subplots(1,1)
ax.boxplot(boxPlotMatrix,labels=dfFakeData.columns.values)
ax.tick_params(axis='x', labelrotation = 15, labelsize = 7)
fig.show()

In [None]:
testDir = os.path.join(dataDir, 'TestDataSet')

# Table 4
# Fake data Large
testFileName = 'testStats04_FakeDataLarge.csv'
testFilePath = os.path.join(testDir, testFileName)

dfFakeData = pd.read_csv(testFilePath)
dfFakeData = dfFakeData.drop(columns=['Unnamed: 0'])

Ncol = len(dfFakeData.columns)

refCol = dfFakeData[dfFakeData.columns[0]]
boxPlotMatrix = []

for i in range(0,Ncol):
    boxPlotMatrix.append(dfFakeData[dfFakeData.columns[i]].values)
    if i > 0:
        print('Comparison between distribution 1 and ' + str(i+1))
        tTest = st.ttest_ind(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
        print('tTest : ' + str(tTest.pvalue))
        wilcox = st.wilcoxon(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
        print('wilcox : ' + str(wilcox.pvalue))
        mannwhitneyu = st.mannwhitneyu(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
        print('mannwhitneyu : ' + str(mannwhitneyu.pvalue))
        print('')

fig, ax = plt.subplots(1,1)
ax.boxplot(boxPlotMatrix,labels=dfFakeData.columns.values)
ax.tick_params(axis='x', labelrotation = 15, labelsize = 7)
fig.show()

In [None]:
# Summary of results: numpy.random.seed = 11
    
dictResultsFakeData = {}
dictResultsFakeData['language'] = ['python'   , 'python', 'python'      , 'R'     , 'R'          , 'Matlab' , 'Matlab' ]
dictResultsFakeData['test']     = ['ttest_ind', 'wilcox', 'mannwhitneyu', 't.test', 'wilcox.test', 'ttest2' , 'ranksum']
dictResultsFakeData['1 vs 2']   = [0.2700     , 0.2711  , 0.1353        , 0.2701  , 0.2729       , 0.2700   , 0.2707   ]
dictResultsFakeData['1 vs 3']   = [0.0714     , 0.0822  , 0.0452        , 0.0715  , 0.0906       , 0.0714   , 0.0905   ]
dictResultsFakeData['1 vs 10']  = [1.33e-11   , 4.28e-06, 2.98e-09      , 1.44e-11, 5.799e-11    , 1.33e-11 , 5.96e-09 ]
dfResultFakeData = pd.DataFrame(dictResultsFakeData)
dfResultFakeData

In [None]:
# Summary of results: numpy.random.seed = 11

dictResultsFakeDataLarge = {}
dictResultsFakeDataLarge['language'] = ['python'   , 'python', 'python'      , 'R'     , 'R'          , 'Matlab' , 'Matlab' ]
dictResultsFakeDataLarge['test']     = ['ttest_ind', 'wilcox', 'mannwhitneyu', 't.test', 'wilcox.test', 'ttest2' , 'ranksum']
dictResultsFakeDataLarge['1 vs 2']   = [0.0082     , 0.0049  , 0.0038        , 0.0082  , 0.0077       , 0.0082   , 0.0077   ]
dictResultsFakeDataLarge['1 vs 3']   = [1.26e-06   , 9.29e-06, 1.37e-06      , 1.27e-06, 2.75e-06     , 1.26e-06 , 2.74e-06 ]
dictResultsFakeDataLarge['1 vs 10']  = [1.74e-98   , 5.64e-48, 6.39e-74      , 2.2e-16 , 2.2e-16      , 1.74e-98 , 1.27e-73 ]
dictResultsFakeDataLarge = pd.DataFrame(dictResultsFakeDataLarge)
dictResultsFakeDataLarge

#### With real data

In [None]:
# testDir = os.path.join(dataDir, 'TestDataSet')

# Table 1
# Avg Per Cell
testFileName = 'testStats01_AvgPerCell.csv'
testFilePath = os.path.join(testDir, testFileName)

dfAvgPerCell = pd.read_csv(testFilePath)
# dfAvgPerCell = dfAvgPerCell.drop(columns=['Unnamed: 0'])
dfAvgPerCell
categories = list(dfAvgPerCell['substrate & drug'].unique())
Ncat = len(categories)
for i in range(Ncat):
    for j in range(i,Ncat):
        if j != i:
            x = dfAvgPerCell.loc[dfAvgPerCell['substrate & drug'] == categories[i], 'EChadwick'].values
            y = dfAvgPerCell.loc[dfAvgPerCell['substrate & drug'] == categories[j], 'EChadwick'].values
            print('Comparison between ' + categories[i] + ' and ' + categories[j])
            tTest = st.ttest_ind(x, y)
            print('tTest : ' + str(tTest.pvalue))
            mannwhitneyu = st.mannwhitneyu(x, y)
            print('mannwhitneyu : ' + str(mannwhitneyu.pvalue))
            print('')

# refCol = dfFakeData[dfFakeData.columns[0]]
# boxPlotMatrix = []

# for i in range(0,Ncol):
#     boxPlotMatrix.append(dfFakeData[dfFakeData.columns[i]].values)
#     if i > 0:
#         print('Comparison between distribution 1 and ' + str(i+1))
#         tTest = st.ttest_ind(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
#         print('tTest : ' + str(tTest.pvalue))
#         wilcox = st.wilcoxon(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
#         print('wilcox : ' + str(wilcox.pvalue))
#         mannwhitneyu = st.mannwhitneyu(refCol.values, dfFakeData[dfFakeData.columns[i]].values)
#         print('mannwhitneyu : ' + str(mannwhitneyu.pvalue))
#         print('')

# fig, ax = plt.subplots(1,1)
# ax.boxplot(boxPlotMatrix,labels=dfFakeData.columns.values)
# ax.tick_params(axis='x', labelrotation = 15, labelsize = 7)
# fig.show()

In [None]:
dictResultsAvgPerCell = {}
order = ['language','test','c & NA vs iMC & A','c & NA vs c & A','iMC & NA vs iMC & A','c & NA vs iMC & NA','iMC & NA vs c & A','c & A vs iMC & A']
dictResultsAvgPerCell['language']            = ['python_statAnot'   , 'python_statAnot' ,'python'   , 'python', 'R'      , 'R'          , 'Matlab' , 'Matlab' ]
dictResultsAvgPerCell['test']                = ['ttest_ind', 'mannwhitneyu', 'ttest_ind', 'mannwhitneyu', 't.test' , 'wilcox.test', 'ttest2' , 'ranksum']
dictResultsAvgPerCell['c & NA vs iMC & NA']  = [1.000e+00  , 5.669e-01     ,0.19226082553146542  , 0.047244130659518     , 0.1503   , 0.09502       , 0.1923       , 0.0945]
dictResultsAvgPerCell['iMC & NA vs c & A']   = [9.673e-02  , 8.625e-03     ,0.016121864893694285  ,0.0007187494204219925     , 0.04082  , 0.0009726    , 0.0161     , 0.0014]
dictResultsAvgPerCell['c & A vs iMC & A']    = [2.331e-02  , 1.376e-01     ,0.00388458593467288  , 0.01146586677766893     , 0.007326 , 0.02214      , 0.0039      , 0.0229]
dictResultsAvgPerCell['c & NA vs c & A']     = [1.000e+00  , 1.000e+00     ,0.6977550928576132  , 0.2884535746840493     , 0.6948   , 0.5838       , 0.6978     , 0.5769]
dictResultsAvgPerCell['iMC & NA vs iMC & A'] = [1.000e+00  , 1.000e+00     ,0.5573451346686198  , 0.41831870120029446,  0.5031  , 0.8387       , 0.5573      , 0.8366 ]
dictResultsAvgPerCell['c & NA vs iMC & A']   = [9.726e-01  , 1.000e+00     ,0.16209530366557973  , 0.14893352365754048     , 0.04353  , 0.3043       , 0.1621       , 0.2979 ]
dfResultsAvgPerCell = pd.DataFrame(dictResultsAvgPerCell)
dfResultsAvgPerCell[order]

# dfResultsAvgPerCell = dfResultsAvgPerCell.sort_values(by='test',ascending=False)
# dfResultsAvgPerCell

## Old Code

### Data formatting and filtering

TBC

In [None]:
mecaDataFile = 'Global_MecaData.csv'
mecaDataFilePath = os.path.join(dataDir, mecaDataFile)
mecaDF = pd.read_csv(mecaDataFilePath, sep=';')
print('Extracted a table with ' + str(mecaDF.shape[0]) + ' lines and ' + str(mecaDF.shape[1]) + ' columns.')

mecaDF = mecaDF.rename(columns={"CellID": "CellName", "CellName": "CellID"})

In [None]:
# mecaDF

In [None]:
experimentalDataFile = 'ExperimentalConditions.csv'
experimentalDataFilePath = os.path.join(experimentalDataDir, experimentalDataFile)
expConditionsDF = pd.read_csv(experimentalDataFilePath, sep=';',header=0)
print('Extracted a table with ' + str(expConditionsDF.shape[0]) + ' lines and ' + str(expConditionsDF.shape[1]) + ' columns.')

# Cleaning the table
try:
    expConditionsDF = expConditionsDF.convert_dtypes()

    listTextColumns = []
    for col in expConditionsDF.columns:
        if expConditionsDF[col].dtype == 'string':
            listTextColumns.append(col)

    expConditionsDF[listTextColumns] = expConditionsDF[listTextColumns].apply(lambda x: x.str.replace(',','.'))

    expConditionsDF['scale pixel per um'] = expConditionsDF['scale pixel per um'].astype(float)
    expConditionsDF['optical index correction'] = \
              expConditionsDF['optical index correction'].apply(lambda x: x.split('/')[0]).astype(float) \
            / expConditionsDF['optical index correction'].apply(lambda x: x.split('/')[1]).astype(float)
    expConditionsDF['magnetic field correction'] = expConditionsDF['magnetic field correction'].astype(float)
    expConditionsDF['with fluo images'] = expConditionsDF['with fluo images'].astype(bool)

    expConditionsDF['ramp field'] = \
    expConditionsDF['ramp field'].apply(lambda x: [x.split(';')[0], x.split(';')[1]] if not pd.isnull(x) else [])

except:
    print('Unexpected bug with the cleaning step')

In [None]:
expConditionsDF

In [None]:
# Unused for now
cellDescriptionDataFile = 'CellDescription.csv'
cellDescriptionDataFilePath = os.path.join(experimentalDataDir, cellDescriptionDataFile)
cellDescriptionDF = pd.read_csv(cellDescriptionDataFilePath, ',')
print('Extracted a table with ' + str(cellDescriptionDF.shape[0]) + ' lines and ' + str(cellDescriptionDF.shape[1]) + ' columns.')

In [None]:
mecaDF['ManipID'] = mecaDF['ExpDay'] + '_' + mecaDF['CellName'].apply(lambda x: x.split('_')[0])
expConditionsDF['ManipID'] = expConditionsDF['date'] + '_' + expConditionsDF['manip']

mainMecaDF = pd.merge(
    expConditionsDF,
    mecaDF,
    how="inner",
    on='ManipID',
#     left_on=None,
#     right_on=None,
#     left_index=False,
#     right_index=False,
#     sort=True,
#     suffixes=("_x", "_y"),
#     copy=True,
#     indicator=False,
#     validate=None,
)

In [None]:
# pd.set_option('display.max_columns', None)
# mainMecaDF.head()

In [None]:
pd.reset_option('max_columns')

#### * Data filtering

In [None]:
# pd.set_option('display.max_columns', None)
# mainMecaDF.head()

In [None]:
# pd.reset_option('max_columns')
# mainMecaDF.columns

In [None]:
mainMecaDF_f = mainMecaDF.loc[(mainMecaDF["Validated"] == 1)]
# mainMecaDF_f

In [None]:
%matplotlib inline
listCells = mainMecaDF_f['CellID'].drop_duplicates().astype('string').values
timeSeriesDict = {}
for cell in listCells:
    currentCell_TimeSeriesData = getCellTimeSeriesData(cell)
    timeSeriesDict[cell] = currentCell_TimeSeriesData
start, stop = 80, 100
fig, axes = plt.subplots((stop-start),1, figsize = (7,4*(stop-start)))
fig.tight_layout()
for k in range(start, stop):
    if k < len(listCells):
        currentCell_TimeSeriesData = timeSeriesDict[listCells[k]]
        T = currentCell_TimeSeriesData['T'].values
        idxCompression = currentCell_TimeSeriesData['idxCompression'].values
        D3 = currentCell_TimeSeriesData['D3'].values
        maskConstant = (idxCompression == 0)
        maskCompression = (idxCompression > 0)
        axes[k - start].plot(T, D3*1000-4503, 'k-', linewidth = 0.5)
        axes[k - start].plot(T[maskCompression], D3[maskCompression]*1000-4503, 'ro', markersize=2)
        axes[k - start].plot(T[maskConstant], D3[maskConstant]*1000-4503, 'co', markersize=2)
        axes[k - start].set_title(listCells[k])
        axes[k - start].set_xlabel('T (s)')
        axes[k - start].set_ylabel('D3 (µm)')

In [None]:
addExcludedCell('21-01-18_M1_P1_C2', 'passive')
addExcludedCell('21-01-18_M1_P1_C3', 'passive')
addExcludedCell('21-01-18_M1_P1_C4', 'passive')
addExcludedCell('21-01-21_M3_P1_C4', 'passive')
addExcludedCell('21-01-21_M3_P1_C5', 'passive')
addExcludedCell('20-08-07_M1_P1_C6', 'too thick')
addExcludedCell('20-08-07_M1_P1_C62', 'too thick')

excludedCellsDict = getExcludedCells()
# # excludedMask = (mainMecaDF_f["CellID"].values not in excludedCellsDict.keys())
# # mainMecaDF_f = mainMecaDF_f.loc[(mainMecaDF_f["CellID"].values not in excludedCellsDict.keys())]
# for i in range(len(excludedCellsDict)):
#     print('a')
# mainMecaDF_f["CellID"].drop_duplicates().astype('string').values
excludedCellsDict

In [None]:
# currentCell_TimeSeriesData

In [None]:
mainMecaDF_GroupedPerCell = mainMecaDF_f.groupby('CellID')
mainMecaDF_DataPerCell = mainMecaDF_GroupedPerCell.agg({"EChadwick": np.median, "SurroundingThickness": np.median, "H0Chadwick" : np.median})
# mainMecaDF_GroupedPerCell.agg({"EChadwick": np.median, "D": lambda x: np.std(x, ddof=1)})
cols = ['date', 'manip', 'experimentType', 'drug', 'substrate',
       'objective magnification', 'scale pixel per um', 'objective immersion',
       'optical index correction', 'magnetic field correction', 'cell type',
       'cell subtype', 'bead type', 'bead diameter', 'normal field',
       'ramp field', 'compression duration', 'with fluo images', 'comments',
       'ManipID', 'ExpType', 'CellName', 'CellID']
mainMecaDF_DataPerCell.dropna(inplace = True)
mainMecaDF_DataPerCell = pd.merge(mainMecaDF_DataPerCell,
                                  mainMecaDF_f[cols].drop_duplicates(subset=['CellID']),
                                  how="inner",
                                  on='CellID',
                                  #     left_on='CellID',
                                  #     right_on='CellID',
                                  #     left_index=False,
                                  #     right_index=False,
                                  #     sort=True,
                                  #     suffixes=("_x", "_y"),
                                  #     copy=True,
                                  #     indicator=False,
                                  #     validate=None,
                                  )
# mainMecaDF_DataPerCell

In [None]:
GlobalTable_meca_Count = GlobalTable_meca.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
GlobalTable_meca_Count.loc[:, ['CellID']].rename(columns={'CellID' : 'Count'})

### Other old code

In [None]:
# fig, axes = plt.subplots(1,2, figsize = (8, 5))
# axes[0].plot(np.ones(len(resDict['nodrug']['median'])), resDict['nodrug']['median'], 'co')
# axes[0].plot(2*np.ones(len(resDict['doxy']['median'])), resDict['doxy']['median'], 'ro')
# axes[0].set_xlim(0.5, 2.5)
# axes[0].set_ylabel('Median Thickness (nm)')
# axes[0].set_xticks([1,2])
# axes[0].set_xticklabels(['Control','Doxycylin'])
# axes[1].plot(np.ones(len(resDict['nodrug']['fluctu'])), resDict['nodrug']['fluctu'], 'co')
# axes[1].plot(2*np.ones(len(resDict['doxy']['fluctu'])), resDict['doxy']['fluctu'], 'ro')
# axes[1].set_xlim(0.5, 2.5)
# axes[1].set_ylabel('Thickness Fluctuations (nm)')
# axes[1].set_xticks([1,2])
# axes[1].set_xticklabels(['Control','Doxycylin'])
# fig.savefig("C://Users//JosephVermeil//Desktop//ActinCortexAnalysis//DataAnalysis//constantField.png")
# fig.show()

In [None]:
# Old code
conditions = ['nodrug', 'doxy']
correspondance = {conditions[0] : 'M1', conditions[1] : 'M2'}
allTimeSeriesDataFiles = [f for f in os.listdir(timeSeriesDataDir) if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".txt"))]
dates = ['21-02-10']
resDict = {conditions[0] : {}, conditions[1] : {}}
for C in conditions:
    resDict[C]['accepted'] = []
    resDict[C]['rejected'] = []
    resDict[C]['median'] = []
    resDict[C]['fluctu'] = []
    for D in dates:
        for f in allTimeSeriesDataFiles:
            if correspondance[C] in f and D in f:
                split_f = f.split('_')
                cellID = split_f[0] + '_' + split_f[1] + '_' + split_f[2] + '_' + split_f[3]
                currentCellTS = getCellTimeSeriesData(cellID)
                D3 = currentCellTS.D3.values
                decile_1 = np.percentile(D3, 10)
                median = np.median(D3)
                decile_9 = np.percentile(D3, 90)
                if decile_1 < 0:
                    resDict[C]['rejected'].append(cellID)
                else:
                    resDict[C]['accepted'].append(cellID)
                    resDict[C]['median'].append(median)
                    resDict[C]['fluctu'].append(decile_9-decile_1)
#resDict

In [None]:
GlobalTable_ctField = getGlobalTable_ctField()
table_ExpConditions = getExperimentalConditions()
GlobalTable_ctField = pd.merge(table_ExpConditions, GlobalTable_ctField, how="inner", on='manipID',
#     left_on=None,right_on=None,left_index=False,right_index=False,sort=True,
#     suffixes=("_x", "_y"),copy=True,indicator=False,validate=None,
)

# data=pd.DataFrame(dict(
#             x=[1, 2, 3, 4, 5],
#             y=[2, 5, 8, 2, 7],
#             desc=['A', 'A', 'C', 'd', 'E'],
#         ))

data = GlobalTable_ctField[['medianThickness','fluctuAmpli','cellID']]

source = ColumnDataSource(
        data=data
    )

# hover = HoverTool(
#         tooltips=[
#             ("index", "$index"),
#             ("(x,y)", "($x, $y)"),
#             ("desc", "@desc"),
#         ]
#     )

hover = HoverTool(
        tooltips=[
            ("medianThickness", "@medianThickness"),
            ("fluctuAmpli", "@fluctuAmpli"),
        ]
    )

p = figure(plot_width=300, plot_height=300, tools=[hover], title="Mouse over the dots")

p.circle('medianThickness', 'fluctuAmpli', size=20, source=data)

show(p)

data

In [None]:
data=pd.DataFrame(dict(
            x=[1, 2, 3, 4, 5],
            y=[2, 5, 8, 2, 7],
            desc=['A', 'A', 'C', 'd', 'E'],
        ))

Conditions = list(data['desc'].unique())
NCond = len(Conditions)
data['X'] = 0
for i in range(NCond):
    mask = data['desc'] == Conditions[i]
    data.loc[mask, ['X']] = i+1
data.index = data.x
data = data.drop(['x'], axis = 1)

data.reset_index(level=0, inplace=True)
data