In [278]:
import numpy as np
import matplotlib
%matplotlib widget
import matplotlib.pyplot as plt
import pandas as pd
import os
from copy import copy
matplotlib.rcParams.update({'figure.autolayout': True})

In [252]:
%matplotlib

Using matplotlib backend: module://ipympl.backend_nbagg


In [253]:
# %matplotlib inline
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 16

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

In [254]:
mainDir = "C://Users//JosephVermeil//Desktop//ActinCortexAnalysis"
experimentalDataDir = os.path.join(mainDir, "ExperimentalData")
dataDir = os.path.join(mainDir, "DataAnalysis")
timeSeriesDataDir = os.path.join(dataDir, "TimeSeriesData")
allTimeSeriesDataFiles = [f for f in os.listdir(timeSeriesDataDir) if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".txt"))]
allTimeSeriesDataFiles

['20-08-04_M1_P1_C5_R40.txt',
 '20-08-04_M1_P1_C6_R40.txt',
 '20-08-04_M1_P1_C7_R40.txt',
 '20-08-04_M2_P1_C3_R40.txt',
 '20-08-04_M2_P1_C4_R40.txt',
 '20-08-04_M2_P1_C5_R40.txt',
 '20-08-05_M1_P1_C32_R40.txt',
 '20-08-05_M1_P1_C3_R40.txt',
 '20-08-05_M1_P1_C4_R40.txt',
 '20-08-05_M1_P1_C5_R40.txt',
 '20-08-05_M1_P1_C6_R40.txt',
 '20-08-05_M1_P1_C7_R40.txt',
 '20-08-05_M1_P1_C8_R40.txt',
 '20-08-05_M2_P1_C2_R40.txt',
 '20-08-05_M2_P1_C3_R40.txt',
 '20-08-05_M2_P1_C4_R40.txt',
 '20-08-05_M2_P1_C5_R40.txt',
 '20-08-05_M2_P1_C6_R40.txt',
 '20-08-05_M2_P1_C7_R40.txt',
 '20-08-07_M1_P1_C3_R40.txt',
 '20-08-07_M1_P1_C4_R40.txt',
 '20-08-07_M1_P1_C62_R40.txt',
 '20-08-07_M1_P1_C6_R40.txt',
 '20-08-07_M1_P1_C7_R40.txt',
 '20-08-07_M1_P1_C8_R40.txt',
 '20-08-07_M2_P1_C4_R40.txt',
 '20-08-07_M2_P1_C5_R40.txt',
 '20-08-07_M2_P1_C6_R40.txt',
 '20-08-07_M2_P1_C7_R40.txt',
 '21-01-18_M1_P1_C1_R40_disc20um.txt',
 '21-01-18_M1_P1_C2_R40_disc20um.txt',
 '21-01-18_M1_P1_C3_R40_disc20um.txt',
 '21-01-18_

### Utility subfunctions

In [327]:
def getCellTimeSeriesData(cellID):
    allTimeSeriesDataFiles = [f for f in os.listdir(timeSeriesDataDir) if (os.path.isfile(os.path.join(timeSeriesDataDir, f)) and f.endswith(".txt"))]
    fileFound = False
    nFile = len(allTimeSeriesDataFiles)
    iFile = 0
    while (not fileFound) and (iFile < nFile):
        f = allTimeSeriesDataFiles[iFile]
        if f.startswith(cellID):
            timeSeriesDataFilePath = os.path.join(timeSeriesDataDir, f)
            timeSeriesDataFrame = pd.read_csv(timeSeriesDataFilePath, ',')
            fileFound = True
        iFile += 1
    if not fileFound:
        timeSeriesDataFrame = pd.DataFrame([])
    return(timeSeriesDataFrame)

def plotCellTimeSeriesData(cellID):
    X = 'T'
    Y = np.array(['B', 'F', 'dx', 'dy', 'dz', 'D2', 'D3'])
    units = np.array([' (mT)', ' (pN)', ' (µm)', ' (µm)', ' (µm)', ' (µm)', ' (µm)'])
    timeSeriesDataFrame = getCellTimeSeriesData(cellID)
    if not timeSeriesDataFrame.size == 0:
#         plt.tight_layout()
#         fig.show() # figsize=(20,20)
        axes = timeSeriesDataFrame.plot(x=X, y=Y, kind='line', ax=None, subplots=True, sharex=True, sharey=False, layout=None, \
                       figsize=(8,10), use_index=True, title = cellID + '- Time dependant data', grid=None, legend=False, style=None, logx=False, logy=False, \
                       loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, \
                       table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False)
        plt.gcf().tight_layout()
        for i in range(len(Y)):
            axes[i].set_ylabel(Y[i] + units[i])
        
    else:
        print('cell not found')
        
def addExcludedCell(cellID, motive):
    f = open(os.path.join(experimentalDataDir, 'ExcludedCells.txt'), 'r')
    lines = f.readlines()
    nLines = len(lines)
    excludedCellsList = []
    for iLine in range(nLines):
        line = lines[iLine]
        splitLine = line[:-1].split(',')
        excludedCellsList.append(splitLine[0])
    if cellID in excludedCellsList:
        newlines = copy(lines)
        iLineOfInterest = excludedCellsList.index(cellID)
        if motive not in newlines[iLineOfInterest][:-1].split(','):
            newlines[iLineOfInterest] = newlines[iLineOfInterest][:-1] + ',' + motive + '\n'            
    else:
        newlines = copy(lines)
        newlines.append('' + cellID + ',' + motive + '\n')
    f.close()
    f = open(os.path.join(experimentalDataDir, 'ExcludedCells.txt'), 'w')
    f.writelines(newlines)
    
def getExcludedCells():
    f = open(os.path.join(experimentalDataDir, 'ExcludedCells.txt'), 'r')
    lines = f.readlines()
    nLines = len(lines)
    excludedCellsDict = {}
    for iLine in range(nLines):
        line = lines[iLine]
        splitLine = line[:-1].split(',')
        excludedCellsDict[splitLine[0]] = splitLine[1:]
    return(excludedCellsDict)

In [328]:
getCellTimeSeriesData('20-08-05_M1_P1_C6')

Unnamed: 0,idxCompression,T,B,F,dx,dy,dz,D2,D3
0,0,0.727,11.50115,276.217424,4.324810,1.517911,0.000,4.583453,4.583453
1,0,1.368,11.50345,277.608978,4.314304,1.524557,0.000,4.575750,4.575750
2,0,1.928,11.50575,273.825734,4.318608,1.543797,0.000,4.586249,4.586249
3,0,2.607,11.50460,276.509402,4.311139,1.536646,0.000,4.576811,4.576811
4,0,3.167,11.50000,274.131448,4.307975,1.556013,0.000,4.580373,4.580373
...,...,...,...,...,...,...,...,...,...
1436,0,187.781,11.50345,493.528554,3.701962,1.441329,0.007,3.972651,3.972657
1437,0,188.419,11.50115,494.043072,3.691456,1.453228,0.007,3.967205,3.967211
1438,0,189.019,11.50115,499.774809,3.683418,1.447342,0.007,3.957571,3.957577
1439,0,189.580,11.50000,510.139949,3.664430,1.442911,0.007,3.938279,3.938285


In [329]:
plotCellTimeSeriesData('21-01-21_M1_P1_C2')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Data extraction

In [330]:
mecaDataFile = 'Global_MecaData.txt'
mecaDataFilePath = os.path.join(dataDir, mecaDataFile)
mecaDF = pd.read_csv(mecaDataFilePath, ',')
print('Extracted a table with ' + str(mecaDF.shape[0]) + ' lines and ' + str(mecaDF.shape[1]) + ' columns.')

mecaDF = mecaDF.rename(columns={"CellID": "CellName", "CellName": "CellID"})

Extracted a table with 5083 lines and 20 columns.


In [331]:
# mecaDF

In [332]:
experimentalDataFile = 'ExperimentalConditions.csv'
experimentalDataFilePath = os.path.join(experimentalDataDir, experimentalDataFile)
expConditionsDF = pd.read_csv(experimentalDataFilePath, ',',header=1)
print('Extracted a table with ' + str(expConditionsDF.shape[0]) + ' lines and ' + str(expConditionsDF.shape[1]) + ' columns.')

# Cleaning the table
try:
    expConditionsDF = expConditionsDF.convert_dtypes()

    listTextColumns = []
    for col in expConditionsDF.columns:
        if expConditionsDF[col].dtype == 'string':
            listTextColumns.append(col)

    expConditionsDF[listTextColumns] = expConditionsDF[listTextColumns].apply(lambda x: x.str.replace(',','.'))

    expConditionsDF['scale pixel per um'] = expConditionsDF['scale pixel per um'].astype(float)
    expConditionsDF['optical index correction'] = \
              expConditionsDF['optical index correction'].apply(lambda x: x.split('/')[0]).astype(float) \
            / expConditionsDF['optical index correction'].apply(lambda x: x.split('/')[1]).astype(float)
    expConditionsDF['magnetic field correction'] = expConditionsDF['magnetic field correction'].astype(float)
    expConditionsDF['with fluo images'] = expConditionsDF['with fluo images'].astype(bool)

    expConditionsDF['ramp field'] = \
    expConditionsDF['ramp field'].apply(lambda x: [x.split(';')[0], x.split(';')[1]] if not pd.isnull(x) else [])

except:
    print('Unexpected bug with the cleaning step')

Extracted a table with 18 lines and 19 columns.


In [333]:
# expConditionsDF

In [334]:
# Unused for now
cellDescriptionDataFile = 'CellDescription.csv'
cellDescriptionDataFilePath = os.path.join(experimentalDataDir, cellDescriptionDataFile)
cellDescriptionDF = pd.read_csv(cellDescriptionDataFilePath, ',')
print('Extracted a table with ' + str(cellDescriptionDF.shape[0]) + ' lines and ' + str(cellDescriptionDF.shape[1]) + ' columns.')

Extracted a table with 58 lines and 14 columns.


In [335]:
mecaDF['ManipID'] = mecaDF['ExpDay'] + '_' + mecaDF['CellName'].apply(lambda x: x.split('_')[0])
expConditionsDF['ManipID'] = expConditionsDF['date'] + '_' + expConditionsDF['manip']

mainMecaDF = pd.merge(
    expConditionsDF,
    mecaDF,
    how="inner",
    on='ManipID',
#     left_on=None,
#     right_on=None,
#     left_index=False,
#     right_index=False,
#     sort=True,
#     suffixes=("_x", "_y"),
#     copy=True,
#     indicator=False,
#     validate=None,
)

In [336]:
# pd.set_option('display.max_columns', None)
# mainMecaDF.head()

In [337]:
pd.reset_option('max_columns')

### Data filtering

In [338]:
# pd.set_option('display.max_columns', None)
# mainMecaDF.head()

In [339]:
# pd.reset_option('max_columns')
# mainMecaDF.columns

In [340]:
mainMecaDF_f = mainMecaDF.loc[(mainMecaDF["Validated"] == 1)]
# mainMecaDF_f

In [353]:
listCells = mainMecaDF_f['CellID'].drop_duplicates().astype('string').values
timeSeriesDict = {}
for cell in listCells:
    currentCell_TimeSeriesData = getCellTimeSeriesData(cell)
    timeSeriesDict[cell] = currentCell_TimeSeriesData
start, stop = 80, 100
fig, axes = plt.subplots((stop-start),1, figsize = (7,4*(stop-start)))
fig.tight_layout()
for k in range(start, stop):
    if k < len(listCells):
        currentCell_TimeSeriesData = timeSeriesDict[listCells[k]]
        T = currentCell_TimeSeriesData['T'].values
        idxCompression = currentCell_TimeSeriesData['idxCompression'].values
        D3 = currentCell_TimeSeriesData['D3'].values
        maskConstant = (idxCompression == 0)
        maskCompression = (idxCompression > 0)
        axes[k - start].plot(T, D3*1000-4503, 'k-', linewidth = 0.5)
        axes[k - start].plot(T[maskCompression], D3[maskCompression]*1000-4503, 'ro', markersize=2)
        axes[k - start].plot(T[maskConstant], D3[maskConstant]*1000-4503, 'co', markersize=2)
        axes[k - start].set_title(listCells[k])
        axes[k - start].set_xlabel('T (s)')
        axes[k - start].set_ylabel('D3 (µm)')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [359]:
addExcludedCell('21-01-18_M1_P1_C2', 'passive')
addExcludedCell('21-01-18_M1_P1_C3', 'passive')
addExcludedCell('21-01-18_M1_P1_C4', 'passive')
addExcludedCell('21-01-21_M3_P1_C4', 'passive')
addExcludedCell('21-01-21_M3_P1_C5', 'passive')
addExcludedCell('20-08-07_M1_P1_C6', 'too thick')
addExcludedCell('20-08-07_M1_P1_C62', 'too thick')

excludedCellsDict = getExcludedCells()
# excludedMask = (mainMecaDF_f["CellID"].values not in excludedCellsDict.keys())
# mainMecaDF_f = mainMecaDF_f.loc[(mainMecaDF_f["CellID"].values not in excludedCellsDict.keys())]
for i in range()
mainMecaDF_f["CellID"].drop_duplicates().astype('string').values

TypeError: unhashable type: 'StringArray'

In [232]:
currentCell_TimeSeriesData

Unnamed: 0,idxCompression,T,B,F,dx,dy,dz,D2,D3
0,0,0.768,11.50460,321.321556,4.398291,0.942215,-0.477606,4.498081,4.523366
1,0,1.408,11.50460,321.679042,4.405696,0.925633,-0.465345,4.501884,4.525870
2,0,1.969,11.50115,316.355046,4.431835,0.911139,-0.453141,4.524527,4.547161
3,0,2.568,11.50115,314.507106,4.440633,0.911203,-0.441001,4.533157,4.554557
4,0,3.168,11.50345,316.610689,4.441772,0.893797,-0.428923,4.530807,4.551065
...,...,...,...,...,...,...,...,...,...
772,0,96.608,11.50115,297.323175,4.498291,0.807342,-0.636829,4.570167,4.614323
773,0,97.248,11.50115,295.549708,4.497215,0.826139,-0.644141,4.572467,4.617615
774,0,97.889,11.50345,291.908324,4.502468,0.848544,-0.651460,4.581730,4.627812
775,0,98.449,11.50575,290.159565,4.508418,0.846646,-0.658898,4.587226,4.634305


In [179]:
mainMecaDF_GroupedPerCell = mainMecaDF_f.groupby('CellID')
mainMecaDF_DataPerCell = mainMecaDF_GroupedPerCell.agg({"EChadwick": np.median, "SurroundingThickness": np.median, "H0Chadwick" : np.median})
# mainMecaDF_GroupedPerCell.agg({"EChadwick": np.median, "D": lambda x: np.std(x, ddof=1)})
cols = ['date', 'manip', 'experimentType', 'drug', 'substrate',
       'objective magnification', 'scale pixel per um', 'objective immersion',
       'optical index correction', 'magnetic field correction', 'cell type',
       'cell subtype', 'bead type', 'bead diameter', 'normal field',
       'ramp field', 'compression duration', 'with fluo images', 'comments',
       'ManipID', 'ExpType', 'CellName', 'CellID']
mainMecaDF_DataPerCell.dropna(inplace = True)
mainMecaDF_DataPerCell = pd.merge(mainMecaDF_DataPerCell,
                                  mainMecaDF_f[cols].drop_duplicates(subset=['CellID']),
                                  how="inner",
                                  on='CellID',
                                  #     left_on='CellID',
                                  #     right_on='CellID',
                                  #     left_index=False,
                                  #     right_index=False,
                                  #     sort=True,
                                  #     suffixes=("_x", "_y"),
                                  #     copy=True,
                                  #     indicator=False,
                                  #     validate=None,
                                  )
# mainMecaDF_DataPerCell

In [180]:
mainMecaDF_DataPerCell_Count = mainMecaDF_DataPerCell.groupby(['cell type', 'cell subtype', 'bead type', 'drug', 'substrate']).count()
mainMecaDF_DataPerCell_Count.loc[:, ['CellID']].rename(columns={'CellID' : 'Count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Count
cell type,cell subtype,bead type,drug,substrate,Unnamed: 5_level_1
3T3,aSFL,M450,doxycyclin,20um fibronectin discs,27
3T3,aSFL,M450,doxycyclin,BSA coated glass,14
3T3,aSFL,M450,none,20um fibronectin discs,32
3T3,aSFL,M450,none,BSA coated glass,8


### Plots

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000027A8EDA3BC8>