In [None]:
%matplotlib inline
import pandas as pd
import os
import numpy as np

# plotting
import matplotlib  as mpl
import matplotlib.pylab as plt
# our packages
from pharaglow import extract, util
from pharaglow.util import smooth

In [None]:
# matplotlib settings to make stuff nice. This can also be imported using a style sheet.


##### Figure 1 Preemptive pumping requires bacterial odors  
- Establishing preemptive pumping: in wild type (GRU101) on 24h and 1h(?) lawns. Here we can use the Zeiss data and then say we wanted to scale up to larger N -> macroscopes.
- (A) experimental setup biorender
- (B) and (C) Wildtype lawn encounter velocity and pumping rate
- (D) comparison of GRU101 on short- and long-term lawns (a theoretical calculation of odor diffusion from Luis would be nice to have; it can be added later). A quick back-of-the-envelope calculation says the 24h lawn should have only 6mm of diffusion. This is pretty cool because then we should see how it differs between 24h and no incubation.
- (E) Summary statistics for D 
- (F) worms that can’t smell


### Define convenience functions for data loading

In [None]:
def plotPumpAnalysis(pump, ps, roc, prs, time, metric):
    """Draw a 2x2 diagnostic figure ('Analysis') for the pump peak detection.

    Panels:
      221 - number of detected peaks for each prominence value in `prs`.
      222 - the pump trace drawn three times (vertically offset by 3) with the
            peaks found for the first, middle and last prominence value.
      223 - cumulative peak count over time for every 5th prominence value,
            with the set minimising `metric` highlighted in green.
      224 - `metric` against the number of peaks, minimum marked in red.

    Parameters:
        pump: trace that is plotted and indexed with the peak indices.
        ps: sequence of peak-index arrays, one entry per value in `prs`.
        roc: not used by this function.
        prs: sequence of prominence values.
        time: pandas object supporting `.iloc` (the frame column).
        metric: one value per entry of `ps`; its argmin selects the best set.

    Also prints the number of peaks in the best set.
    """
    nProminences = len(prs)
    peakCounts = [len(peaks) for peaks in ps]
    # the index of the best peak set is needed by two panels and the print
    best = np.argmin(metric)
    bestPeaks = ps[best]
    tStart = time.iloc[0]

    plt.figure('Analysis', figsize=(12,8))

    # peaks found versus prominence
    plt.subplot(221)
    plt.plot(prs, peakCounts)
    plt.ylabel('Number of peaks')
    plt.xlabel('peak prominence parameter')

    # example traces for the lowest, middle and highest prominence
    plt.subplot(222)
    for row, which in enumerate((0, nProminences//2, nProminences-1)):
        offset = 3*row
        plt.text(tStart, pump[0] + offset+1.2, 'prominence = {:.2}'.format(prs[which]))
        plt.plot(time, pump + offset, color ='navy', lw = 0.25)
        plt.plot(time.iloc[ps[which]], pump[ps[which]] + offset,'r.')
    plt.ylim(-2, 8)
    plt.yticks([])
    plt.xlabel('Time (frames)')

    # cumulative peak counts; darker lines correspond to later prominence values
    plt.subplot(223)
    for which in range(0, nProminences, 5):
        peaks = ps[which]
        plt.plot(peaks+tStart, np.arange(len(peaks)), color ='k', alpha=0.2+0.8*which/len(prs), lw=0.75)
    plt.plot(bestPeaks+tStart, np.arange(len(bestPeaks)), lw = 2, color='g')
    plt.ylabel('Cumulative peaks found')
    plt.xlabel('Time (frames)')

    # metric as a function of the number of peaks
    plt.subplot(224)
    plt.plot(peakCounts, metric)
    plt.plot(peakCounts[best], np.min(metric), 'ro')
    print(len(bestPeaks))
    plt.xlabel('Number of peaks')
    plt.ylabel('peak disimilarity')
    plt.tight_layout()

In [None]:
def getVelocity(traj, umperPx, fps):
    """Return the frame-to-frame speed of a trajectory.

    `traj` is indexed with the keys 'x', 'y' and 'frame'. The displacement
    between consecutive samples is divided by the frame difference and scaled
    by `umperPx` and `fps`. The result has one element fewer than the input
    because it is built from np.diff.
    """
    dx = np.diff(traj['x'])
    dy = np.diff(traj['y'])
    dFrames = np.diff(traj['frame'])
    stepLength = np.sqrt((dx**2+dy**2))
    return stepLength/dFrames*umperPx*fps

def getPumps(pumps, ws = 30, prs = np.linspace(0.5,0.95,50) ):
    """Run pharaglow's extract.bestMatchPeaks on a pump trace.

    `pumps`, `ws` and `prs` are forwarded unchanged, in that order, and the
    result of extract.bestMatchPeaks is returned as is.
    """
    peakResult = extract.bestMatchPeaks(pumps, ws, prs)
    return peakResult

def getKymo(df, key):
    """Build an aligned kymograph from the image column `key` of `df`.

    For every entry of df[key] the None items are dropped, the entries are
    stacked and summed over axis 2, each resulting row is resampled to 100
    points with np.interp, and the rows are aligned with extract.alignKymos.
    The transposed result is returned.
    """
    # Drop None entries explicitly. The previous `filter(None.__ne__, row)`
    # depended on the truthiness of NotImplemented, which is deprecated since
    # Python 3.9 and a TypeError from Python 3.14.
    cleaned = [np.array([item for item in row if item is not None]) for row in df[key].values]
    kymo = np.sum(cleaned, axis=2)
    # NOTE(review): the sample positions run up to len(row) while the data
    # positions end at len(row)-1, so np.interp clamps the trailing samples
    # to the last value - confirm whether len(row)-1 was intended.
    kymo = np.array([np.interp(np.linspace(0, len(row), 100), np.arange(len(row)), np.array(row)) \
                      for row in kymo])
    kymo = extract.alignKymos(kymo).T
    return kymo

def readData(dataFolder, umPerPx, fps, j = 0):
    """Load pharaglow result files from a folder into a dictionary.

    Every file in the folder whose name contains 'results_' and ends with
    '.json' is considered; the particle index is the integer after the last
    underscore of the file name. Only particles 5 and 7 are processed.

    Parameters:
        dataFolder: path of the folder holding the result files. A path to a
            file inside that folder is also accepted (its directory is used).
        umPerPx: spatial scale forwarded to getVelocity and stored as 'scale'.
        fps: frame rate used to convert frames to time and pump intervals
            to rates.
        j: first key used in the returned dictionary; incremented per animal.

    Returns:
        dict mapping consecutive integers (starting at `j`) to a dict with the
        keys 'time', 'x', 'y', 'velocity', 'peaks', 'pumpTrace',
        'binaryPumps', 'pumpInterp', 'inside', 'insideF', 'pid', 'filename',
        'kymo', 'fps' and 'scale'.

    Side effects: prints each candidate file name and draws the pump analysis
    figure for each processed animal.
    """
    df = {}
    # os.path.dirname() alone returns the *parent* directory when the folder is
    # given without a trailing separator, so use the folder itself if it is one.
    path = dataFolder if os.path.isdir(dataFolder) else os.path.dirname(dataFolder)
    for fn in os.listdir(path):
        file = os.path.join(path,fn)
        if os.path.isfile(file) and 'results_' in fn and fn.endswith('.json'):
            print('Reading', file)
            particle_index = int(fn.split('.')[0].split('_')[-1])
            # only these particles are analysed for this figure
            if particle_index not in [5,7]:
                continue
            # NOTE(review): the `numpy` keyword of pd.read_json is deprecated in
            # recent pandas releases - confirm against the installed version.
            traj =  pd.read_json(file, orient='split', numpy = True)
            # velocity
            t = traj['frame']/fps
            v = getVelocity(traj, umPerPx, fps)
            # pumping related data
            kymo = getKymo(traj, 'Straightened')
            # raw pump signal: negative of the largest std (taken along axis 1) of each image
            rawPump = [-np.max(np.std(sIm, axis =1), axis =0) for sIm in traj['Straightened']]
            traj['pump'] = rawPump
            prs =  np.linspace(0.15,1.00,50)
            p, pump, pks, roc, metric  = getPumps(traj['pump'].values, prs = prs)
            plotPumpAnalysis(pump, pks, roc, prs, time = traj['frame'], metric=metric)
            # rate between successive peaks (fps / interval), interpolated onto every frame
            pinterp = np.interp(traj['frame'], p[:-1]+traj['frame'].iloc[0], fps/np.diff(p))
            # get a binary trace where pumps are 1 and non-pumps are 0
            tmp = np.zeros(len(t))
            tmp[p] = 1

            # 'peaks' stores p.values, so p is expected to expose `.values`
            df[j] = {'time': t.values,
                     'x': traj['x'].values,
                     'y': traj['y'].values,
                     'velocity':v,
                     'peaks': p.values,
                     'pumpTrace':pump,
                     'binaryPumps': tmp,
                     'pumpInterp': pinterp,
                     'inside': traj['inside'].values,
                     'insideF': traj['insideHeadIntensity'].values,
                     'pid':particle_index,
                     'filename': fn,
                     'kymo': kymo,
                     'fps': fps,
                     'scale':umPerPx
                    }
            j +=1
    return df

### Read relevant data

In [None]:
%%time
# some parameters
# FPS?
fps = 30
# HOW MANY MICRONS PER PIXEL?
umPerPx = 2.34

# for this figure we need GRU101 control data
ctrl = "/media/scholz_la/hd2/Nicolina/Pharaglow/Old_files/Pharaglow_v5/10x_GRU101_RFP_24hr/"
# Making dictionary combining data with strain name in their separate master folders
Data = {}
for strainName, folder in zip(['Control', 'INF100'], [ctrl]):
    Data[strainName] = readData(folder, umPerPx, fps)


In [None]:
def zscore(x, window):
    """Rolling z-score of a pandas Series relative to the preceding window.

    Mean and standard deviation (ddof=0) are computed over a rolling window and
    shifted by one sample, so each value is compared with the statistics of the
    `window` samples before it. The first `window` results are NaN.
    """
    r = x.rolling(window=window)
    m = r.mean().shift(1)
    s = r.std(ddof=0).shift(1)
    z = (x-m)/s
    return z

def getEntryTime(data, method = 'binary', window = 30):
    """Return the index of the first lawn entry in `data`, or np.nan if none.

    Parameters:
        data: array-like signal, one value per frame.
        method: 'binary' picks the first index where data == 1 (data should be
            the 'inside' variable from pharaglow). 'fluorescence' smooths the
            signal with a rolling mean of `window` samples and picks the first
            index where its rolling z-score (window*10 samples) exceeds 5.
        window: rolling-mean length used by the 'fluorescence' method.

    Returns:
        The first matching index, or np.nan when nothing matches.

    Raises:
        ValueError: if `method` is not one of the two supported names.

    Side effects: the 'fluorescence' method opens a new figure showing the
    smoothed trace and its z-score; whenever an entry is found a vertical line
    is drawn at it on the current axes.
    """
    if method =='binary':
        # get the first entry from binarized lawn data
        t0 = np.where(data ==1)[0]
    elif method =='fluorescence':
        # get the entry from change detection in the fluorescence curve
        data = pd.Series(data)
        data = data.rolling(window).mean()
        z = zscore(data, window*10)
        plt.figure()
        plt.subplot(211)
        # Series.max() skips the leading NaNs of the rolling mean; the builtin
        # max() returned NaN here and blanked the normalised trace.
        plt.plot(data/data.max())
        plt.plot(z)
        plt.subplot(212)
        plt.plot(data.values)
        t0 = np.where(z > 5)[0]
    else:
        raise ValueError("method must be 'binary' or 'fluorescence', got {!r}".format(method))

    if len(t0) > 0:
        plt.axvline(t0[0])
        return t0[0]
    return np.nan

### Helper functions to generate aligned velocity and pumping data
def alignData(df, tBefore, tAfter):
    """Align every animal in `df` to its lawn entry and crop around it.

    df is a dictionary created in readData. For each animal the entry frame t0
    is detected from the 'insideF' trace (fluorescence method). Animals whose
    recording covers the full window [t0-tBefore, t0+tAfter] get 'enter' set to
    True and cropped copies of their traces stored under '<name>Cut'; all others
    get 'enter' False and 't0' None.

    Parameters:
        df: dictionary of per-animal dictionaries; modified in place.
        tBefore, tAfter: window size before/after entry, in frames.

    Returns:
        The same dictionary `df`, with the keys 't0' and 'enter' added for every
        animal and, for animals that entered, the '<name>Cut' traces, 'peaksCut',
        'timeEntry', 'tBefore' and 'tAfter'.
    """
    cutNames = ['insideF', 'velocity', 'time', 'pumpTrace', 'binaryPumps', 'pumpInterp']
    for idx in df.keys():
        # identify the inside point t0 or set a flag that it doesn't enter
        try:
            # t0 is in frames not seconds!
            t0 = getEntryTime(df[idx]['insideF'], method = 'fluorescence')
            print(t0, np.mean(df[idx]['inside']))
            # The window must fit into every trace that gets cropped, not only
            # 'inside': a shorter trace (e.g. a velocity built from np.diff has
            # one sample less) would otherwise give a cut shorter than
            # 'timeEntry' when the recording ends right at the window edge.
            nAvailable = min([len(df[idx]['inside'])] + [len(df[idx][dname]) for dname in cutNames])
            # a t0 of np.nan (no entry found) fails the first comparison
            if t0 > tBefore and nAvailable > t0+tAfter:
                # update the data
                df[idx]['t0'] = t0
                df[idx]['enter'] = True
            else:
                df[idx]['t0'] = None
                df[idx]['enter'] = False
        except IndexError:
            df[idx]['t0'] = None
            df[idx]['enter'] = False
        # calculate cropped versions of the data for animals that entered the lawn
        if df[idx]['enter']:
            start = df[idx]['t0']-tBefore
            stop = df[idx]['t0']+tAfter+1
            for dname in cutNames:
                df[idx][dname+'Cut'] = df[idx][dname][start:stop]
            # peaks that fall strictly inside the window
            peaks = df[idx]['peaks']
            df[idx]['peaksCut'] = peaks[(peaks > start)&(peaks < stop)]
            #shift the cut peaks to the correct locations relative to cut window
            df[idx]['peaksCut'] =  df[idx]['peaksCut'] - df[idx]['t0']+tBefore
            df[idx]['timeEntry'] = np.arange(-tBefore, tAfter+1)
            df[idx]['tBefore'] = tBefore
            df[idx]['tAfter'] = tAfter
    return df
    
    

### Setting up the figure layout using gridspec

In [None]:
# Window around lawn entry, in frames: 40 s before and 10 s after entry.
# Derived from fps instead of a hard-coded 30 so it follows the frame rate.
tBefore = int(40*fps)
tAfter = int(10*fps)
# alignData annotates Data['Control'] in place and returns the same dictionary
df = alignData(Data['Control'], tBefore=tBefore, tAfter=tAfter)

In [None]:
# Lawn-entry figure: individual traces (left column), means over animals
# (middle column) and sanity checks (right column and bottom row).
f1 = plt.figure("Lawn Entry", figsize=(12,8))
grid = mpl.gridspec.GridSpec(ncols=3, nrows =3)
ax1 = f1.add_subplot(grid[0,0])
ax2 = f1.add_subplot(grid[1,0])

# plot individual traces for animals that entered the lawn
# (the unused per-animal rate `30./np.diff(peaksCut)` was removed: it
# hard-coded the frame rate and was overwritten before being read)
for idx in df.keys():
    if df[idx]['enter']:
        ax1.plot(df[idx]['timeEntry']/df[idx]['fps'], util.smooth(df[idx]['pumpInterpCut'], 60))
        ax2.plot(df[idx]['timeEntry']/df[idx]['fps'], util.smooth(df[idx]['velocityCut'], 60))
# mark entry
for ax in (ax1, ax2):
    ax.axvspan( 0, tAfter/fps, color='k', alpha=0.3)
# label properly
ax1.set(ylabel='Pumping rate (HZ)', xlabel='Time (s)', ylim=(0,6))
ax2.set(ylabel='Velocity (um/s)', xlabel='Time (s)', ylim=(0,250))

# plot aggregate mean data
ax3 = f1.add_subplot(grid[0,1])
ax4 = f1.add_subplot(grid[1,1])
# per-animal time axis, velocity, pumping rate and normalised fluorescence
t, v, pr, f = [], [], [], []
for idx in df.keys():
    if df[idx]['enter']:
        # pumping rate from the interpolated inter-peak rate
        #tmp = pd.Series(df[idx]['binaryPumpsCut']).rolling(fps).sum()
        tmp = df[idx]['pumpInterpCut']
        t.append(df[idx]['timeEntry']/df[idx]['fps'])
        pr.append(tmp)
        v.append(df[idx]['velocityCut'])
        f.append(df[idx]['insideFCut']/np.max(df[idx]['insideFCut']))

# make means and plot
ax3.plot(np.mean(t, axis=0), util.smooth(np.mean(pr, axis = 0),30))
ax4.plot(np.mean(t, axis=0), util.smooth(np.mean(v, axis = 0), 30))

# mark entry
for ax in (ax3, ax4):
    ax.axvspan( 0, tAfter/fps, color='k', alpha=0.3)
# set labels
ax3.set(ylabel='mean pumping rate (HZ)', xlabel='Time (s)', ylim=(0,6))
ax4.set(ylabel='mean velocity (um/s)', xlabel='Time (s)', ylim=(0,250))

# plot sanity checks
ax5 = f1.add_subplot(grid[0,2])
ax6 = f1.add_subplot(grid[1,2])
ax7 = f1.add_subplot(grid[2,:])
for idx in df.keys():
    if df[idx]['enter']:
        # raw pump trace and cumulative number of detected pumps
        ax7.plot(df[idx]['timeEntry']/df[idx]['fps'], df[idx]['pumpTraceCut'])
        ax5.plot(df[idx]['timeEntry']/df[idx]['fps'], np.cumsum(df[idx]['binaryPumpsCut']))
        # plot an average pumping rate line at 1.5 hz
        ax5.plot(df[idx]['timeEntry']/df[idx]['fps'], 1.5*(df[idx]['timeEntry']+tBefore)/df[idx]['fps'], 'k--')
        # plot lawn intensity
        ax6.plot(df[idx]['timeEntry']/df[idx]['fps'], df[idx]['insideFCut'])

# mark entry
for ax in (ax5, ax6):
    ax.axvspan(0, tAfter/fps, color='k', alpha=0.3)
# set labels
ax5.set(ylabel='cumulative pumps', xlabel='Time (s)')
ax6.set(ylabel='Fluorescence', xlabel='Time (s)')
ax7.set(ylabel='raw pumping trace', xlabel='Time (s)')
#
plt.tight_layout()