In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import SPTCata as spt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os, sys
import re
from collections import defaultdict
import itertools

# for fitting
from scipy.optimize import curve_fit
from scipy import stats

# 2019-05-14 MSD again

Here, I want to calculate the mean squared displacement (MSD) of the tracks that I have. In the previous notebooks I managed to compute it, but the results were not satisfactory: the data were noisy and I could not get clean MSD curves.

## Load data

Let's load the data with a minimum quality of 50.

In [None]:
# one data directory per treatment condition
datadirs = {
    'Olaparib_R5020' : '../data/1_Olaparib_R5020',
    'R5020' : '../data/2_DMSO_R5020_Control',
    'EtOH' : '../data/3_EtOH_Nohormone_Control'
}

# the quality threshold passed to the loader
quality = 50.0

# temporal and spatial scale
dt = 0.015             # in seconds (15 ms)
scale_l = 0.160        # in microns (160 nm)

# cycle through all the directories and load one experiment per sub-directory
experiments = defaultdict(list)
for treatment, datadir in datadirs.items() :
    # os.listdir returns entries in arbitrary order: sort them so that the
    # order of the experiments is the same on every run
    for subdir in sorted(os.listdir(datadir)) :
        full_dir_name = os.path.join(datadir, subdir)
        experiment = spt.SPT(full_dir_name, scale_l, links=False, quality=quality)
        experiments[treatment].append(experiment)

## Track statistics

Let's start by looking at the statistics of the tracks that we have, so that we know what to expect.

In [None]:
# gather, per treatment, the number of points of every trajectory
track_length = defaultdict(list)
for treatment, experiment_batch in experiments.items():
    for experiment in experiment_batch:
        track_length[treatment] += map(len, experiment.trajectory_spots)

In [None]:
# one histogram of track lengths per treatment, all with the same binning
bins = np.linspace(0, 100, 100)
for treatment, lengths in track_length.items():
    fig, ax = plt.subplots()
    ax.hist(lengths, bins=bins)
    ax.set_title(treatment, fontsize=18)
    ax.set_xlabel("Length")
    ax.set_ylabel("Counts")
    plt.show()

There are relatively few tracks that have more than 20 points. But let's try to extract something useful.

## MSD Analysis

To build the MSD calculation function, let's proceed step by step.

In [None]:
def msd_track(track) :
    """
    Calculate the time-averaged MSD of a single track.

    Parameters
    ----------
    track : array-like with dimensions (T, 2)
        The positions of the track, one row per time point.

    Returns
    -------
    np.ndarray of length T-1
        Element `k` is the squared displacement at a delay of `k+1` frames,
        averaged over all the pairs of points of the track separated by that
        delay. Pairs that give NaN are ignored. Tracks with fewer than two
        points return an empty array.
    """
    track = np.asarray(track, dtype=float)

    # get the length of the track
    T = track.shape[0]

    # with fewer than two points there is no displacement to measure
    if T < 2 :
        return np.empty(0)

    # the possible delays go from 1 to T-1; for each of them all the start
    # times are handled at once by subtracting two shifted views of the track
    msd = np.empty(T-1)
    for delay in range(1, T) :
        displacement = track[delay:] - track[:-delay]
        msd[delay-1] = np.nanmean(np.sum(displacement**2, axis=1))

    return msd

In [None]:
# this is the function that we will use to calculate the MSD
def msd_t(tracks) :
    
    # init the variables
    ntracks = len(tracks)
    lengths = [t.shape[0] for t in tracks]
    max_t = int(max(lengths)) - 1
    
    # We init a matrix that will contain the MSD calculated for each of the
    # tracks. It will have `ntracks` rows and `max_t - 1` columns.
    MSD = np.zeros((ntracks, max_t))
    MSD.fill(np.nan)

    # iterate through the all the tracks
    for i, track in enumerate(tracks) :
        m = msd_track(track)
        MSD[i, 0:len(m)] = m
    
    # return the MSD
    return np.nanmean(MSD, axis=0)

Okay, this seems to be working. Let's do the full calculation.

In [None]:
# pool the trajectories of all the experiments of each treatment, then
# compute one MSD curve per treatment
tracks = defaultdict(list)
msd = defaultdict(list)
for treatment, experiment_batch in experiments.items():
    tracks[treatment].extend(
        itertools.chain.from_iterable(e.trajectory_spots for e in experiment_batch)
    )
    msd[treatment] = msd_t(tracks[treatment])

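The final piece is to fit the measured curve to the theoretical model for two-dimensional diffusion, $MSD(t) = 4 D t^{\alpha}$. Taking logarithms gives $\log MSD = \log(4D) + \alpha \log t$, so a linear fit in log-log space yields $\alpha$ (the slope) and $D$ (from the intercept).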

In [None]:
def linear_regression (x,y,prob) :
    """
    Fit (x,y) to the linear function y = b0 + b1*x by least squares, and
    estimate the confidence intervals on the coefficients given the
    user-supplied probability *prob*.

    Parameters
    ----------
    x, y : array-like of the same length n
        The data to fit. More than two points are needed for the confidence
        intervals to be finite.
    prob : float
        The confidence level of the intervals (e.g. 0.99).

    Returns
    -------
    b0, b1, bb0, bb1
        The intercept, the slope, and the half-widths of the two-sided
        confidence intervals on the intercept and on the slope, based on the
        Student t distribution with n-2 degrees of freedom.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    # estimates
    xmean = x.mean()
    ymean = y.mean()
    xvar = (x*x).mean() - xmean**2
    xycov = (x*y).mean() - xmean*ymean
    b1 = xycov / xvar
    b0 = ymean - b1*xmean
    # mean squared residual (normalized by n; the n-2 correction is applied
    # in the interval formulas below)
    s2 = np.mean((y - b0 - b1*x)**2)
    # confidence intervals: critical value of the t distribution
    alpha = 1 - prob
    c = -1 * stats.t.ppf(alpha/2.,n-2)
    bb1 = c * (s2 / ((n-2) * xvar))**.5
    bb0 = c * ((s2 / (n-2)) * (1 + xmean**2 / xvar))**.5
    return b0,b1,bb0,bb1

In [None]:
def fit_msd (msd,cutoff,delta_t) :
    """
    Fit the first `cutoff` points of an MSD curve to MSD(t) = 4*D*t**a by
    linear regression in log-log space.

    Parameters
    ----------
    msd : array-like
        The MSD curve; element `k` is taken to be the MSD at a delay of
        `(k+1)*delta_t`.
    cutoff : int
        The number of initial points of the curve used in the fit.
    delta_t : float
        The time between two consecutive frames.

    Returns
    -------
    a, da, D, dD
        The exponent and its error, the coefficient D and its error. The
        errors are the 99% confidence half-widths from `linear_regression`.
    """
    msd = np.asarray(msd, dtype=float)
    # prepare the values to fit: the delays start from one frame, because the
    # zero delay (where the MSD is zero) is not part of the curve
    t = np.arange(1, msd.size+1)*delta_t
    x = np.log(t[:cutoff])
    y = np.log(msd[:cutoff])
    # perform fit to y = ax + b with their errors
    b,a,db,da = linear_regression (x,y,0.99)
    # now convert the value of b = log(4D) into a diffusion coefficient
    D = np.exp(b)/4
    # first-order error propagation: D = exp(b)/4, so dD = |dD/db| * db = D * db
    dD = D * db
    return a,da,D,dD

In [None]:
# number of initial MSD points used in the fit
cutoff = 12
# fit the MSD curve of every treatment
msd_fits = {treatment: fit_msd(m, cutoff, dt) for treatment, m in msd.items()}

In [None]:
# prepare figure
fig, ax = plt.subplots(figsize=(8,6))

# markers and colors
marker = itertools.cycle(('+', '.', '^'))
colors = itertools.cycle(('r', 'g', 'b'))

# plot for all the treatment conditions
for treatment, m in msd.items() :

    # t axis: m[i] is the MSD at a delay of (i+1)*dt, which is the same
    # convention that fit_msd uses; starting from zero would shift the curve
    # by one frame with respect to the fit
    t_data = dt*np.arange(1, m.shape[0]+1)
    t = t_data[:cutoff]

    # get the fit values
    a, da, D, dD = msd_fits[treatment]
    print("%s: alpha = %.3f +/- %.3f; D = %.3f +/- %.3f um2/s"%(treatment, a, da, D, dD))

    # the diffusion model: 2D diffusion
    yfit = 4 * D * t ** a

    # plot the data and the fit to the model
    color = next(colors)
    ax.loglog(t_data, m, next(marker),
              color = color, markersize = 8, label = treatment)
    ax.loglog(t, yfit, '--', color = color)

# finish up with plot style
ax.legend(loc='lower left', fontsize = 14)
ax.set_xlabel(r'$log[\Delta t]$ [s]', fontsize = 18)
ax.set_ylabel(r'$\log [MSD (\Delta t)]$', fontsize = 18)
plt.show()

In [None]:
# save the fit parameters and the MSD curve to separate files: writing both to
# 'test.dat' would leave only the MSD curve on disk, as the second call
# overwrites the first
np.savetxt('test_fits.dat', msd_fits['Olaparib_R5020'])
np.savetxt('test.dat', msd['Olaparib_R5020'])