### Persistence time analysis

In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import tensorflow as tf
from scipy.stats import norm
from sklearn.mixture import GaussianMixture as GMM
from scipy import stats
import json

In [121]:
def filter_data(data_in, max_step_size, window, restriction=10000):
    """
    Only keeps data for tracks that are long enough for hurst exponent estimation at a given downsampling step size.

    A track is kept when (number of rows // max_step_size) > (window + 1).
    Tracks are ranked by row count (longest first, the order produced by
    ``value_counts``) and at most ``restriction`` of them are retained.

    Args:
        data_in: pandas dataframe, original data; must have a 'TrackID' column
        max_step_size: int, maximum downsampling step size
        window: int, size of rolling window for hurst component estimation
        restriction: int, maximum number of tracks to keep (default 10000)
    Returns:
        filtered_data: pandas dataframe, the rows of data_in belonging to the kept tracks
    """
    track_lengths = data_in['TrackID'].value_counts()
    long_enough = track_lengths[(track_lengths // max_step_size) > (window + 1)]
    # Take the track ids from the index of the counts Series rather than from
    # reset_index()['index']: the name of that column changed in pandas 2.0,
    # whereas the index always holds the TrackID values.
    # Slicing past the end is harmless, so no explicit length check is needed.
    tracks_to_keep = long_enough.index[:restriction]
    filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep)]
    return filtered_data

In [117]:
def get_mod(v):
    """
    Returns the Euclidean norm (modulus) of a vector.
    Args:
        v: numpy array, the vector
    Returns:
        the square root of v.v
    """
    squared_length = v.dot(v)
    return np.sqrt(squared_length)

def get_step_vector(index,data_x,data_y,data_z):
    """
    Returns the displacement between position index-1 and position index.
    Args:
        index: int, position of the end point of the step
        data_x, data_y, data_z: indexable sequences of coordinates
    Returns:
        numpy array [dx, dy, dz]
    """
    previous = index - 1
    # One component per coordinate axis, in x, y, z order.
    return np.array([axis[index] - axis[previous] for axis in (data_x, data_y, data_z)])

def get_cos_theta(data_in):
    """
    Computes, for every track, the mean cosine of the angle between consecutive steps.

    A step is the displacement between two successive rows of a track. For each
    pair of successive steps v0, v1 the cosine is v0.v1 / (|v0| |v1|), and the
    values are averaged over the track.

    Args:
        data_in: pandas dataframe with columns 'TrackID', 'Position X',
            'Position Y' and 'Position Z'
    Returns:
        cos_theta_avg: numpy array with one value per track, ordered by
            ascending TrackID. Tracks with fewer than 3 rows have no pair of
            steps and yield NaN. A zero-length step makes its cosine (and so
            the track mean) NaN, as 0/0 is undefined.
    """
    position_columns = ['Position X', 'Position Y', 'Position Z']
    cos_theta_avg = []

    # Group on the argument (not on a module-level dataframe) so the function
    # works on whatever data it is given; sort=True keeps ascending TrackID order.
    for _, track_data in data_in.groupby('TrackID', sort=True):
        if len(track_data) < 3:
            # Fewer than two steps: no angle can be formed.
            cos_theta_avg.append(np.nan)
            continue

        positions = np.asarray(track_data[position_columns], dtype=float)
        step_vectors = np.diff(positions, axis=0)
        step_lengths = np.sqrt((step_vectors * step_vectors).sum(axis=1))

        # Pair every step with the one that follows it.
        dot_products = (step_vectors[:-1] * step_vectors[1:]).sum(axis=1)
        cos_theta = dot_products / (step_lengths[:-1] * step_lengths[1:])
        cos_theta_avg.append(np.mean(cos_theta))

    return np.array(cos_theta_avg, dtype=float)

def get_cos_theta_dict(filtered_data, step_sizes, window, restriction):
    """
    Builds a dictionary of per-step-size results.
    Args:
        filtered_data: pandas dataframe, passed through to get_h_values
        step_sizes: iterable of int, downsampling step sizes to evaluate
        window: int, passed through to get_h_values
        restriction: int, passed through to get_h_values
    Returns:
        cos_theta_dict: dict mapping str(step size) to the list of values
            returned for that step size
    """
    cos_theta_dict = {}

    for s in step_sizes:
        # NOTE(review): get_h_values is not defined in the visible part of this
        # file and its name suggests Hurst-exponent values rather than
        # cos(theta) - confirm this is the intended helper.
        cos_theta_arr = get_h_values(filtered_data, s, window, restriction)
        cos_theta_dict["{}".format(s)] = cos_theta_arr.tolist()

    # Return the populated dictionary (not the function object itself).
    return cos_theta_dict
      

In [38]:
# Base names (without extension) of the tracking files: six frame ranges for
# each of the 'Control' and 'LanB1' conditions, followed by two 'defLanB1' files.
filenames = np.array(
    ['{}_frame{}'.format(condition, frames)
     for condition in ('Control', 'LanB1')
     for frames in ('001-200', '200-400', '400-600',
                    '600-800', '800-1000', '1000-1200')]
    + ['defLanB1_300817_frame200-400',
       'defLanB1_300817_frame400-600'])

In [120]:
# Load the first tracking file listed in `filenames`.
data = pd.read_csv('haemocyte_tracking_data/{}.csv'.format(filenames[0]))

In [118]:
# Per-track mean cosine of the angle between consecutive steps for the loaded file.
cos_theta = get_cos_theta(data)

In [119]:
step_sizes = [1,2,3,4,5,6,7,8]
restriction = 30

# iterate over data files and save H dictionary
# NOTE(review): `window`, `get_h_dict` and `save_h_data` are not defined in the
# visible part of this file - confirm they are defined before this cell runs.
for file in filenames:
    print('opened file {}'.format(file))
    data = pd.read_csv('haemocyte_tracking_data/' + file + '.csv')
    filtered_data = filter_data(data, max(step_sizes), window, restriction)
    # Bind the result to the same name that is handed to save_h_data below.
    h_dict = get_h_dict(filtered_data, step_sizes, window, restriction)
    save_h_data(h_dict, file, window, step_sizes, restriction)

0.3941953553715469
