### Persistence time analysis

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import norm
from scipy import stats
import json

In [7]:
def filter_data_new(data_in, restriction=10000):
    """
    Only keeps data for tracks that are long enough for hurst exponent estimation at a given downsampling step size.
    Args: 
        data_in: pandas dataframe, original data
        max_step_size: int, maximum downsampling step size
        window: int, size of rolling window for hurst component estimation
    Returns:
        filtered_data: pandas dataframe
    """

    tracks_to_keep = data_in.TrackID.value_counts().loc[lambda x: x>2].reset_index()['index']

    if restriction<len(tracks_to_keep):
        filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep[:restriction])]
    else:
        filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep)]

    return filtered_data

def downsample(data_input, down_int, start_index=0):
    
    data_out = data_input.iloc[start_index::down_int]
    return data_out



In [8]:
def get_mod(v):
    
    return np.sqrt(v.dot(v))

def get_step_vector(index,data_x,data_y,data_z):
    
    dx = data_x[index]-data_x[index-1]
    dy = data_y[index]-data_y[index-1]
    dz = data_z[index]-data_z[index-1]
    
    step_vector = np.array([dx,dy,dz])   

    return step_vector  

def get_cos_theta(data_in):
    
    track_id_values = np.unique(data_in['TrackID'])
    cos_theta_avg = np.array([])
    
    for tid in track_id_values:
        #print('track_id ={}'.format(tid))
        track_data = data_in[data_in['TrackID']==tid]
        x = track_data['Position X'].values
        y = track_data['Position Y'].values
        z = track_data['Position Z'].values
        
        length = len(track_data)
        step_vectors = np.empty((length-1,3))
        cos_theta_temp = np.empty(length-2)
        print(length)
        for i in np.arange(length):
            if i>=1:
                
                step_vectors[i-1] = get_step_vector(i,x,y,z)
            if i>=2:
                
                v0 = step_vectors[i-2]
                v1 = step_vectors[i-1]
                cos_theta_temp[i-2] = np.dot(v0,v1)/(get_mod(v1)*get_mod(v0))

        cos_theta_avg = np.append(cos_theta_avg,np.mean(cos_theta_temp))
        
    return cos_theta_avg

def dsample_get_cos_theta(data_in, ds_rate):
    """
    Args:
        data_in: pandas dataframe
        ds_rate: int, downsampling step size
    Returns:
        h_arr: 2D np array
    """
    cos_theta_avgs = np.array([])
    for i in np.arange(ds_rate):
        
        downsampled_data = downsample(data_in, ds_rate, i)

        cos_theta_avgs_temp = get_cos_theta(downsampled_data)
        #print(len(cos_theta_avgs))
            
  #      if i == 0:
   #         cos_theta_avgs = cos_theta_avgs_temp
    #    else:
     #       
      #      if len(cos_theta_avgs_temp)>np.shape(cos_theta_avgs)[0]:
       #         cos_theta_avgs_temp=cos_theta_avgs_temp[:-1]
            
        cos_theta_avgs = np.append(cos_theta_avgs,cos_theta_avgs_temp)
        
    return cos_theta_avgs

def get_cos_theta_dict(filtered_data, step_sizes):
    """
    """
    cos_theta_dict = {}
    
    for i, s in enumerate(step_sizes):
        print('step_size ={}'.format(s))
        cos_theta_arr = dsample_get_cos_theta(filtered_data, s)
        cos_theta_dict["{}".format(s)] = cos_theta_arr.tolist()
        
    return cos_theta_dict

def save_cos_theta_data(cos_theta_data, file_name):#, window, step_sizes, restriction):
    """
    """
    #with open('cos_theta_dict_'+'w{}_'.format(window)+'s{}_'.format(max(step_sizes))+'r{}_'.format(restriction)+file_name, 'w')
    with open('cos_theta_dict_'+file_name, 'w') as file:
        file.write(json.dumps(cos_theta_data))
      

In [9]:
filenames = np.array(['Control_frame001-200',
                      'Control_frame200-400',
                      'Control_frame400-600',
                      'Control_frame600-800',
                      'Control_frame800-1000',
                      'Control_frame1000-1200',
                      'LanB1_frame001-200',
                      'LanB1_frame200-400',
                      'LanB1_frame400-600',
                      'LanB1_frame600-800',
                      'LanB1_frame800-1000',
                      'LanB1_frame1000-1200',
                      'defLanB1_300817_frame200-400',
                      'defLanB1_300817_frame400-600'])

In [11]:
step_sizes = [1,2,3,4,5,6,7,8]
restriction = 3
window = 20

# iterate over data files and save H dictionary 
for i, file in enumerate(filenames[6:8]):
    print('opened file {}'.format(file))
    data_ = pd.read_csv('haemocyte_tracking_data/' + file + '.csv')
    filtered_data = filter_data_new(data_,restriction)
    cos_theta_dict = get_cos_theta_dict(filtered_data, step_sizes)
    #print(cos_theta_dict)
    save_cos_theta_data(cos_theta_dict, file)

opened file LanB1_frame001-200
step_size =1
200
200
200
step_size =2
101
99
100
99
101
100
step_size =3
173
27
27
115
58
85
115
step_size =4
51
50
49
49
50
51
50
49
51
50
51
49
step_size =5
41
40
39
40
40
40
39
40
41
41
40
39
39
40
41
step_size =6
87
13
13
58
29
42
58
86
14
14
57
29
43
57
step_size =7
29
29
28
28
29
29
29
28
29
29
28
29
28
30
28
29
28
28
28
28
29
step_size =8
26
24
25
25
26
24
24
24
27
26
25
24
25
26
24
24
24
27
26
25
24
24
26
25
opened file LanB1_frame200-400
step_size =1
201
201
201
step_size =2
101
100
101
100
101
100
step_size =3
201
199
2
2
199
step_size =4
51
49
51
50
51
50
50
51
50
50
50
50
step_size =5
41
41
39
40
41
40
40
39
42
40
41
39
40
39


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


41
step_size =6
101
100


ValueError: negative dimensions are not allowed