### Persistence time analysis

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import norm
from scipy import stats
import json

In [7]:
def filter_data_new(data_in, max_step):
    """
    Only keeps data for tracks that are long enough for hurst exponent estimation at a given downsampling step size.
    Args: 
        data_in: pandas dataframe, original data
        max_step_size: int, maximum downsampling step size
        window: int, size of rolling window for hurst component estimation
    Returns:
        filtered_data: pandas dataframe
    """

    tracks_to_keep = data_in.TrackID.value_counts().loc[lambda x: x//max_step >2].reset_index()['index']

    #if restriction<len(tracks_to_keep):
     #   filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep[:restriction])]
    #else:
     #   filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep)]
    
    filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep)]
    return filtered_data

def downsample(data_input, down_int, start_index=0):
    
    data_out = data_input.iloc[start_index::down_int]
    return data_out



In [11]:
def get_mod(v):
    
    return np.sqrt(v.dot(v))

def get_step_vector(index,data_x,data_y,data_z):
    
    dx = data_x[index]-data_x[index-1]
    dy = data_y[index]-data_y[index-1]
    dz = data_z[index]-data_z[index-1]
    
    step_vector = np.array([dx,dy,dz])   

    return step_vector  

def get_cos_theta(data_in,restriction):
    
    track_id_values = np.unique(data_in['TrackID'])
    cos_theta_avg = np.array([])
    
    print('------------')
    
    for tid in track_id_values[:restriction]:
        #print('track_id ={}'.format(tid))
        track_data = data_in[data_in['TrackID']==tid]
        x = track_data['Position X'].values
        y = track_data['Position Y'].values
        z = track_data['Position Z'].values
        
        
        length = len(track_data)
        print(length,length-2)
        step_vectors = np.empty((length-1,3))
        cos_theta_temp = np.empty(length-2)
        for i in np.arange(length):
            if i>=1:
                
                step_vectors[i-1] = get_step_vector(i,x,y,z)
            if i>=2:
                
                v0 = step_vectors[i-2]
                v1 = step_vectors[i-1]
                cos_theta_temp[i-2] = np.dot(v0,v1)/(get_mod(v1)*get_mod(v0))

        cos_theta_avg = np.append(cos_theta_avg,np.mean(cos_theta_temp))
        
    return cos_theta_avg

def dsample_get_cos_theta(data_in, ds_rate,restriction):
    """
    Args:
        data_in: pandas dataframe
        ds_rate: int, downsampling step size
    Returns:
        h_arr: 2D np array
    """
    #cos_theta_avgs = np.array([])
    for i in np.arange(ds_rate):
        
        downsampled_data = downsample(data_in, ds_rate, i)

        cos_theta_avgs_temp = get_cos_theta(downsampled_data,restriction)
        #print(len(cos_theta_avgs))
            
        if i == 0:
            cos_theta_avgs = cos_theta_avgs_temp
        else:
            
            if len(cos_theta_avgs_temp)>np.shape(cos_theta_avgs)[0]:
                cos_theta_avgs_temp=cos_theta_avgs_temp[:-1]
            
        cos_theta_avgs = np.append(cos_theta_avgs,cos_theta_avgs_temp)
        
    return cos_theta_avgs

def get_cos_theta_dict(filtered_data, step_sizes,restriction):
    """
    """
    cos_theta_dict = {}
    
    for i, s in enumerate(step_sizes):
        print('step_size ={}'.format(s))
        cos_theta_arr = dsample_get_cos_theta(filtered_data, s,restriction)
        cos_theta_dict["{}".format(s)] = cos_theta_arr.tolist()
        
    return cos_theta_dict

def save_cos_theta_data(cos_theta_data, file_name):#, window, step_sizes, restriction):
    """
    """
    #with open('cos_theta_dict_'+'w{}_'.format(window)+'s{}_'.format(max(step_sizes))+'r{}_'.format(restriction)+file_name, 'w')
    with open('cos_theta_dict_'+file_name, 'w') as file:
        file.write(json.dumps(cos_theta_data))
      

In [12]:
filenames = np.array(['Control_frame001-200',
                      'Control_frame200-400',
                      'Control_frame400-600',
                      'Control_frame600-800',
                      'Control_frame800-1000',
                      'Control_frame1000-1200',
                      'LanB1_frame001-200',
                      'LanB1_frame200-400',
                      'LanB1_frame400-600',
                      'LanB1_frame600-800',
                      'LanB1_frame800-1000',
                      'LanB1_frame1000-1200',
                      'defLanB1_300817_frame200-400',
                      'defLanB1_300817_frame400-600'])

In [13]:
step_sizes = [1,2,3,4,5,6,7,8]
restriction_val = 5

# iterate over data files and save H dictionary 
for i, file in enumerate(filenames[6:8]):
    print('opened file {}'.format(file))
    data = pd.read_csv('haemocyte_tracking_data/' + file + '.csv')
    filtered_data = filter_data_new(data,max(step_sizes))
    cos_theta_dict = get_cos_theta_dict(filtered_data, step_sizes,restriction_val)
    #print(cos_theta_dict)
    save_cos_theta_data(cos_theta_dict, file)

opened file LanB1_frame001-200
step_size =1
------------
200 198
197 195
125 123
157 155
44 42
step_size =2
------------
102 100
98 96
53 51
82 80
24 22
------------
98 96
99 97
72 70
75 73
20 18
step_size =3
------------
69 67
60 58
29 27
55 53
12 10
------------
62 60
73 71
39 37
48 46
20 18
------------
69 67
64 62
57 55
54 52
12 10
step_size =4
------------
58 56
42 40
26 24
34 32
13 11
------------
51 49
44 42
37 35
40 38
15 13
------------
44 42
56 54
27 25
48 46
11 9
------------
47 45
55 53
35 33
35 33
5 3
step_size =5
------------
46 44
39 37
21 19
37 35
3 1
------------
39 37
37 35
27 25
22 20
15 13
------------
38 36
37 35
17 15
43 41
9 7
------------
31 29
44 42
30 28
30 28
13 11
------------
46 44
40 38
30 28
25 23
4 2
step_size =6
------------
37 35
31 29
14 12
28 26
7 5
------------
31 29
39 37
23 21
22 20
10 8
------------
34 32
33 31
23 21
28 26
7 5
------------
32 30
29 27
15 13
27 25
5 3
------------
31 29
34 32
16 14
26 24
10 8
------------
35 33
31 29
34 32
26 24
5

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


21 19
29 27
18 16
17 15
3 1
opened file LanB1_frame200-400
step_size =1
------------
201 199
40 38
44 42
38 36
32 30
step_size =2
------------
104 102
16 14
18 16
15 13
19 17
------------
97 95
24 22
26 24
23 21
13 11
step_size =3
------------
66 64
13 11
16 14
10 8
9 7
------------
63 61
17 15
13 11
12 10
10 8
------------
72 70
10 8
15 13
16 14
13 11
step_size =4
------------
58 56
6 4
5 3
4 2
11 9
------------
64 62
17 15
11 9
14 12
4 2
------------
46 44
10 8
13 11
11 9
8 6
------------
33 31
7 5
15 13
9 7
9 7
step_size =5
------------
41 39
7 5
8 6
10 8
10 8
------------
48 46
6 4
8 6
9 7
5 3
------------
33 31
11 9
13 11
7 5
4 2
------------
35 33
13 11
8 6
7 5
6 4
------------
44 42
3 1
7 5
5 3
7 5
step_size =6
------------
29 27
5 3
6 4
3 1
6 4
------------
31 29
10 8
8 6
8 6
3 1
------------
43 41
4 2
7 5
8 6
6 4
------------
37 35
8 6
10 8
7 5
3 1
------------
32 30
7 5
5 3
4 2
7 5
------------
29 27
6 4
8 6
8 6
7 5
step_size =7
------------
27 25
3 1
7 5
8 6
3 1
------------

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ValueError: negative dimensions are not allowed