In [4]:
"""
MPhys Project Semester 1
Mila Jelic & Paul Vautravers

Computes track displacements from the tracking data and saves them as JSON dictionary files keyed by
'TrackID[downsampling start index]', one pair of files (displacements and times) per downsampling step size.
The main function iterates over the tracking data file names and over the step sizes.
"""

import numpy as np
import pandas as pd
import json
import os

In [14]:
def filter_data(data_in, max_step_size, window, restriction):
    """
    Only keeps data for tracks that are long enough for hurst exponent estimation at a given downsampling step size.

    A track is kept when (number of rows // max_step_size) exceeds `window` for an odd window,
    or `window + 1` for an even window. Tracks are ranked by row count (longest first) and at
    most `restriction` of them are retained.

    Args:
        data_in: pandas dataframe, original data with a 'TrackID' column
        max_step_size: int, maximum downsampling step size
        window: int, size of rolling window for hurst exponent estimation
        restriction: int, maximum number of tracks to include
    Returns:
        filtered_data: pandas dataframe, rows of data_in belonging to the retained tracks
    """
    track_lengths = data_in['TrackID'].value_counts()  # sorted by count, longest track first

    # Even windows need one extra downsampled point compared to odd windows.
    min_points = window if window % 2 == 1 else window + 1

    # Take the track IDs from the index of the counts directly: the column name produced by
    # reset_index() differs between pandas versions ('index' before 2.0, 'TrackID' afterwards).
    tracks_to_keep = track_lengths[(track_lengths // max_step_size) > min_points].index

    if restriction < len(tracks_to_keep):
        tracks_to_keep = tracks_to_keep[:restriction]

    filtered_data = data_in[data_in['TrackID'].isin(tracks_to_keep)]

    return filtered_data

def find_displacement(x_data, y_data, z_data, start_index=0):
    """
    Computes the Euclidean distance of every point from the first point of the series.
    Args:
        x_data: numpy array, x coordinates
        y_data: numpy array, y coordinates
        z_data: numpy array, z coordinates
        start_index: int, accepted for interface compatibility but not used by the calculation
    Returns:
        disps: numpy array, distance of each point from the point at position 0
    """
    # Offsets along each axis relative to the first sample.
    dx = x_data - x_data[0]
    dy = y_data - y_data[0]
    dz = z_data - z_data[0]

    return np.sqrt(dx**2 + dy**2 + dz**2)

def downsample(data_input, down_int, start_index=0):
    """
    Selects every down_int-th row of the input, beginning at position start_index.
    Args:
        data_input: pandas object supporting positional .iloc slicing
        down_int: int, downsampling step size
        start_index: int, position of the first row to keep
    Returns:
        the positionally sliced rows of data_input
    """
    every_nth = slice(start_index, None, down_int)
    return data_input.iloc[every_nth]

def get_ds_displacement(data_in, step_size):
    """
    Computes the downsampled displacement series of a single track, one series per downsampling start index.
    Args:
        data_in: pandas dataframe, data for one track with 'Position X', 'Position Y', 'Position Z'
                 and 'Absolute Time' columns
        step_size: int, downsampling step size
    Returns:
        disps_arr: numpy array of shape (step_size, len(data_in)//step_size); row i holds the
                   displacements returned by find_displacement for the series starting at row i
        times_arr: numpy array of the same shape, the matching 'Absolute Time' values
    """
    n_points = len(data_in) // step_size

    disps_arr = np.empty((step_size, n_points))
    times_arr = np.empty((step_size, n_points))

    # A track shorter than step_size has no complete downsampled series: return the empty
    # (step_size, 0) arrays instead of indexing into empty coordinate arrays.
    if n_points == 0:
        return disps_arr, times_arr

    for i in range(step_size):
        downsampled_data = downsample(data_in, step_size, i)
        x = np.array(downsampled_data['Position X'])
        y = np.array(downsampled_data['Position Y'])
        z = np.array(downsampled_data['Position Z'])
        t = np.array(downsampled_data['Absolute Time'])
        displacements = find_displacement(x, y, z)

        # Early start indices can yield one sample more than n_points; truncate so that
        # every row has the same length.
        disps_arr[i] = displacements[:n_points]
        times_arr[i] = t[:n_points]

    return disps_arr, times_arr

def get_disp_dict(filtered_data, step_size):
    """
    Builds the displacement and time dictionaries for every track at one downsampling step size.
    Args:
        filtered_data: pandas dataframe, tracking data with a 'TrackID' column
        step_size: int, downsampling step size
    Returns:
        disp_dict: dict mapping 'TrackID[start index]' to a list of displacements
        disp_time_dict: dict mapping the same keys to the list of matching times
    """
    disp_dict = {}
    disp_time_dict = {}

    # groupby splits the frame once and yields the tracks in sorted TrackID order, instead of
    # re-scanning the whole frame with a boolean mask for every track.
    for tid, track_data in filtered_data.groupby('TrackID'):
        disps_arr, disp_times_arr = get_ds_displacement(track_data, step_size)
        # Row i of each array belongs to downsampling start index i.
        for i, (disps, times) in enumerate(zip(disps_arr, disp_times_arr)):
            key = '{}[{}]'.format(tid, i)
            disp_dict[key] = disps.tolist()
            disp_time_dict[key] = times.tolist()

    return disp_dict, disp_time_dict

def save_disp_dict(disp_dict, disp_time_dict, filename, window, step_size, max_step_size, restriction):
    """
    Saves the displacement and time dictionaries as JSON files, creating the output directory if needed.

    Files are written to 'disp_dict_data/<filename>_s<max_step_size>/' and are named
    'disp_dict_<filename>_w<window>_step<step_size>_r<restriction>' and
    'disp_time_dict_<filename>_w<window>_step<step_size>_r<restriction>'.

    Args:
        disp_dict: dict, displacements to save
        disp_time_dict: dict, displacement times to save
        filename: str, name of the tracking data file the dictionaries were derived from
        window: int, rolling window size (used in the file name only)
        step_size: int, downsampling step size (used in the file name only)
        max_step_size: int, maximum downsampling step size (used in the directory name only)
        restriction: int, track number restriction (used in the file name only)
    """
    path = 'disp_dict_data/'+filename+'_s{}/'.format(max_step_size)
    os.makedirs(path, exist_ok=True)  # no-op when the directory already exists

    suffix = filename+'_w{}_step{}_r{}'.format(window, step_size, restriction)
    for prefix, contents in (('disp_dict_', disp_dict), ('disp_time_dict_', disp_time_dict)):
        # Serialise before opening so a serialisation error cannot leave a truncated file.
        payload = json.dumps(contents)
        with open(path+prefix+suffix, 'w') as f:
            f.write(payload)

def main(filenames, window, step_sizes, restriction):
    """
    Computes and saves the displacement dictionaries for every tracking file and every step size.
    Args:
        filenames: iterable of str, tracking data file names without the '.csv' extension,
                   read from the 'haemocyte_tracking_data/' directory
        window: int, size of the rolling window, passed to the track filter and used in file names
        step_sizes: list of int, downsampling step sizes; the largest one is used for filtering
        restriction: int, maximum number of tracks to include
    """
    for name in filenames:
        tracking_data = pd.read_csv('haemocyte_tracking_data/'+name+'.csv')
        # Filter once per file with the largest step so the same tracks are used for every step size.
        largest_step = max(step_sizes)
        usable_tracks = filter_data(tracking_data, largest_step, window, restriction)

        for step in step_sizes:
            displacements, times = get_disp_dict(usable_tracks, step)
            save_disp_dict(displacements, times, name, window, step, largest_step, restriction)
            print('Saved disp dict for {} and step size {}'.format(name, step))


In [17]:
# Tracking data files to process, given without the '.csv' extension.
filenames = np.array([
    # Control
    'Control_frame001-200',
    'Control_frame200-400',
    'Control_frame400-600',
    'Control_frame600-800',
    'Control_frame800-1000',
    'Control_frame1000-1200',
    # LanB1
    'LanB1_frame001-200',
    'LanB1_frame200-400',
    'LanB1_frame400-600',
    'LanB1_frame600-800',
    'LanB1_frame800-1000',
    'LanB1_frame1000-1200',
    # defLanB1
    'defLanB1_300817_frame200-400',
    'defLanB1_300817_frame400-600',
])
window = 20  # rolling window size
step_sizes = list(range(1, 9))  # downsampling step sizes 1..8
restriction = 10000  # maximum number of tracks per file

In [18]:
main(filenames, window, step_sizes, restriction)

# Runs quickly: iterating over all the files and all the step sizes completes without issues.
# NOTE(review): the output recorded below only reports step size 1 for each file, whereas
# step_sizes above lists 1-8, so it appears to come from an earlier run - re-run to refresh.

Saved disp dict for Control_frame001-200 and step size 1
Saved disp dict for Control_frame200-400 and step size 1
Saved disp dict for Control_frame400-600 and step size 1
Saved disp dict for Control_frame600-800 and step size 1
Saved disp dict for Control_frame800-1000 and step size 1
Saved disp dict for Control_frame1000-1200 and step size 1
Saved disp dict for LanB1_frame001-200 and step size 1
Saved disp dict for LanB1_frame200-400 and step size 1
Saved disp dict for LanB1_frame400-600 and step size 1
Saved disp dict for LanB1_frame600-800 and step size 1
Saved disp dict for LanB1_frame800-1000 and step size 1
Saved disp dict for LanB1_frame1000-1200 and step size 1
Saved disp dict for defLanB1_300817_frame200-400 and step size 1
Saved disp dict for defLanB1_300817_frame400-600 and step size 1
