# dev nearest-neighbor tracking with periodic boundary conditions
Tim Tyree<br>
6.29.2022

In [1]:
# use_clear_output=True
# if use_clear_output:
#     from IPython.display import clear_output
#     clear_output(wait=True)

import sys,os
og_stdout=sys.stdout
from lib.my_initialization import *

os.chdir(nb_dir)
from lib import *

# import cupy, cudf
# from lib.rapids_func import *

import seaborn as sns
import matplotlib.ticker as mtick

# #reset matplotlib settings
# import matplotlib as mpl
# sns.reset_orig()
# mpl.rc_file_defaults()



In [2]:
import warnings
warnings.simplefilter("ignore", UserWarning)
%load_ext autoreload
%autoreload 2

In [3]:
#reset matplotlib settings
import matplotlib as mpl
sns.reset_orig()
mpl.rc_file_defaults()

In [4]:
#DONE: implement/dev draft of simple tracker like WJ's in physical notebook
#DONE: add support for creation events
#DONE: add support for a max displacement of particles
#DONE: test on WJ's fortranic tippos
#DONT: repeat with .copy() removed in often repeated locations.  does it still work the same?
#DONE: test routine on fortranic data step by step
#DONE: wrap routine into function 
#DONE: test functional routine on fortranic data with new kernel
#DONE: modify routine_compute_annihilation_range_timeseries_pbc to support use_reverse_time=True

In [50]:
#DONE: wrap this into a foo
#DONE: test that this function is an involution

# define module

In [10]:
def reverse_time(df,t_col='t'):
    """reverse_time inverts the times recorded in df[t_col] inplace and returns df.
    df.index is also reset.
    applying reverse_time three times is the same as applying it once.
    
    Example Usage:
df_reversed = reverse_time(df.copy(),t_col='t')
    """
    ti = df[t_col].min()
    tf = df[t_col].max()
    df[t_col] = tf - df[t_col] + ti
    df.sort_values(by=t_col,inplace=True)
    df.reset_index(inplace=True,drop=True)
    return df

In [11]:
testing=True
if testing:
    trial_num=10
    # log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/tippos_per_001_log/'
    log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_fk/200x200/tippos_per_c_005_log/'
    df=load_parquet_by_trial_num(trial_num=trial_num,folder_parquet=log_folder_parquet)
    #test there is exactly one trial in memory
    assert df['trial_num'].drop_duplicates().shape[0]==1

    #DONE test reverse_time is an involution
    df.reset_index(inplace=True,drop=True)
    df_reversed = reverse_time(df.copy(),t_col='t')
    df_reversed = reverse_time(df_reversed,t_col='t')
    df_reversed = reverse_time(df_reversed,t_col='t')
    df_reversed = reverse_time(df_reversed,t_col='t')
    # df_reversed.tail()
    assert ((df_reversed==df).all()['t'])
    print((df_reversed==df).all())
    #NOTE: the order of positions were not preserved
    

index        False
t             True
n             True
x            False
y            False
trial_num     True
dtype: bool


# compute range timeseries for fortranic spiral tip data

In [26]:
# input_dir_lst=[
#     f'{nb_dir}/Data/from_wjr/positions_fk/200x200/tippos_per_c_005',
#     f'{nb_dir}/Data/from_wjr/positions_fk/200x200/tippos_per_c_001',
#     f'{nb_dir}/Data/from_wjr/positions_fk/150x150/tippos_per_c_001',
#     f'{nb_dir}/Data/from_wjr/positions_fk/250x250/tippos_per_001',
#     f'{nb_dir}/Data/from_wjr/positions_fk/200x200/tippos_per_c_005',
# ]

input_dir_lst=[
#     f'{nb_dir}/Data/from_wjr/positions_lr/200x200/tippos_per_001',
    f'{nb_dir}/Data/from_wjr/positions_lr/250x250/tippos_per_c_001',
    f'{nb_dir}/Data/from_wjr/positions_lr/300x300/tippos_per_c_001',
    f'{nb_dir}/Data/from_wjr/positions_lr/350x350/tippos_per_c_001',
    f'{nb_dir}/Data/from_wjr/positions_lr/400x400/tippos_per_c_001',
    f'{nb_dir}/Data/from_wjr/positions_lr/500x500/tippos_per_001',
]

## parse WJ's fortranic spiral tip locations to a parquet cache.
# input_dir=f'{nb_dir}/Data/from_wjr/tippos_per_001'

In [27]:
for input_dir in input_dir_lst:
    assert os.path.exists(input_dir)

In [None]:
use_cache=True

# #creations
# use_reverse_time=True
# use_final_annihilation=False
#annihilations (default)
use_reverse_time=False
use_final_annihilation=True

printing=True
if printing:
    print(f"estimated run time ~4d 10h 11m for tracking annihilations from 4 of wj's fk tip position caches:")

range_data_dir_lst=[]
for input_dir in input_dir_lst:
    # log_folder_parquet=f'{nb_dir}/Data/from_wjr/tippos_per_001_log/'
    log_folder_parquet=input_dir+'_log/'
    if printing:
        print(f"{log_folder_parquet=}")
    #generate cache from src input_dir
    df_log=parse_fortranic_tip_pos(input_dir)
    if not (use_cache & os.path.exists(log_folder_parquet)):
        #partition df_log into a folder of tip logs
        # log_folder_parquet=f'{nb_dir}/Data/from_wjr/tippos_per_001_log/'
        # save_df_to_parquet_by(df_log,log_folder_parquet,by='trial_num',compression='snappy',index=True)
        save_df_to_parquet_by(df_log,log_folder_parquet,by='trial_num',compression='snappy',index=None)
        if printing:
            print(f"saved to spiral tip positions to\n{log_folder_parquet=}")
#     else:
#         trial_num=1
#         df_log=load_parquet_by_trial_num(trial_num=trial_num,folder_parquet=log_folder_parquet)
        
    #determine width and height of the computational domain input the discretization
    width,height=df_log.describe().loc['max'][['x','y']].values.T
    width =  int ( np.around( width  ))
    height = int ( np.around( height ))
    max_trial_num=df_log['trial_num'].max()
    DT=df_log['t'].min()
    #printing=True
    if printing:
        print(df_log.describe().loc[['min','max']][['x','y']])
        print(f"{width=}, {height=}, {DT=}")
    del df_log

    #fortranic spiral tip tracker test case
    testing=True
    if testing:
        trial_num=1
        #log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/tippos_per_001_log/'
        df=load_parquet_by_trial_num(trial_num=trial_num,folder_parquet=log_folder_parquet)
        #time reversal
        if use_reverse_time:
            #map df <-- df under ./ t --> -t, preserving start/end times
            df = reverse_time(df,t_col='t')

        df_R=routine_compute_annihilation_range_timeseries_pbc(df,DS=0.025,width=int(width),height=int(height),
                        max_disp=30, #15-1000 appears to work
                        max_dist=20, # 20 appears to work
                        min_num_obs=1,
                        use_final_annihilation=bool(use_final_annihilation),printing=False)
        if printing:
            print(df_R.head())

    task_lst=[]
    for trial_num in range(max_trial_num):
        task_lst.append((trial_num,str(log_folder_parquet),int(width),int(height)))
    if printing:
        print(f"added {len(task_lst)} tasks to task_lst!")

    def routine(task):
        trial_num,log_folder_parquet,width,height=task    
        #log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/tippos_per_001_log/'
        try:
            df=load_parquet_by_trial_num(trial_num=trial_num,folder_parquet=log_folder_parquet)            
            #time reversal
            if use_reverse_time:
                #map df <-- df under ./ t --> -t, preserving start/end times
                df = reverse_time(df,t_col='t')
            
            df_R=routine_compute_annihilation_range_timeseries_pbc(df,DS=0.025,width=int(width),height=int(height),
                            max_disp=30, #15-1000 appears to work
                            max_dist=20, # 20 appears to work
                            min_num_obs=1,
                            use_final_annihilation=bool(use_final_annihilation),printing=False)
            return df_R
        except Exception as e:
            return f"Warning: {e}"

    npartitions=10
    retval=eval_routine_daskbag(routine,task_lst,npartitions,printing=True)
    #NOTE: if this finishes in less than ~53 minutes, then it saves time compared to a simple for loop
    #it ran in ~15 minutes. not bad.

    #parse data
    data_lst=[]
    for rv in retval:
        if type('')==type(rv):
            if printing:
                print(rv)
        elif rv is not None:
            data_lst.append(rv)
    if printing:
        print(f"successfully computed msr data for N={len(data_lst)} termination events.")

    #DONE: save data to pickle
    #input_folder = "/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/"
    input_folder=os.path.dirname(input_dir)
    if use_reverse_time:
        suffix_str='_creation_range_timeseries.pkl'
    else:
        suffix_str='_annihilation_range_timeseries.pkl'
    output_fn=os.path.basename(input_dir)+suffix_str
    range_data_dir=os.path.join(input_folder,output_fn)
    save_to_pkl(range_data_dir, data_lst)
    if printing:
        print(f"{range_data_dir=}")
    #record
    range_data_dir_lst.append(range_data_dir)

estimated run time ~4d 10h 11m for tracking annihilations from 4 of wj's fk tip position caches:
log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_lr/250x250/tippos_per_c_001_log/'
saved to spiral tip positions to
log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_lr/250x250/tippos_per_c_001_log/'
         x      y
min    0.0    0.0
max  250.0  250.0
width=250, height=250, DT=1.0
       annihilation_index         R  tdeath       t  n     x     y  trial_num  \
43544                   0  0.183848     0.0  6239.0  2  11.1  13.3          1   
43542                   0  0.261880     1.0  6238.0  2  10.3  11.5          1   
43540                   0  0.297752     2.0  6237.0  2   9.7  10.2          1   
43538                   0  0.331549     3.0  6236.0  2   9.4   9.5          1   
43536                   0  0.351923     4.0  6235.0  2   9.1   9.1          1   

       frame  
43544   6240  
43542   623

In [None]:
# range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_fk/200x200/tippos_per_c_005_creation_range_timeseries.pkl'
# range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_fk/250x250/tippos_per_001_creation_range_timeseries.pkl'


In [None]:
# DONE: add support for creation range timeseries
#heretim
print(*range_data_dir_lst)

In [None]:
beep(7)

## etc

In [None]:
# range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_fk/200x200/tippos_per_c_001_annihilation_range_timeseries.pkl'
# range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_fk/150x150/tippos_per_c_001_annihilation_range_timeseries.pkl'
!rm /home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/positions_fk/150x150/tippos_per_c_001_annihilation_range_timeseries.pkl


In [38]:
def routine(task):
    trial_num=task    
    log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/tippos_per_001_log/'
    try:
        df=load_parquet_by_trial_num(trial_num=trial_num,folder_parquet=log_folder_parquet)
        df_R=routine_compute_annihilation_range_timeseries_pbc(df,DS=0.025,width=200,height=200,
                        max_disp=30, #15-1000 appears to work
                        max_dist=20, # appears to work
                        min_num_obs=1,use_final_annihilation=True,printing=False)
        return df_R
    except Exception as e:
        return f"Warning: {e}"

In [39]:
npartitions=11
retval=eval_routine_daskbag(routine,task_lst,npartitions,printing=True)
#NOTE: if this finishes in less than ~53 minutes, then it saves time compared to a simple for loop
#it ran in ~15 minutes. not bad.

run time for evaluating routine was 930.61 seconds, yielding 641 values returned


In [40]:
#parse data
data_lst=[]
for rv in retval:
    if type('')==type(rv):
        print(rv)
    elif rv is not None:
        data_lst.append(rv)
print(f"successfully computed msr data for N={len(data_lst)} termination events.")

successfully computed msr data for N=595 termiantion events.


In [41]:
#DONE: save data to pickle
input_folder = "/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/"
output_fn='tippos_per_001'+'_annihilation_range_timeseries.pkl'
range_data_dir=os.path.join(input_folder,output_fn)
save_to_pkl(range_data_dir, data_lst)
print(f"{range_data_dir=}")

range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/tippos_per_001_annihilation_range_timeseries.pkl'


In [33]:
# task_lst=[]
# for trial_num in range(641):
#     task_lst.append(trial_num)
# print(f"added {len(task_lst)} tasks to task_lst!")

# def routine(task):
#     trial_num=task    
#     log_folder_parquet='/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/tippos_per_001_log/'
#     try:
#         df=load_parquet_by_trial_num(trial_num=trial_num,folder_parquet=log_folder_parquet)
#         df_R=routine_compute_annihilation_range_timeseries_pbc(df,DS=0.025,width=200,height=200,
#                         max_disp=30, #15-1000 appears to work
#                         max_dist=20, # appears to work
#                         min_num_obs=1,use_final_annihilation=True,printing=False)
#         return df_R
#     except Exception as e:
#         return f"Warning: {e}"

# npartitions=10
# retval=eval_routine_daskbag(routine,task_lst,npartitions,printing=True)
# #NOTE: if this finishes in less than ~53 minutes, then it saves time compared to a simple for loop
# #it ran in ~15 minutes. not bad.

# #parse data
# data_lst=[]
# for rv in retval:
#     if type('')==type(rv):
#         print(rv)
#     elif rv is not None:
#         data_lst.append(rv)
# print(f"successfully computed msr data for N={len(data_lst)} termination events.")

# #DONE: save data to pickle
# input_folder = "/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/"
# output_fn='tippos_per_001'+'_annihilation_range_timeseries.pkl'
# range_data_dir=os.path.join(input_folder,output_fn)
# save_to_pkl(range_data_dir, data_lst)
# print(f"{range_data_dir=}")

added 641 tasks to task_lst!


# compute range timeseries for pythonic spiral tip data

In [4]:
def drop_every_other_time_point(df):
    """
    Example Usage:
drop_every_other_time_point(df)
    """
    t_values_keep = df['t'].drop_duplicates()[1::2].values
    boo_keep = df['t']<-9999
    for t in t_values_keep:
        boo_keep |= df['t']==t
    df = df[boo_keep].copy()
    return df

In [6]:
# search_for_file()

In [7]:
#completed
# input_folder='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/ds_5_param_qu_tmax_30_Ko_5.4_diffCoef_0.001/Log/'
# fn_lst=[os.path.join(input_folder,x) for x in sorted(os.listdir(input_folder))]
# fn_lst[0],len(fn_lst)
# print(f"{input_folder=}")

In [25]:
# input_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/ds_5_param_qu_tmax_30_Ko_5.4_diffCoef_0.001/Log/ic001.12_log.csv'
input_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/Log/ic001.21_log.csv'
df=load_tip_pos_from_csv(input_dir,round_t_to_n_digits=7,printing=True)
df.head()

before drop_duplicates: df.shape=(133269, 8)
after drop_duplicates: df.shape=(133269, 8)


Unnamed: 0,index,t,n,x,y,grad_ux,grad_uy,grad_vx,grad_vy
0,0,0.5,8,30.970498,191.730527,-0.066684,1.172466,6.688934,0.077292
1,1,0.5,8,62.724662,158.404917,2.426485,0.433045,-1.436105,1.967648
2,2,0.5,8,86.944705,168.813958,-0.650947,1.850735,4.180481,-0.643616
3,3,0.5,8,114.748439,142.986244,0.324528,1.543465,1.744883,0.402295
4,4,0.5,8,133.438658,6.224695,1.012537,0.720855,1.102857,-1.016976


In [26]:
width,height=df.describe().loc['max'][['x','y']].values.T

DT=df['t'].min()

# DT=sorted(df['t'].drop_duplicates().values)[1]
printing=True
if printing:
    print(df.describe().loc[['min','max']][['x','y']])
    print(f"{width=}, {height=}, {DT=}")
# del df_log

              x           y
min    0.000990    0.000959
max  199.998708  199.999086
width=199.9987081729576, height=199.9990858828612, DT=0.5


In [48]:
input_folder=os.path.dirname(input_dir)
# print(f"{input_folder=}")

input_folder='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/Log'


In [27]:
input_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/Log/ic001.21_log.csv'


In [39]:
#needs filtering by .find('_log.csv')
input_dir_lst=[
    '/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/Log/ic001.21_log.csv',
    #i think the following (first) one is what i used for the FK model
    '/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_10_diffCoef_0.0005/Log/ic200x200.0.1_log.csv',
    '/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_30_diffCoef_0.001_dt_1/Log/ic200x200.0.1_log.csv',    
    '/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_10_diffCoef_0.0005/Log/ic200x200.0.0_log.csv',
    '/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_30_diffCoef_0.001_dt_0.025/Log/ic200x200.0.0_log.csv'
]

In [40]:
#compute range time series data and save to cache for a list of trial types
range_data_dir_lst=[]
for input_dir in input_dir_lst:
    #find files with '_log.csv' filtering
    input_folder=os.path.dirname(input_dir)
    fn_lst=[os.path.join(input_folder,x) for x in sorted(os.listdir(input_folder)) if x.find('_log.csv')!=-1]
    print(f"found {len(fn_lst)} files comparable to {fn_lst[0]}")

    task_lst=[]
    for trial_num,fn in enumerate(fn_lst):
        task=trial_num,fn
        task_lst.append(task)
    print(f"added {len(task_lst)} tasks to task_lst!")

    #pythonic spiral tip tracker test case
    testing=False
    if testing:
        task = task_lst[0]
        trial_num,input_dir = task
        #load data
        df=load_tip_pos_from_csv(input_dir,round_t_to_n_digits=7,printing=False)
        #if DT<1 ms, double it until DT>=1 ms, and then drop that many members from df
        DT=df['t'].min()
        while DT<1:
            df = drop_every_other_time_point(df).copy()
            DT=df['t'].min()
        #perform routine
        df_R=routine_compute_annihilation_range_timeseries_pbc(df,DS=0.025,width=200,height=200,
                        max_disp=30, #15-1000 appears to work
                        max_dist=20, # appears to work
                        min_num_obs=1,use_final_annihilation=True,printing=True)
        df_R['trial_num']=trial_num
        print(df_R.head())

    def routine(task):
        trial_num,input_dir = task
        try:
            #load data
            df=load_tip_pos_from_csv(input_dir,round_t_to_n_digits=7,printing=False)
            #if DT<1 ms, double it until DT>=1 ms, and then drop that many members from df
            DT=df['t'].min()
            while DT<1:
                df = drop_every_other_time_point(df).copy()
                DT=df['t'].min()
            #perform routine
            df_R=routine_compute_annihilation_range_timeseries_pbc(df,DS=0.025,width=200,height=200,
                            max_disp=30, #15-1000 appears to work
                            max_dist=20, # appears to work
                            min_num_obs=1,use_final_annihilation=True,printing=False)
            df_R['trial_num']=trial_num
            return df_R
        except Exception as e:
            return f"Warning: {e}"

    npartitions=8
    printing=True
    if npartitions>1:
        bag = db.from_sequence(task_lst, npartitions=npartitions).map(routine)
        start = time.time()
        retval = list(bag)
    else:
        start=time.time()
        retval=[]
        for task in task_lst:
            retval.append(routine(task))
    if printing:
        print(f"run time for evaluating routine was {time.time()-start:.2f} seconds, yielding {len(retval)} values returned")

    #parse data
    data_lst=[]
    for rv in retval:
        if type('')==type(rv):
            print(rv)
        elif rv is not None:
            data_lst.append(rv)
    print(f"successfully computed msr data for N={len(data_lst)} termination events.")

    #DONE: save data to pickle
    # input_folder = "/home/timothytyree/Documents/GitHub/care/notebooks/Data/from_wjr/"
    # input_folder='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/ds_5_param_qu_tmax_30_Ko_5.4_diffCoef_0.001'#/Log/'
    save_folder=os.path.dirname(os.path.dirname(input_dir))
    output_fn='annihilation_range_timeseries.pkl'
    range_data_dir=os.path.join(save_folder,output_fn)
    save_to_pkl(range_data_dir, data_lst)
    print(f"*** {range_data_dir=}")
    range_data_dir_lst.append(range_data_dir)
    del data_lst
    del retval

found 159 files comparable to /home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/Log/ic001.11_log.csv
added 159 tasks to task_lst!
run time for evaluating routine was 2478.27 seconds, yielding 159 values returned
successfully computed msr data for N=129 termination events.
*** range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/annihilation_range_timeseries.pkl'
found 324 files comparable to /home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_10_diffCoef_0.0005/Log/ic200x200.0.0_log.csv
added 324 tasks to task_lst!
run time for evaluating routine was 2116.09 seconds, yielding 324 values returned
successfully computed msr data for N=121 termination events.
*** range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-2

In [41]:
for range_data_dir in range_data_dir_lst:
    print(f"{range_data_dir=}")
beep(3)

range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/param_qu_tmax_30_Ko_5.4_diffCoef_0.0005_dt_0.5/annihilation_range_timeseries.pkl'
range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_10_diffCoef_0.0005/annihilation_range_timeseries.pkl'
range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_30_diffCoef_0.001_dt_1/annihilation_range_timeseries.pkl'
range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_10_diffCoef_0.0005/annihilation_range_timeseries.pkl'
range_data_dir='/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-fk-200x200/param_set_8_ds_5.0_tmax_30_diffCoef_0.001_dt_0.025/annihilation_range_timeseries.pkl'


0

# evaluate track_particle_annihilations_pbc_nearest_neighbors_simple (can be used for MSD and MSR after creation)

In [None]:
df=track_particle_annihilations_pbc_nearest_neighbors_simple(df,width=200,height=200,
                max_disp=30, #15-1000 appears to work
                max_dist=20, # appears to work
                min_num_obs=1,use_final_annihilation=True,printing=True)

In [4]:
# # #DONE: load the data from bgmc that has pid_explicit
# # #HINT: its on ub in bgmc/python/data/local_results
# search_for_file()

# #linear particle model test case
# input_dir='/home/timothytyree/Documents/GitHub/bgmc/python/data/local_results/euic_False_fc_2_r_0.1_D_2_L_10_kappa_1500_varkappa_5/Log/pbc_particle_log2_log.csv'
# df=load_tip_pos_from_csv(input_dir,round_t_to_n_digits=round_t_to_n_digits,printing=False)
# round_t_to_n_digits=7
# min_num_obs=10
# min_dist=2. #pixels
# # width=200
# # height=200 
# width=10
# height=10
# from collections import deque


File: /home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/ds_5_param_qu_tmax_30_Ko_5.4_diffCoef_0.001/Log/ic001.12_log.csv


'/home/timothytyree/Documents/GitHub/care/notebooks/Data/initial-conditions-suite-3-LR/ds_5_param_qu_tmax_30_Ko_5.4_diffCoef_0.001/Log/ic001.12_log.csv'

In [None]:
max_index_annihilation=df['index_self'].max()
fontsize=14
fig,axs=plt.subplots(ncols=2,figsize=(9,4))
ax=axs[0]
for index_annihilation in range(max_index_annihilation):
    df[df['index_self']==index_annihilation].plot(x='x',y='y',color=f'C{index_annihilation}',ax=ax,alpha=0.5,legend=None)
    df[df['index_other']==index_annihilation].plot(x='x',y='y',color=f'C{index_annihilation*2}',ax=ax,alpha=0.5,legend=None)
df.plot.scatter(x='x',y='y',color=f'k',ax=ax,alpha=0.2,legend=None,s=10)
format_plot(ax=ax,xlabel="x (pixels)",ylabel='y (pixels)', fontsize=fontsize)
ax=axs[1]
for index_annihilation in range(max_index_annihilation):
    df[df['index_self']==index_annihilation].plot(x='x',y='y',color=f'C{index_annihilation}',ax=ax,alpha=0.5,legend=None)
    df[df['index_other']==index_annihilation].plot(x='x',y='y',color=f'C{index_annihilation*2}',ax=ax,alpha=0.5,legend=None)

format_plot(ax=ax,xlabel="x (pixels)",ylabel='y (pixels)', fontsize=fontsize)
plt.tight_layout()
plt.show()

In [None]:
fig,ax=plt.subplots()
max_index_annihilation=df['index_self'].max()
for index_annihilation in range(max_index_annihilation):
    df[(df['index_self']==index_annihilation)&(df['dist']>0)].\
    plot(x='t',y='dist',color=f'C{index_annihilation}',ax=ax,alpha=0.5, legend=None)
# df.tail(8).plot.scatter(x='x',y='y',color='g',ax=ax)
# plt.legend()
plt.show()

In [None]:
fontsize=14
df=compute_annihilation_range_timeseries(df,DS=0.025)
fig,ax=plt.subplots(figsize=(6,4))
max_index_annihilation=df['index_self'].max()
for index_annihilation in range(max_index_annihilation):
    df[(df['index_self']==index_annihilation)&(df['dist']>0)].\
    plot(x='tdeath',y='R',color=f'C{index_annihilation}',ax=ax,alpha=0.5, legend=None)
# df.tail(8).plot.scatter(x='x',y='y',color='g',ax=ax)
format_plot(ax=ax,xlabel="t' (ms)",ylabel='R (cm)', fontsize=fontsize)
plt.show()
print(f"{trial_num=}, {max_disp=}")

# old tests of dev on linear particle model

In [None]:
#reset matplotlib settings
import matplotlib as mpl
sns.reset_orig()
mpl.rc_file_defaults()

In [None]:
# index_annihilation_plot=index_annihilation-1
index_annihilation_plot=0
df.loc[df['index_other']==index_annihilation_plot,'index_other'].shape

In [None]:
#plot the points i just computed and verify they look reasonable
fig,ax=plt.subplots(figsize=(5,5))
df[df['index_self']==index_annihilation_plot].plot.scatter(x='x',y='y',ax=ax,color='r',alpha=0.6,marker='v',s=2)
df[df['index_other']==index_annihilation_plot].plot.scatter(x='x',y='y',ax=ax,color='g',alpha=0.6,marker='^',s=2)
d_prev.plot.scatter(x='x',y='y',ax=ax,color='C0',alpha=0.2,marker='o',s=300)
d_next.plot.scatter(x='x',y='y',ax=ax,color='C1',alpha=0.2,marker='o',s=300)
format_plot(ax=ax,xlabel='x (cm)',ylabel='y (cm)')
plt.show()

In [None]:
#plot the points i just computed and verify they look reasonable
fig,ax=plt.subplots(figsize=(5,5))
for index_annihilation_plot in range (8):
    df[df['index_self']==index_annihilation_plot].plot.scatter(x='x',y='y',ax=ax,color=f'C{index_annihilation_plot}',alpha=0.9,marker='v',s=2)
    df[df['index_other']==index_annihilation_plot].plot.scatter(x='x',y='y',ax=ax,color=f'C{index_annihilation_plot*2}',alpha=0.9,marker='.',s=2)
# d_prev.plot.scatter(x='x',y='y',ax=ax,color='C0',alpha=0.2,marker='o',s=300)
# d_next.plot.scatter(x='x',y='y',ax=ax,color='C1',alpha=0.2,marker='o',s=300)
format_plot(ax=ax,xlabel='x (cm)',ylabel='y (cm)')
plt.show()

In [None]:
#DONE: test for equivalence between my particle tracking and the ground truth
#HINT: make dict between particle and pid_explicit, asserting that particle maps to one and only one pid_explicit
#now the big test:
# index_annihilation_test=0
index_annihilation_test=index_annihilation_plot
print(f"Do no more than two known particle identities get assigned to {index_annihilation_test=}?")
pid_explicit_set_lst_self=list(set(df.loc[df['index_self']==index_annihilation_plot,'pid_explicit'].values))
pid_explicit_set_lst_other=list(set(df.loc[df['index_other']==index_annihilation_plot,'pid_explicit'].values))
print(f"{pid_explicit_set_lst_self=}")
print(f"{pid_explicit_set_lst_other=}")
print(f"{num_obs=} is greater than {min_num_obs=}, so this pair-annihilation event is _kept_.")

In [None]:
#TODO: simple test for whether results for an example R(t') look reasonable
#plot the points i just computed and verify they look reasonable
fig,ax=plt.subplots(figsize=(5,5))
d=df.loc[df['index_self']==index_annihilation_plot]
d.plot.scatter(x='t',y='dist',ax=ax,color='r',alpha=0.6,marker='v',s=2)
# df[df['index_self']==index_annihilation_plot].plot.scatter(x='x',y='y',ax=ax,color='r',alpha=0.6,marker='v',s=2)
# df[df['index_other']==index_annihilation_plot].plot.scatter(x='x',y='y',ax=ax,color='g',alpha=0.6,marker='^',s=2)
# d_prev.plot.scatter(x='x',y='y',ax=ax,color='C0',alpha=0.2,marker='o',s=300)
# d_next.plot.scatter(x='x',y='y',ax=ax,color='C1',alpha=0.2,marker='o',s=300)
format_plot(ax=ax,xlabel='t (ms)',ylabel='R (cm)')
plt.show()

In [None]:
#Q: why is the final; distance so far away?
#A: it's just where i killed the simulation, man! Don't worry about it
#TODO: add check to set working=False if the initial distance between particles is greater than a certain amount

In [None]:
d.tail()

In [None]:
df.tail()

In [None]:
#DONE: ROOT CAUSE ANALYSIS: why is index 3 not being returned?
#YEP. #ROOT CAUSE IDENTIFIED: recall annihilating particles vibrate around eachother as they annihilate
#NOPE. #POTENTIAL ROOT CAUSE: map_prev_next,map_next_prev are not inverses of each other
# #simplest fix: recompute the other if either is found
# id_pair_lst=find_nearest_neighbors_simple(d_prev,d_next,distance_L2_pbc)#,xy_col_lst=['x','y'],**kwargs)
# assert len(id_pair_lst)==d_prev.shape[0]

In [None]:
#DONT(this would clearly break it): simple test for whether results for an example R(t') stop looking reasonable if i do:
# d_next=df[df['frame']==frame_next].copy()
# d_next=df[df['frame']==frame_next]