In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os,sys,glob,random, re, copy, time, itertools, random, csv
from tqdm.notebook import trange, tqdm
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import toml
sns.set_theme()
sns.set()

from extend_robot_data import extend_robot_data
from plot import plot_all_positions, plot_runs, circular_hist, plot_starts_ends, plot_rotations_and_heatmap, plot_inter_individual_distances, plot_run_length_hist, plot_robot_distance_goal, plot_runlength_dist_goal_target_corr
from plot_usage_statistics import plot_weekday_business, plot_daily_number_runs, plot_time_of_day_histogram, plot_daily_use_times_and_operational_times, plot_daily_start_end_times
from util import save_date_dict_to_csv, get_fast_runs, get_fish_dir_per_run, filter_dates_dict_for_challenge_runs, flatten_2d_list, get_fish_following_per_run, distance, get_fish_pos_per_run, get_challenge_runs, get_successful_runs, get_distance_to_goal, save_dates_to_npz, load_dates_from_npz, get_hours_minutes_seconds_from_decimal_hours, calculate_run_velocity_speed_acceleration
from iterator_functions import iteratively_evaluate_dates_files, iteratively_evaluate_all_runs_in_dates_dict, forward_rolling_window_apply, iteratively_evaluate_dates_files_runwise, iteratively_collect_and_filter_dates_files
from analyze_data import calculate_tortuosity, get_run_turns_curvature, get_run_turns_towards_target

from load_data import load_robot_data, load_behavior_data, load_fish_data
from clean_data import clean_data
    



### Choose range of dates you want to load

In [None]:
# specify start and end date (min: 2021-11-19 ; max: 2022-10-25)
# NOTE: later cells reassign start_date / end_date to the full range before
# iterating over all stored dates files.
start_date = "2022-10-19"
end_date = "2022-10-25"

# Run-length values at the 100%, 25% and 10% run-length percentiles.
# Index 1 is passed as `percentile_value` to the sampling cell and the whole
# list is passed as `fast_runs_percentiles` to the turn analysis below.
# (presumably seconds — TODO confirm the unit against the run_lengths data)
fast_runs_percentiles = [200,20.721,15.8846] #values of the run length percentiles 100%, 25% and 10% 


# Load data

#### either: load already parsed files

- these must reside in ./loaded_data/

In [None]:
# Reset any previously loaded data before (re)loading the configured date range.
dates_dict = None
# Load the already parsed per-date data for [start_date, end_date];
# only_challenges=True is forwarded to the loader.
dates_dict = load_dates_from_npz(start_date, end_date, only_challenges=True)
# Extend robot data
dates_dict = extend_robot_data(dates_dict)

#### or: parse from logs

- set the paths of the files first

In [None]:
# forwarded to clean_data() below
debug=True

# parse from logs

# directories
# NOTE(review): machine-specific absolute path — adjust to the local log location.
base_dir="E:/tubCloud/HF_logs/"
# base_dir="C:/Users/admin/repos/thesis/logs_16_03_22/"

fish_dir=(base_dir+"fish")
robot_dir=(base_dir+"robot")

behavior_dir=(base_dir+"behavior_prints")
# NOTE: user_input_dir is defined but not used anywhere in this cell.
user_input_dir=(base_dir+"user_input")

# Load robot data
dates_dict_robot = load_robot_data(robot_dir, start_date, end_date)

# The result of this expression is discarded (it is not the last expression of
# the cell); it only acts as a lookup of end_date in the loaded robot data.
dates_dict_robot[end_date].keys()
# Work on a deep copy so the raw robot data stays available unchanged.
dates_dict = copy.deepcopy(dates_dict_robot)

# load behavior print data
dates_dict = load_behavior_data(behavior_dir, dates_dict, start_date, end_date)

# load fish data
dates_dict = load_fish_data(fish_dir, dates_dict, start_date, end_date)

# clean data: check if number of fish stay the same in challenge runs and check if target fish starts in correct pos
dates_dict = clean_data(dates_dict, debug=debug)

# Extend robot data
dates_dict = extend_robot_data(dates_dict)

print(f"\nDone!")

In [None]:
# challenges and runs same length


In [None]:
# detect position jumps (= high acceleration)
# NOTE: this cell only computes the indices of high accelerations per run;
# the result (ids_over_accelerations) is overwritten on every iteration and is
# not reported or stored yet.
dates_keys = dates_dict.keys()

for date_key in dates_keys:    
    date_dict = dates_dict[date_key]
    
    runs = date_dict["runs"]
    for id_run, run in enumerate(runs):
        # accelerations are stored per run, indexed by the run's position in "runs"
        run_accelerations = date_dict["accelerations"][id_run]
        # plt.plot(run_accelerations)
        # plt.show()

        # find accelerations with an absolute value over 200
        ids_over_accelerations = np.where(np.abs(run_accelerations) > 200)

In [None]:
# detect rotation jumps


In [None]:
# print(dates_dict['2022-02-02']["runs"])
# Show the first 100 per-timestamp rows (timestamps, robot positions, fish) of one day.
# The originally inspected day is used when it is loaded; otherwise fall back to the
# first loaded day so the cell does not raise a KeyError for other date ranges.
inspect_date = '2022-02-02' if '2022-02-02' in dates_dict else next(iter(dates_dict))
test = {key: dates_dict[inspect_date][key] for key in ('timestamps', 'positions', 'fish')}
pd.DataFrame.from_dict(test).head(100)

In [None]:
### check for id skips in run timestamps
# Each run is an inclusive [start, end] index pair into the day's timestamps.
# The check expects the first run to start at index 0 and every following run to
# start directly after the end of the previous one; any deviation is printed.
for date_key in dates_dict.keys():
    date_dict = dates_dict[date_key]
    date_dict_runs = date_dict["runs"]
    date_dict_ts = date_dict["timestamps"]
    
    start_pointer = 0
    for id_run, run in enumerate(date_dict_runs):
        if start_pointer != run[0]:
            # The first run has no predecessor; indexing with id_run-1 == -1 would
            # wrongly report the LAST run of the day as "prev-run".
            prev_run = date_dict_runs[id_run-1] if id_run > 0 else None
            print(f"timestamps skipped in run {run}: prev-run: {prev_run}, end last +1: {date_dict_ts[start_pointer]}; run start:{date_dict_ts[run[0]]}")
            # signed offset between the expected and the actual run start index
            print(f"\t{start_pointer - run[0]}")
        start_pointer = run[1]+1

In [None]:
### challenge runs cannot be longer than ~180 seconds
# For every loaded day, report challenge runs whose run length exceeds 185
# together with their start and end timestamps.
for date_key in dates_dict.keys():
    date_dict = dates_dict[date_key]
    date_dict_runs = date_dict["runs"]
    
    if len(date_dict_runs) > 0:
        # set non challenge runs to zero length for this check
        # (mask = inverted challenge flags, masked entries are filled with 0)
        date_run_lengths_masked = np.ma.array(date_dict["run_lengths"], mask=np.invert(date_dict["challenges"]), copy=True, fill_value=0)
        date_run_lengths_zerofilled = np.ma.filled(date_run_lengths_masked)
        # print(date_run_lengths_zerofilled)
        # date_c_run_lengths = np.array(date_dict["run_lengths"])[date_dict["challenges"]]
        # print(date_c_run_lengths)
        # indices (into the day's runs) of challenge runs longer than 185
        long_c_runs = np.where(date_run_lengths_zerofilled > 185)[0]

        # manage too long runs
        if len(long_c_runs) > 0:
            print(date_run_lengths_zerofilled[long_c_runs])
            print(long_c_runs)
            for long_run in long_c_runs:
                # runs hold inclusive [start, end] indices into the timestamps
                long_run_start = date_dict["timestamps"][date_dict_runs[long_run][0]]
                long_run_end = date_dict["timestamps"][date_dict_runs[long_run][1]]
                print(f"long run at: {long_run_start} : {long_run_end}")


# Sample data

- functions to generate sample data

In [None]:
def filter_dates_and_sample_n_runs(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, percentile_value=200, sample_size=100, plot=False, verbose=False):
    """Draw a random sample of runs over a date range and write it to CSV.

    The dates are collected and filtered by
    ``iteratively_collect_and_filter_dates_files``; from all remaining runs at
    most ``sample_size`` are drawn without replacement (``random.sample``).
    Every date that contributes at least one sampled run is reduced to those
    runs with ``filter_date_dict_by_specific_runs`` and then written to
    ``./csv/<date>`` via ``save_date_dict_to_csv``.

    Args:
        start_date, end_date, load_only_challenge_data: forwarded positionally
            to ``iteratively_collect_and_filter_dates_files``.
        challenges, only_successful, percentile_value, verbose: forwarded as
            keyword arguments to the same function.
        sample_size: maximum number of runs to draw.
        plot: accepted for signature compatibility; not used here.

    Returns:
        None. The only results are the files written by ``save_date_dict_to_csv``.
    """
    dates_dict = iteratively_collect_and_filter_dates_files(start_date, end_date, load_only_challenge_data, percentile_value=percentile_value, challenges=challenges, only_successful=only_successful, verbose=verbose)

    # Pool the runs of all dates; remember each run's date and its index within that date.
    pooled_runs = []
    for day in dates_dict:
        pooled_runs.extend(
            (day, idx_in_day, day_run)
            for idx_in_day, day_run in enumerate(dates_dict[day]["runs"])
        )

    # Sample positions in the pool; sorting keeps date order and in-date run order.
    total_runs = len(pooled_runs)
    picked_positions = sorted(random.sample(list(range(total_runs)), min(total_runs, sample_size)))

    # Group the sampled runs by date as (index within date, run) pairs.
    picks_by_day = {}
    for position in picked_positions:
        day, idx_in_day, day_run = pooled_runs[position]
        picks_by_day.setdefault(day, []).append((idx_in_day, day_run))

    # Reduce every sampled date to its sampled runs.
    sampled_dates_dict = {}
    for day, day_picks in picks_by_day.items():
        day_picks_arr = np.array(day_picks, dtype=object)
        sampled_dates_dict[day] = filter_date_dict_by_specific_runs(
            date_dict=dates_dict[day],
            ids_runs=day_picks_arr[:, 0].tolist(),
            runs=day_picks_arr[:, 1],
        )

    # Write one CSV export per sampled date.
    for day, day_dict in sampled_dates_dict.items():
        _, _ = save_date_dict_to_csv(day_dict, f"./csv/{day}")
        
def filter_date_dict_by_specific_runs(date_dict, ids_runs, runs):
    """Return a copy of ``date_dict`` that only contains the given runs.

    The input dict is no longer modified: the result is a shallow copy in
    which every filtered entry is rebound to a newly built object.

    Args:
        date_dict: per-date data. Must contain the per-run entries
            ``run_lengths``, ``difficulties``, ``challenges``, ``successful``
            and the per-timestamp entries ``timestamps``, ``positions``,
            ``orientation``, ``rotation``, ``fish``.
        ids_runs: indices of the selected runs within the date's runs; used to
            select from the per-run entries.
        runs: the selected runs as inclusive ``[start, end]`` index pairs into
            the per-timestamp entries, in the same order as ``ids_runs``.

    Returns:
        dict with the per-run entries as NumPy arrays restricted to
        ``ids_runs``, the per-timestamp entries as lists holding only the
        selected runs' samples back to back, and ``runs`` re-indexed so the
        first run starts at 0. Entries other than the ones listed above are
        carried over unfiltered.
    """
    per_run_keys = ('run_lengths', 'difficulties', 'challenges', 'successful')
    per_timestamp_keys = ('timestamps', 'positions', 'orientation', 'rotation', 'fish')

    # Shallow copy: the caller's dict keeps its original entries.
    filtered_dict = dict(date_dict)

    # apply filter to all per-run arrays
    for key in per_run_keys:
        filtered_dict[key] = np.array(date_dict[key])[ids_runs]

    # Collect the per-timestamp samples of the selected runs and re-index the
    # runs onto the concatenated result in the same pass.
    filtered_series = {key: [] for key in per_timestamp_keys}
    zeroed_runs = []
    current_index = 0
    for run in runs:
        run_start, run_end = run[0], run[1]
        for key in per_timestamp_keys:
            filtered_series[key].extend(date_dict[key][run_start:run_end+1])
        run_span = run_end - run_start
        zeroed_runs.append([current_index, current_index + run_span])
        current_index = current_index + run_span + 1

    filtered_dict.update(filtered_series)
    filtered_dict['runs'] = zeroed_runs

    return filtered_dict

In [None]:
# Sample up to 1000 successful challenge runs over the full date range and
# export them to ./csv/. fast_runs_percentiles[1] (the 25% run-length value) is
# passed as percentile_value.
# NOTE: this overwrites the global start_date / end_date set in the config cell.
start_date = "2021-11-19"
end_date = "2022-10-25"
filter_dates_and_sample_n_runs(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, percentile_value=fast_runs_percentiles[1], sample_size=1000, verbose=False)

***
# Find turns

In [None]:
from analyze_data import get_run_turns_towards_target

def find_all_turns_in_dates_dict(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True, fast_runs_percentile=25, plot=True, verbose=False):
    """Collect the robot's turning points towards the target over an in-memory dates dict.

    Evaluates ``find_turns_torwards_target_in_run`` for the runs selected by
    ``iteratively_evaluate_all_runs_in_dates_dict`` and concatenates the
    per-run results.

    Args:
        dates_dict: loaded per-date data.
        start_date, end_date, challenges, only_successful, fast_runs_percentile,
            verbose: forwarded to ``iteratively_evaluate_all_runs_in_dates_dict``.
        plot: forwarded as well; if true, three histograms are additionally
            shown (distance to target fish, distance to goal, distance to goal
            excluding zero distances).

    Returns:
        Tuple of three flat arrays over all evaluated runs: turning point
        indices, robot-to-target distances at the turning points, and
        robot-to-goal distances at the turning points.
    """
    # end_date is forwarded as given (start_date used to be passed in its place).
    all_returns = iteratively_evaluate_all_runs_in_dates_dict(eval_run_function=find_turns_torwards_target_in_run, dates_dict=dates_dict, start_date=start_date, end_date=end_date, challenges=challenges, only_successful=only_successful, fast_runs_percentile=fast_runs_percentile, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    # one row per run: [turning points, distances to target, distances to goal, difficulty]
    all_turning_points_towards_target = np.hstack(all_returns[:,0])
    all_dist_at_turning_points_towards_target = np.hstack(all_returns[:,1])
    all_dist_goal_at_turning_points_towards_target = np.hstack(all_returns[:,2])
    if plot:
        f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(30,5))

        all_dist_goal_at_turning_points_towards_target = np.asarray(all_dist_goal_at_turning_points_towards_target)
        # third panel leaves out turning points with a goal distance of 0
        zero_filtered_dists = all_dist_goal_at_turning_points_towards_target[all_dist_goal_at_turning_points_towards_target>0]

        panels = (
            (ax1, all_dist_at_turning_points_towards_target, "histogram turning points towards target at distances to target fish", "robot distance to target fish"),
            (ax2, all_dist_goal_at_turning_points_towards_target, "histogram turning points towards target at distances to goal zone", "robot distance to goal"),
            (ax3, zero_filtered_dists, "histogram turning points towards target at distances to goal zone excluding robot in goal zone", "robot distance to goal"),
        )
        for ax, values, title, xlabel in panels:
            ax.hist(values, bins=100)
            ax.set_title(title)
            ax.set_xlabel(xlabel)
            ax.set_ylabel("number of turning points at distance X")

        plt.show()

    return all_turning_points_towards_target, all_dist_at_turning_points_towards_target, all_dist_goal_at_turning_points_towards_target
    
# (label, difficulty code) groups that are reported separately; a code of None
# selects every run. The code is compared with the fourth entry of each run's
# result row.
_TURN_DIFFICULTY_GROUPS = (("overall", None), ("easy", 2), ("medium", 4), ("hard", 6))


def _stack_turn_columns(run_rows):
    """Concatenate turning points, target distances and goal distances over all rows of ``run_rows``."""
    run_rows = np.asarray(run_rows, dtype=object)
    if run_rows.ndim != 2 or len(run_rows) == 0:
        # no runs: nothing to concatenate (np.hstack would raise on an empty sequence)
        return np.array([], dtype=int), np.array([]), np.array([])
    return np.hstack(run_rows[:, 0]), np.hstack(run_rows[:, 1]), np.hstack(run_rows[:, 2])


def _median_or_nan(values):
    """Median of ``values``, or NaN when ``values`` is empty."""
    return np.median(values) if len(values) > 0 else np.nan


def _plot_turn_distance_histograms(dists_target, dists_goal, suptitle=None, mark_median=False):
    """Draw the three turning-point distance histograms; return ``(figure, goal distances > 0)``."""
    fig, axes = plt.subplots(1, 3, figsize=(30, 5))
    if suptitle is not None:
        fig.suptitle(suptitle)

    dists_goal = np.asarray(dists_goal)
    # third panel leaves out turning points with a goal distance of 0
    dists_goal_non_zero = dists_goal[dists_goal > 0]

    panels = (
        (dists_target, "histogram turning points towards target at distances to target fish", "robot distance to target fish"),
        (dists_goal, "histogram turning points towards target at distances to goal zone", "robot distance to goal"),
        (dists_goal_non_zero, "histogram turning points towards target at distances to goal zone excluding robot in goal zone", "robot distance to goal"),
    )
    for ax, (values, title, xlabel) in zip(axes, panels):
        ax.hist(values, bins=100)
        if mark_median and len(values) > 0:
            ax.axvline(np.median(values), label="median", color="g")
            # legend per axis, so the median marker is labelled in every panel
            ax.legend()
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel("number of turning points at distance X")
    return fig, dists_goal_non_zero


def find_all_turns_in_dates_files(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Find the robot's turning points towards the target over all stored dates files.

    Evaluates ``find_turns_torwards_target_in_run`` run by run through
    ``iteratively_evaluate_dates_files_runwise``.

    With ``fast_runs_percentiles`` given, the result holds one list of per-run
    rows per percentile entry. For each entry the number of runs, the number of
    turning points and the turning points per run are printed for all runs and
    per difficulty (easy = 2, medium = 4, hard = 6). With ``plot`` the distance
    histograms (with median markers) of each group are drawn, the medians are
    printed and the figures are saved to
    ``./plots/<entry index>_turning_points_at_dists_<group>``. Groups without
    runs are reported and skipped.

    Without ``fast_runs_percentiles`` the result holds the per-run rows
    directly and, with ``plot``, a single set of histograms is drawn.

    Args:
        start_date, end_date, load_only_challenge_data, challenges,
            only_successful, fast_runs_percentiles, plot, verbose: forwarded to
            ``iteratively_evaluate_dates_files_runwise``.

    Returns:
        The collected per-run results as an object array.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=find_turns_torwards_target_in_run, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    if fast_runs_percentiles is None:
        if plot:
            # No percentile is defined in this branch, so none is printed here.
            _, dists_target, dists_goal = _stack_turn_columns(all_returns)
            _plot_turn_distance_histograms(dists_target, dists_goal)
            plt.show()
        return all_returns

    for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
        # one row per run: [turning points, distances to target, distances to goal, difficulty]
        run_rows = np.array(all_returns[id_fast_runs_percentile], dtype=object)
        if run_rows.ndim != 2 or len(run_rows) == 0:
            print(f"no runs found for fast runs percentile value {fast_runs_percentile} - skipping")
            continue
        difficulties = run_rows[:, 3]

        # per-group counts
        groups = []
        for label, difficulty_code in _TURN_DIFFICULTY_GROUPS:
            group_rows = run_rows if difficulty_code is None else run_rows[difficulties == difficulty_code]
            turning_points, dists_target, dists_goal = _stack_turn_columns(group_rows)
            num_runs = len(group_rows)
            groups.append((label, num_runs, dists_target, dists_goal))

            count_label = "all" if difficulty_code is None else label
            ratio_prefix = "" if difficulty_code is None else f"{label} "
            print(f"number of {count_label} runs: {num_runs}")
            print(f"number of {count_label} turns: {len(turning_points)}")
            if num_runs > 0:
                print(f"{ratio_prefix}turns per run : {len(turning_points) / num_runs}\n")
            else:
                print(f"{ratio_prefix}turns per run : n/a (no runs)\n")

        if plot:
            print(fast_runs_percentile)
            # savefig does not create missing directories
            os.makedirs("./plots", exist_ok=True)
            for label, num_runs, dists_target, dists_goal in groups:
                if num_runs == 0:
                    print(f"no {label} runs - skipping plot")
                    continue
                fig, dists_goal_non_zero = _plot_turn_distance_histograms(dists_target, dists_goal, suptitle=label, mark_median=True)

                print(f"{label} median dist to target: {_median_or_nan(dists_target)}")
                print(f"{label} median dist to goal: {_median_or_nan(dists_goal)}")
                print(f"{label} median dist to goal - non zero: {_median_or_nan(dists_goal_non_zero)}")
                fig.savefig(f"./plots/{id_fast_runs_percentile}_turning_points_at_dists_{label}")
            plt.show()

    return all_returns
    
def find_turns_torwards_target_in_run(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Find the robot's turning points towards the target fish within one run.

    Args:
        run: inclusive ``[start, end]`` index pair into the per-timestamp data.
        id_run: index of the run within the date's runs (used to look up its difficulty).
        date_dict: per-date data with the entries ``fish``, ``positions``,
            ``orientation`` and ``difficulties``.
        run_arguments, plot, verbose: accepted for the run-evaluation interface;
            not used here.

    Returns:
        List ``[turning point indices, robot-to-target distances at those
        indices, robot-to-goal distances at those indices, run difficulty]``.
    """
    samples = slice(run[0], run[1]+1)

    # The fish entry at index 1 of every timestamp is used as the target fish.
    fish1_pos_this_run = np.array([fish_at_ts[1]['position'] for fish_at_ts in date_dict['fish'][samples]])
    robot_pos_run = np.array(date_dict['positions'][samples])
    robot_dir_run = np.array(date_dict['orientation'][samples])
    difficulty = date_dict['difficulties'][id_run]

    num_samples = len(fish1_pos_this_run)
    assert num_samples == len(robot_pos_run) and len(robot_pos_run) == len(robot_dir_run)

    # distance between robot and target fish for every sample of the run
    run_target_dists = np.linalg.norm(fish1_pos_this_run - robot_pos_run, axis=1)

    # target
    turning_points_towards_target = get_run_turns_towards_target(fish1_pos_this_run, robot_pos_run, robot_dir_run, run_target_dists, plot=False)
    dists_target_at_turning_points_towards_target = run_target_dists[turning_points_towards_target]

    dist_goal_at_turning_points_towards_target = []
    for turning_point in turning_points_towards_target:
        dist_goal_at_turning_points_towards_target.append(get_distance_to_goal(robot_pos_run[turning_point]))

    return [turning_points_towards_target, dists_target_at_turning_points_towards_target, dist_goal_at_turning_points_towards_target, difficulty]

In [None]:
# Turn analysis on the dates_dict that is currently loaded in memory.
# find_all_turns_in_dates_dict takes the dates_dict as its first argument, accepts the
# `fast_runs_percentile` keyword and returns the three arrays unpacked here;
# find_all_turns_in_dates_files has neither that parameter nor that return shape.
# all successful runs
all_turning_points_towards_target, all_dist_at_turning_points_towards_target, all_dist_goal_at_turning_points_towards_target = find_all_turns_in_dates_dict(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True, fast_runs_percentile=None, plot=True)
# 25th percentile
quarter_turning_points_towards_target, quarter_dist_at_turning_points_towards_target, quarter_dist_goal_at_turning_points_towards_target = find_all_turns_in_dates_dict(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True, fast_runs_percentile=25, plot=True)
# 10th percentile
tenth_turning_points_towards_target, tenth_dist_at_turning_points_towards_target, tenth_dist_goal_at_turning_points_towards_target = find_all_turns_in_dates_dict(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True, fast_runs_percentile=10, plot=True)

In [None]:
# find turn in all runs over all dates
# NOTE: this overwrites the global start_date / end_date with the full date range.
start_date = "2021-11-19"
end_date = "2022-10-25"
#fast_runs_percentiles = [100,25,10]

# all_returns holds one entry per value in fast_runs_percentiles (same order);
# the next cell indexes it with 0, 1 and 2.
all_returns = find_all_turns_in_dates_files(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

In [None]:
# Flatten the per-run results of each entry in all_returns into three arrays:
# column 0 = turning points, column 1 = distances to the target fish,
# column 2 = distances to the goal.

# entry 0 of all_returns
all_turning_points_towards_target, all_dist_at_turning_points_towards_target, all_dist_goal_at_turning_points_towards_target = (
    np.hstack(np.array(all_returns[0], dtype=object)[:, column]) for column in range(3)
)

# entry 1 of all_returns
quarter_turning_points_towards_target, quarter_dist_at_turning_points_towards_target, quarter_dist_goal_at_turning_points_towards_target = (
    np.hstack(np.array(all_returns[1], dtype=object)[:, column]) for column in range(3)
)

# entry 2 of all_returns
tenth_turning_points_towards_target, tenth_dist_at_turning_points_towards_target, tenth_dist_goal_at_turning_points_towards_target = (
    np.hstack(np.array(all_returns[2], dtype=object)[:, column]) for column in range(3)
)

In [None]:
# Summary: number of turning points towards the target overall and in the two
# fast-run subsets, plus the share of each subset in the overall count.
# NOTE: the percentages divide by the overall count and therefore raise a
# ZeroDivisionError when no turning point was found at all.
print(f"number of turning points towards target overall: {len(all_turning_points_towards_target)}")
print(f"number of turning points towards target in fast runs (first quartile): {len(quarter_turning_points_towards_target)}")
print(f"number of turning points towards target in fast runs (first tenth): {len(tenth_turning_points_towards_target)}")
print(f"percentage of turning points in fast runs (first quartile) of all turning points: {len(quarter_turning_points_towards_target) / len(all_turning_points_towards_target) * 100}")
print(f"percentage of turning points in fast runs (first tenth) of all turning points: {len(tenth_turning_points_towards_target) / len(all_turning_points_towards_target) * 100}")

# Calculate path tortuosity

In [None]:
%matplotlib inline
# Tortuosity of the paths in the currently loaded dates_dict (successful runs only, with plots).
# The four results are unpacked in the order: fish-1 straightness index,
# robot straightness index, fish-1 sinuosity, robot sinuosity.
all_dates_fish1_tortuosity_straightness_index, all_dates_robot_tortuosity_straightness_index, all_dates_fish1_sinuosity, all_dates_robot_sinuosity = calculate_tortuosity(dates_dict, only_successful=True, plot=True)

In [None]:
def _split_by_difficulty(values, difficulties):
    """Group per-run values into overall / easy / medium / hard (difficulty codes 2 / 4 / 6)."""
    groups = {"overall": values}
    for level, code in (("easy", 2), ("medium", 4), ("hard", 6)):
        groups[level] = values[difficulties == code]
    return groups


def _print_medians(metric_tag, robot_by_level, fish_by_level, blank_line_first=False):
    """Print the robot and fish median of one metric for every difficulty group."""
    lead = "\n" if blank_line_first else ""
    for level in ("overall", "easy", "medium", "hard"):
        print(f"{lead}{level} median {metric_tag} robot: {np.median(robot_by_level[level])}")
        print(f"{level} median {metric_tag} fish: {np.median(fish_by_level[level])}")
        lead = ""


def _plot_robot_vs_fish_histogram(fish_values, robot_values, value_label, title, xlabel, bins):
    """Overlay the fish and robot histograms of one metric and mark both medians."""
    plt.figure(figsize=(16, 9))
    plt.title(title)
    plt.hist(fish_values, bins=bins, color="b", alpha=0.5, label=f"fish1 {value_label}")
    plt.hist(robot_values, bins=bins, color="g", alpha=0.5, label=f"robot {value_label}")
    plt.axvline(np.median(fish_values), color="b", label="median fish")
    plt.axvline(np.median(robot_values), color="g", label="median robot")
    plt.xlabel(xlabel)
    plt.ylabel("number of runs")
    plt.legend()
    plt.show()


def _plot_difficulty_histogram(values_by_level, agent, value_label, title, xlabel, bins):
    """Overlay the overall / easy / medium / hard histograms of one agent and mark the medians."""
    level_colors = (("overall", "k"), ("easy", "g"), ("medium", "y"), ("hard", "r"))
    plt.figure(figsize=(16, 9))
    plt.title(title)
    for level, color in level_colors:
        plt.hist(values_by_level[level], bins=bins, color=color, alpha=0.5, label=f"{level} {agent} {value_label}")
    for level, color in level_colors:
        plt.axvline(np.median(values_by_level[level]), color=color, label=f"median {level}")
    plt.xlabel(xlabel)
    plt.ylabel("number of runs")
    plt.legend()
    plt.show()


def _plot_metric_histograms(robot_by_level, fish_by_level, value_label, title_suffix, xlabel, pair_bins, difficulty_bins, difficulty_title_prefix):
    """Draw the six figures of one metric: overall pair, per-agent difficulty split, per-difficulty pairs."""
    _plot_robot_vs_fish_histogram(
        fish_by_level["overall"], robot_by_level["overall"], value_label,
        f"overall {value_label} of robot and target fish path{title_suffix}", xlabel, pair_bins)
    for agent, values_by_level in (("robot", robot_by_level), ("fish", fish_by_level)):
        _plot_difficulty_histogram(
            values_by_level, agent, value_label,
            f"{difficulty_title_prefix} {value_label} of {agent} path{title_suffix}", xlabel, difficulty_bins)
    for level in ("easy", "medium", "hard"):
        _plot_robot_vs_fish_histogram(
            fish_by_level[level], robot_by_level[level], value_label,
            f"{level} {value_label} of robot and target fish path{title_suffix}", xlabel, pair_bins)


def calculate_turtuosity_in_all_dates_files(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Evaluate straightness index and sinuosity for every run in a date range.

    Every run is evaluated with `calculate_turtuosity_for_run` through
    `iteratively_evaluate_dates_files_runwise`; all filter arguments are passed through
    unchanged. When `fast_runs_percentiles` is given and `plot` is true, the medians are
    printed and the histograms are drawn once per percentile group.

    Parameters
    ----------
    start_date, end_date, load_only_challenge_data, challenges, only_successful, verbose :
        Forwarded to `iteratively_evaluate_dates_files_runwise`.
    fast_runs_percentiles : sequence or None
        Forwarded as well; the collected results are indexed by the position in this
        sequence. Each value is printed as the heading of its group.
    plot : bool
        Forwarded, and additionally enables the printed medians and the histograms.

    Returns
    -------
    numpy.ndarray (dtype object)
        The collected per-run results. Earlier versions returned nothing, so callers that
        stored the return value received None.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=calculate_turtuosity_for_run, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    # the per-group arrays are only needed for the printed medians and the plots
    if fast_runs_percentiles is None or not plot:
        return all_returns

    straightness_suffix = " (distance_start_end_point / distance_traveled)"
    straightness_xlabel = "straightness index (distance_start_end_point / distance_traveled)"

    for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
        # one row per run: [robot SI, fish SI, robot sinuosity, fish sinuosity, difficulty]
        run_table = np.array(all_returns[id_fast_runs_percentile])
        if run_table.ndim < 2 or run_table.shape[0] == 0:
            # nothing was collected for this group; column indexing below would fail
            print(f"{fast_runs_percentile}: no runs to summarise")
            continue

        robot_si, fish_si, robot_sinuosity, fish_sinuosity, difficulties = (
            np.hstack(run_table[:, column]) for column in range(5))

        robot_si_by_level = _split_by_difficulty(robot_si, difficulties)
        fish_si_by_level = _split_by_difficulty(fish_si, difficulties)
        robot_sinuosity_by_level = _split_by_difficulty(robot_sinuosity, difficulties)
        fish_sinuosity_by_level = _split_by_difficulty(fish_sinuosity, difficulties)

        print(fast_runs_percentile)
        _print_medians("SI", robot_si_by_level, fish_si_by_level)
        _print_medians("sinuo", robot_sinuosity_by_level, fish_sinuosity_by_level, blank_line_first=True)

        # plot straightness index
        _plot_metric_histograms(
            robot_si_by_level, fish_si_by_level, "straightness index",
            title_suffix=straightness_suffix, xlabel=straightness_xlabel,
            pair_bins=np.linspace(0, 0.2, 100), difficulty_bins=np.linspace(0, 0.2, 100),
            difficulty_title_prefix="all difficulties")

        # plot sinuosity (the difficulty overlays use a wider bin range than the pair plots)
        _plot_metric_histograms(
            robot_sinuosity_by_level, fish_sinuosity_by_level, "sinuosity",
            title_suffix="", xlabel="sinuosity",
            pair_bins=np.linspace(0, 0.1, 100), difficulty_bins=np.linspace(0, 0.2, 100),
            difficulty_title_prefix="all")

    return all_returns

def _wrap_angles_once(angles):
    """Shift angles (radians) above pi down by 2*pi and angles at or below -pi up by 2*pi (single pass)."""
    angles = np.asarray(angles, dtype=float)
    return np.where(angles > np.pi, angles - 2 * np.pi,
                    np.where(angles <= -np.pi, angles + 2 * np.pi, angles))


def _straightness_index_and_steps(positions):
    """Return (distance_start_end / distance_traveled, step lengths) for a (timesteps, coords) path."""
    # axis=0: difference between consecutive time steps. The default (last axis) would
    # subtract the coordinates of one position from each other instead.
    step_lengths = np.linalg.norm(np.diff(positions, axis=0), axis=1)
    dist_traveled = np.sum(step_lengths)
    dist_start_end = np.linalg.norm(positions[0] - positions[-1])
    straightness_index = dist_start_end / dist_traveled if dist_traveled != 0 else 0
    return straightness_index, step_lengths


def _sinuosity(turning_angles, step_lengths):
    """Sinuosity S = 2 * [p * ((1 + c) / (1 - c) + b^2)]^-0.5 (see calculate_turtuosity_for_run)."""
    c = np.mean(np.cos(turning_angles))       # mean cosine of turning angles
    p = np.mean(step_lengths)                 # mean step length
    b = np.std(step_lengths) / p              # coefficient of variation of the step length
    return 2 * np.power(p * ((1 + c) / (1 - c) + np.power(b, 2)), -0.5)


def calculate_turtuosity_for_run(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Compute straightness index and sinuosity of the robot and fish-1 path for one run.

    Straightness index: distance between start and end point divided by the distance
    traveled (0 when nothing was traveled).

    Sinuosity: S = 2 * [p * ((1 + c) / (1 - c) + b^2)]^-0.5, where
    c is the mean cosine of the turning angles,
    b is the coefficient of variation of the step length and
    p is the mean step length.

    Parameters
    ----------
    run : sequence
        run[0] and run[1] are the first and last (inclusive) time-step index of the run.
    id_run : int
        Index of the run, used to look up its entry in date_dict['difficulties'].
    date_dict : dict
        Reads 'fish' (per time step; entry 1 with 'position' and 'orientation'),
        'positions' and 'orientation' (robot, per time step) and 'difficulties'.
        Fish orientation is treated as degrees, robot orientation as a direction
        vector (x, y) — assumptions taken from how the values are used below.
    run_arguments, plot, verbose :
        Unused here; part of the run-evaluation callback signature.

    Returns
    -------
    list
        [robot straightness index, fish straightness index, robot sinuosity,
        fish sinuosity, difficulty]
    """
    # get data
    fish_frames = date_dict['fish'][run[0]:run[1]+1]
    fish1_pos_this_run = np.array([all_fish_in_ts[1]['position'] for all_fish_in_ts in fish_frames])
    fish1_dir_run = np.array([all_fish_in_ts[1]['orientation'] for all_fish_in_ts in fish_frames])

    robot_pos_run = np.array(date_dict['positions'][run[0]:run[1]+1])
    robot_dir_run = np.array(date_dict['orientation'][run[0]:run[1]+1])
    difficulty = date_dict['difficulties'][id_run]

    # straightness index (distance_start_end_point / distance_traveled)
    fish1_tortuosity_straightness_index, fish1_dists_per_step = _straightness_index_and_steps(fish1_pos_this_run)
    robot_tortuosity_straightness_index, robot_dists_per_step = _straightness_index_and_steps(robot_pos_run)

    # robot: turning angle = change of the direction vector's angle to the x axis
    def robot_heading_change(window):
        return np.arctan2(window[1][1], window[1][0]) - np.arctan2(window[0][1], window[0][0])

    robot_turning_angles = _wrap_angles_once(
        forward_rolling_window_apply(robot_dir_run, window_size=2, function=robot_heading_change))
    robot_sinuosity = _sinuosity(robot_turning_angles, robot_dists_per_step)

    # fish1: shift the orientation by 180 degrees, convert to radians, then difference the headings
    def fish1_heading_change(window):
        return window[1] - window[0]

    fish1_headings = list(_wrap_angles_once(np.radians(fish1_dir_run + 180)))
    fish1_turning_angles = _wrap_angles_once(
        forward_rolling_window_apply(fish1_headings, window_size=2, function=fish1_heading_change))
    fish1_sinuosity = _sinuosity(fish1_turning_angles, fish1_dists_per_step)

    return [robot_tortuosity_straightness_index, fish1_tortuosity_straightness_index, robot_sinuosity, fish1_sinuosity, difficulty]



In [None]:
# calculate the path tortuosity of all runs over all dates
# NOTE: this rebinds the notebook-wide start_date / end_date; later cells that read these
# globals (e.g. in plot titles) will see the values set here.
start_date = "2021-11-19"
end_date = "2022-10-25"
#fast_runs_percentiles = [100,25,10]

all_returns = calculate_turtuosity_in_all_dates_files(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

# Stats page

In [None]:
def evaluate_day_stats(date_dict):
    """Summarise the runs of one day.

    Returns the list [loaded days, runs, challenge runs, successful runs, use time,
    estimated visitors]. An empty or missing `date_dict` yields all zeros; otherwise the
    day counts as one loaded day. The use time is the sum of 'run_lengths' divided by
    60 twice (hours, assuming the run lengths are seconds). Estimated visitors is the
    number of runs that are not flagged as challenge runs.
    """
    if not date_dict:
        return [0, 0, 0, 0, 0, 0]

    total_runs = len(date_dict["runs"])
    challenge_runs = np.flatnonzero(date_dict["challenges"]).size
    successful_runs = np.flatnonzero(date_dict["successful"]).size
    hours_in_use = np.sum(date_dict["run_lengths"])/60/60
    non_challenge_runs = total_runs - challenge_runs

    return [1, total_runs, challenge_runs, successful_runs, hours_in_use, non_challenge_runs]

# Evaluate every day in the range with evaluate_day_stats and print the column totals.
# NOTE: this rebinds the notebook-wide start_date / end_date.
start_date = "2021-11-19"
end_date = "2022-10-25"
# one row per evaluated day, columns in the order returned by evaluate_day_stats
stats = np.array(iteratively_evaluate_dates_files(start_date, end_date, eval_date_function=evaluate_day_stats, load_only_challenge_data=False, verbose=False))
print(f"Done generating stats:\n")
# difference between the two dates in days, i.e. the end date itself is not counted
print(f"Total number of days: {(datetime.strptime(end_date, '%Y-%m-%d') - datetime.strptime(start_date, '%Y-%m-%d')).days}")
print(f"Total number of active days: {np.sum(stats[:,0])}")
print(f"Total number of runs: {np.sum(stats[:,1])}")
print(f"Total number of challenge runs: {np.sum(stats[:,2])}")
print(f"Total number of successful runs: {np.sum(stats[:,3])}")
print(f"Total use time: {np.sum(stats[:,4])} h")
print(f"Total number of estimated visitors: {np.sum(stats[:,5])}")

# Load and plot data from Pawel's leadership model

In [None]:
def load_pawel_run_file(run_file):
    """Load one saved run from a .npy file and return the object stored in it.

    The file is read with pickling enabled and the loaded array is unwrapped with
    `.item()`, so it is expected to hold a single Python object (the later cells index
    it like a dict).

    NOTE(security): allow_pickle=True can execute arbitrary code while loading; only use
    this on files from a trusted source.
    """
    return np.load(run_file, allow_pickle=True).item()

In [None]:
# load run by run: every .npy file in the model output directory is one run
pawel_path = './leading_model_pawel/output/1000/'
# glob returns its matches in arbitrary order; sort them so that `runs` (and everything
# that depends on the order, e.g. "the first run") is reproducible between sessions
pawel_model_files = sorted(glob.glob(pawel_path + '*.npy'))

# collect data
runs = [load_pawel_run_file(run_file) for run_file in pawel_model_files]

#### turns

In [None]:
def calculate_turns_toward_target_fish(run):
    """Find the robot's turning points towards the target fish in one simulated run.

    Reads run['pos'] (index 0 on the second axis is used as the robot, index 1 as the
    target fish) and run['phi'] (column 0 is the robot heading angle, converted to a
    [cos, sin] direction vector per time step).

    Returns the list [turning point indices, distance to the target fish at those
    indices, distance to the goal at those indices].
    """
    positions = np.array(run['pos'])
    robot_headings = np.array(run['phi'])[:, 0]

    fish1_pos_this_run = positions[:, 1]
    robot_pos_run = positions[:, 0]
    robot_dir_run = [[np.cos(heading), np.sin(heading)] for heading in robot_headings]
    assert len(fish1_pos_this_run) == len(robot_pos_run) == len(robot_dir_run)

    # robot-to-target distance per time step
    run_target_dists = np.linalg.norm(fish1_pos_this_run - robot_pos_run, axis=1)

    # target
    turning_points_towards_target = get_run_turns_towards_target(
        fish1_pos_this_run,
        robot_pos_run,
        robot_dir_run,
        run_target_dists,
        prominence_target_dist=10,
        plot=False,
    )

    target_dists_at_turns = run_target_dists[turning_points_towards_target]
    goal_dists_at_turns = []
    for turning_point in turning_points_towards_target:
        goal_dists_at_turns.append(get_distance_to_goal(robot_pos_run[turning_point]))

    return [turning_points_towards_target, target_dists_at_turns, goal_dists_at_turns]

In [None]:
# Turning points towards the target fish for every loaded run, collected per run.
# NOTE(review): the only definition of plot_turns visible here is in the cell directly below;
# on a fresh kernel that cell has to be executed before this one.
all_turns_towards_target = []
all_dists_target_at_turning_points_towards_target = []
all_dist_goal_at_turning_points_towards_target = []
for run in runs:
    turning_points, target_dists, goal_dists = calculate_turns_toward_target_fish(run)
    all_turns_towards_target.append(turning_points)
    all_dists_target_at_turning_points_towards_target.append(target_dists)
    all_dist_goal_at_turning_points_towards_target.append(goal_dists)

plot_turns(
    all_turns_towards_target,
    all_dists_target_at_turning_points_towards_target,
    all_dist_goal_at_turning_points_towards_target,
)
print(f"turns per run : {len(np.hstack(all_turns_towards_target)) / len(all_turns_towards_target)}\n")

In [None]:
def plot_turns(all_turns_towards_target, all_dists_target_at_turning_points_towards_target, all_dist_goal_at_turning_points_towards_target):
    """Plot where the robot turned towards the target fish, pooled over all runs.

    Draws two histograms side by side — robot distance to the target fish and robot
    distance to the goal at the turning points — each with its median marked, prints both
    medians and saves the figure to ./plots/pawel_turning_points_at_dists_overall.

    Parameters
    ----------
    all_turns_towards_target :
        Per-run turning point indices. Not needed for the plots; kept for the
        existing call signature.
    all_dists_target_at_turning_points_towards_target :
        Per-run distances to the target fish at the turning points.
    all_dist_goal_at_turning_points_towards_target :
        Per-run distances to the goal at the turning points.
    """
    # overall: pool the per-run values
    dist_at_turning_points_towards_target = np.hstack(all_dists_target_at_turning_points_towards_target)
    dist_goal_at_turning_points_towards_target = np.hstack(all_dist_goal_at_turning_points_towards_target)

    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,5))
    f.suptitle("pawel leadership model")

    panels = (
        (ax1, dist_at_turning_points_towards_target,
         "histogram turning points towards target at distances to target fish",
         "robot distance to target fish"),
        (ax2, dist_goal_at_turning_points_towards_target,
         "histogram turning points towards target at distances to goal zone",
         "robot distance to goal"),
    )
    for ax, values, title, xlabel in panels:
        ax.hist(values, bins=100)
        ax.axvline(np.median(values), label="median", color="g")
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel("number of turning points at distance X")
        # a bare plt.legend() only reaches the last axes, so each panel gets its own legend
        ax.legend()

    print(f"overall median dist to target: {np.median(dist_at_turning_points_towards_target)}")
    print(f"overall median dist to goal: {np.median(dist_goal_at_turning_points_towards_target)}")

    # savefig does not create missing directories
    os.makedirs("./plots", exist_ok=True)
    plt.savefig(f"./plots/pawel_turning_points_at_dists_overall")

#### straightness index

In [None]:
def calculate_straightness_index(run):
    """Straightness index of the robot and fish-1 path of one simulated run.

    The straightness index is distance_start_end_point / distance_traveled; it is 0 when
    nothing was traveled. Positions are read from run['pos'] (index 0 on the second axis
    is used as the robot, index 1 as fish 1) and only every third time step is used.

    Returns
    -------
    list
        [robot straightness index, fish straightness index]
    """
    positions = np.array(run['pos'])
    # keep every third time step
    robot_pos_run = positions[:, 0][::3]
    fish1_pos_this_run = positions[:, 1][::3]

    straightness_indices = []
    for path in (robot_pos_run, fish1_pos_this_run):
        # axis=0: displacement between consecutive time steps. The default (last axis)
        # would subtract the coordinates of one position from each other instead.
        dists_per_step = np.linalg.norm(np.diff(path, axis=0), axis=1)
        dist_traveled = np.sum(dists_per_step)
        dist_start_end = np.linalg.norm(path[0] - path[-1])
        straightness_indices.append(dist_start_end / dist_traveled if dist_traveled != 0 else 0)

    return straightness_indices


def plot_straightness_index(all_robot_SI, all_fish_SI):
    """Print the medians and plot overlaid histograms of robot and fish straightness index.

    Parameters
    ----------
    all_robot_SI, all_fish_SI :
        Per-run straightness index values (distance_start_end_point / distance_traveled)
        of the robot and of fish 1.
    """
    all_dates_robot_tortuosity_straightness_index = np.hstack(all_robot_SI)
    all_dates_fish1_tortuosity_straightness_index = np.hstack(all_fish_SI)
    print(f"overall median SI robot: {np.median(all_dates_robot_tortuosity_straightness_index)}")
    print(f"overall median SI fish: {np.median(all_dates_fish1_tortuosity_straightness_index)}")

    # plot straightness index
    # overall
    plt.figure(figsize=(16,9))
    # the bins have to cover both data sets: values outside the bin range are silently
    # left out of a histogram, so sizing them by the robot values alone can hide fish runs
    upper_edge = max(np.max(all_dates_robot_tortuosity_straightness_index),
                     np.max(all_dates_fish1_tortuosity_straightness_index))
    bins = np.linspace(0, upper_edge, 100)
    plt.title("overall straightness index of robot and target fish path (distance_start_end_point / distance_traveled)")
    plt.hist(all_dates_fish1_tortuosity_straightness_index, bins=bins, color="b", alpha=0.5, label="fish1 straightness index")
    plt.hist(all_dates_robot_tortuosity_straightness_index, bins=bins, color="g", alpha=0.5, label="robot straightness index")
    plt.axvline(np.median(all_dates_fish1_tortuosity_straightness_index), color="b", label="median fish")
    plt.axvline(np.median(all_dates_robot_tortuosity_straightness_index), color="g", label="median robot")
    plt.xlabel("straightness index (distance_start_end_point / distance_traveled)")
    plt.ylabel("number of runs")
    plt.legend()
    plt.show()

In [None]:
# Straightness index of the robot and fish path for every loaded run, then plot both.
all_robot_SI = []
all_fish_SI = []
for run in runs:
    robot_SI, fish_SI = calculate_straightness_index(run)
    all_robot_SI.append(robot_SI)
    all_fish_SI.append(fish_SI)

plot_straightness_index(all_robot_SI, all_fish_SI)

#### leading percentage

In [None]:
def calculate_leading_percentage(run):
    """Return [run['leadingRecord'], last entry of run['t']] for one simulated run.

    Despite the name, no percentage is computed here: the leading record is returned
    unchanged together with the final time stamp, which is used as the run length.
    """
    return [run['leadingRecord'], run['t'][-1]]


def plot_leading_percentage(all_following_this_run, all_run_lenghts):
    """Plot how much of each run was spent leading the target fish.

    Three figures are produced and saved below ``./plots/``:

    1. a histogram of the per-run leading fraction,
    2. a scatter plot of leading fraction against run length with a linear and
       a degree-10 polynomial fit,
    3. a box plot of the leading fraction grouped into 20 equal-width
       run-length bins.

    Parameters
    ----------
    all_following_this_run : sequence
        One leading record per run. Only element ``[0]`` of each record is
        used; it must support ``.sum()`` and ``len()`` (presumably a 0/1 array
        with one sample per timestep - TODO confirm).
    all_run_lenghts : sequence
        Duration of every run, in the same order as the records.

    Notes
    -----
    Reads the notebook globals ``start_date`` and ``end_date`` for the titles.
    The plotted values are fractions in [0, 1], although the labels call them
    percentages.
    """
    all_runs_percentage_of_run_following = [following_run[0].sum() / len(following_run[0]) for following_run in all_following_this_run]
    all_dates_percentage_of_run_following = np.hstack(all_runs_percentage_of_run_following)
    all_dates_run_lengths = np.hstack(all_run_lenghts)
    
    # histogram following percentages
    fig = plt.figure(figsize=(15,8))
    plt.title(f"[{start_date}] - [{end_date}] - histogram of leading percentages per run ")
    plt.hist(all_dates_percentage_of_run_following, bins=20)
    plt.xlabel("percentage of run time leading the target fish")
    plt.ylabel("number of runs")
    plt.savefig(f"./plots/pawel_histogram_run_length_following_percentage")
    plt.show()

    # run length following percentage correlation plot
    fig = plt.figure(figsize=(15,8))
    plt.title(f"[{start_date}] - [{end_date}] - correlation plot of run lengths and leading percentage")
    plt.scatter(all_dates_run_lengths, all_dates_percentage_of_run_following)
    plt.xlabel("run time in seconds")
    plt.ylabel("percentage of run time leading the target")
    plt.ylim(0,1)

    # linear regression
    coef = np.polyfit(all_dates_run_lengths,all_dates_percentage_of_run_following,1)
    poly1d_fn = np.poly1d(coef)
    # poly1d_fn is now a function which takes in x and returns an estimate for y
    plt.plot(all_dates_run_lengths,all_dates_percentage_of_run_following, 'bo', all_dates_run_lengths, poly1d_fn(all_dates_run_lengths), '--k') # 'bo' = blue circle markers, '--k' = black dashed line

    # polynomial regression
    # NOTE(review): a degree-10 fit is poorly conditioned for few runs - confirm it is intended
    mymodel = np.poly1d(np.polyfit(all_dates_run_lengths, all_dates_percentage_of_run_following, 10))
    myline = np.linspace(np.min(all_dates_run_lengths), np.max(all_dates_run_lengths), 100)
    plt.plot(myline,  mymodel(myline), '--r')

    plt.savefig(f"./plots/pawel_scatter_run_length_following_percentage")

    # boxplot
    fig = plt.figure(figsize=(15,8))
    bin_edges_array = np.linspace(np.min(all_dates_run_lengths),np.max(all_dates_run_lengths),21)
    # Digitize against every edge except the last one: the resulting indices run
    # from 1 to 20 and the longest run (== last edge) lands in the final interval
    # instead of a separate, mislabelled "> max" bin.
    digitized_run_lengths = np.digitize(all_dates_run_lengths, bin_edges_array[:-1])
    binned_percentage_of_run_following = [[] for _ in range(len(bin_edges_array) - 1)]
    for percentage, id_bin in zip(all_dates_percentage_of_run_following, digitized_run_lengths):
        binned_percentage_of_run_following[id_bin - 1].append(percentage)
    plt.title(f"[{start_date}] - [{end_date}] - box plot of run lengths and leading percentage")
    # one "<lower>-\n<upper>" tick label per interval
    labels = [f"{np.round(lower_edge)}-\n{np.round(upper_edge)}" for lower_edge, upper_edge in zip(bin_edges_array[:-1], bin_edges_array[1:])]
    plt.boxplot(binned_percentage_of_run_following, labels=labels)
    plt.xlabel("run time in seconds")
    plt.ylabel("percentage of run time leading the target")
    plt.ylim(0,1)
    plt.savefig(f"./plots/pawel_boxplot_run_length_following_percentage")
    plt.show()

In [None]:
# collect the leading record and the run length of every run, then plot them
all_following_this_run = []
all_run_lenghts = []

for run in runs:
    # calculate_leading_percentage returns [leading record, run length]
    run_leading = calculate_leading_percentage(run)
    all_following_this_run.append(run_leading[0])
    all_run_lenghts.append(run_leading[1])
    
plot_leading_percentage(all_following_this_run, all_run_lenghts)


In [None]:
# Fraction of each run spent leading, once over the whole run and once restricted
# to the part of the run that starts at the first leading sample ("first contact").
all_dates_percentage_of_run_following = [np.sum(run[0]) / len(run[0]) for run in all_following_this_run]
# np.argmax yields 0 for an all-zero record, so runs without any contact are kept in full
all_following_this_run_after_contact = [following_this_run[0][np.argmax(following_this_run[0]):] for following_this_run in all_following_this_run]
all_dates_percentage_of_run_following_after_contact = [np.sum(run) / len(run) for run in all_following_this_run_after_contact]
all_dates_run_lengths = all_run_lenghts

print("Mean leading percentage by difficulty of all runs")
print(np.mean(all_dates_percentage_of_run_following))
print("Mean leading percentage by difficulty of all runs after first contact")
print(np.mean(all_dates_percentage_of_run_following_after_contact))
print("\n")
# every value is printed directly below its own heading (the two median headings
# used to be printed back to back, followed by the two values)
print("Median leading percentage by difficulty of all runs")
print(np.median(all_dates_percentage_of_run_following))
print("Median leading percentage by difficulty of all runs after first contact")
print(np.median(all_dates_percentage_of_run_following_after_contact))
print("\n")


#### sample plot

In [None]:
# illustrate a single, hand-picked run (index 12) of the comparison leadership model
example_run = runs[12]
# indexed below as POS[timestep, agent, coordinate]; agent 0 is drawn as the leader, agent 1 as the target fish
POS = np.array(example_run['POS'], dtype=object)


#plot 
plt.title("sample run of comparison leadership model")

plt.plot(POS[:,0,0],POS[:,0,1],'k',label='leader')
plt.plot(POS[:,1,0],POS[:,1,1],'r',label='target fish')
# green marker at (750, 1250) - presumably the goal position of this model; TODO confirm
plt.plot(750,1250, color='g', marker='o')

plt.xlim(600,1800)
plt.ylim(400,1500)
plt.xlabel("x position")
plt.ylabel("y position")
plt.legend()
plt.tight_layout()

plt.savefig("./plots/pawel_sample_run")
plt.show()


In [None]:
# Illustrate one fast, human-led run of a fixed day.
# NOTE(review): the day is hard-coded and must be present in dates_dict.
date_dict = dates_dict['2022-10-01']
is_quick_run = np.array(dates_dict['2022-10-01']['run_lengths']) < fast_runs_percentiles[1]
quick_runs = np.array(date_dict['runs'])[is_quick_run]
id_sample_run = 8

# a run is stored as (first timestep, last timestep); the last timestep is inclusive
sample_window = slice(quick_runs[id_sample_run][0], quick_runs[id_sample_run][1] + 1)
robot_pos = np.array(date_dict['positions'][sample_window])
# index 1 of every timestep's fish list is plotted as the target fish
fish_pos = np.array([fish[1]['position'] for fish in np.array(date_dict['fish'][sample_window])])


plt.title("sample run of human leadership")

plt.plot(robot_pos[:,0], robot_pos[:,1], 'k', label='human leader')
plt.plot(fish_pos[:,0], fish_pos[:,1], 'r', label='target fish')
# mark the final robot position of the run
plt.plot(robot_pos[-1][0], robot_pos[-1][1], color='g', marker='o')

plt.xlabel("x position")
plt.ylabel("y position")
plt.xlim(0,1800)
plt.ylim(0,2000)
plt.legend()
plt.tight_layout()

plt.savefig("./plots/human_sample_run")
plt.show()

***
# Plot

### Plot robot positions **

In [None]:
%matplotlib inline
# plot all positions of the successful challenge runs held in dates_dict for the selected date range
plot_all_positions(dates_dict, start_date=start_date, end_date=end_date, challenges=True, only_successful=True)

In [None]:
from util import equalize_arrays

def plot_dist_to_goal_for_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Collect the robot positions of every run in the given date range.

    NOTE(review): despite its name this function neither plots anything nor
    computes a distance to the goal yet - it only gathers the positions via
    ``collect_robot_positions``. The previous version flattened the positions
    per percentile into a local list that was never used and then returned
    ``None``; the collected data is now returned instead, matching the other
    ``*_of_all_dates`` functions in this notebook.

    Parameters
    ----------
    start_date, end_date, load_only_challenge_data, challenges, only_successful,
    fast_runs_percentiles, plot, verbose
        Forwarded unchanged to ``iteratively_evaluate_dates_files_runwise``.

    Returns
    -------
    numpy.ndarray
        Object array holding whatever ``iteratively_evaluate_dates_files_runwise``
        returned (presumably one list of per-run results per entry of
        ``fast_runs_percentiles`` - TODO confirm).
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=collect_robot_positions, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')
    return all_returns
            
            
            
def collect_robot_positions(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Per-run callback: return the robot positions recorded during one run.

    Parameters
    ----------
    run : sequence
        ``run[0]`` is the first and ``run[1]`` the last (inclusive) timestep index.
    id_run, run_arguments, plot, verbose
        Accepted for the common callback signature, not used here.
    date_dict : mapping
        Data of one day; only ``date_dict['positions']`` is read.

    Returns
    -------
    list
        Single-element list holding the slice of positions for the run.
    """
    positions_of_day = date_dict['positions']
    return [positions_of_day[run[0]:run[1] + 1]]

### Plot average rotation and position heatmap **

In [None]:
# plot average rotation and position heatmap of the successful challenge runs in dates_dict,
# ignoring samples where the robot stands still (ignore_robot_standing=True)
%matplotlib inline
plot_rotations_and_heatmap(dates_dict, start_date=start_date, end_date=end_date, challenges=True, only_successful=True, ignore_robot_standing=True, polar_density=True)

In [None]:
import matplotlib.patches as patches

def plot_position_hexmap_and_orientation_histogram_of_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Collect robot trajectories of all runs and plot rotation, position and mean-direction summaries.

    Every run in the date range is evaluated with ``collect_all_positions``.
    When ``plot`` is true and ``fast_runs_percentiles`` is given, three figures
    are drawn and saved below ``./plots/`` for each percentile entry:

    1. a polar histogram of the robot rotation,
    2. a hexbin heatmap of the robot positions with the goal zone outlined,
    3. the mean robot position and mean direction vector, overall and per
       difficulty (codes 2 / 4 / 6 are treated as easy / medium / hard).

    The means and standard deviations behind figure 3 are printed as well.

    Parameters
    ----------
    start_date, end_date, load_only_challenge_data, challenges, only_successful, verbose
        Forwarded unchanged to ``iteratively_evaluate_dates_files_runwise``.
    fast_runs_percentiles : sequence or None
        Forwarded as well; one set of figures is produced per entry.
    plot : bool
        Forwarded as well; the figures are only produced when true.

    Returns
    -------
    numpy.ndarray
        Object array with the values collected by ``collect_all_positions``,
        indexed by percentile entry first.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=collect_all_positions, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    # everything below only feeds the figures, so there is nothing to do without them
    if fast_runs_percentiles is None or not plot:
        return all_returns

    ignore_robot_standing = True
    polar_density = True

    # goal zone in pixel coordinates
    goal_min_x, goal_max_x = 0, 750
    goal_min_y, goal_max_y = 1250, 2000
    goal_width = goal_max_x - goal_min_x
    goal_height = goal_max_y - goal_min_y

    def _flatten_column(run_table, column):
        """Concatenate the per-timestep samples of one collected column over all runs."""
        return [sample for run_samples in run_table[:, column] for sample in run_samples]

    for id_fast_runs_percentile, _ in enumerate(fast_runs_percentiles):
        # One row per run; the columns follow the return list of collect_all_positions:
        # 0 robot position, 1 robot rotation, 2 robot direction, 3 fish position,
        # 4 fish direction, 5 run length, 6 difficulty
        run_table = np.array(all_returns[id_fast_runs_percentile], dtype='object')
        all_dates_difficulties = run_table[:, 6]

        all_dates_robot_pos = _flatten_column(run_table, 0)
        all_dates_robot_rot = _flatten_column(run_table, 1)
        all_dates_robot_dir = _flatten_column(run_table, 2)

        # per-difficulty samples: (legend label, colour, positions, directions)
        difficulty_samples = []
        for legend_label, colour, difficulty_code in (("easy", "g", 2), ("medium", "y", 4), ("hard", "r", 6)):
            runs_of_difficulty = run_table[all_dates_difficulties == difficulty_code]
            difficulty_samples.append((legend_label, colour, _flatten_column(runs_of_difficulty, 0), _flatten_column(runs_of_difficulty, 2)))

        # remove positions and rotations where robot is not moving
        # NOTE(review): only the overall position/rotation lists are filtered; the
        # direction vectors and the per-difficulty lists keep the standing samples.
        if ignore_robot_standing:
            assert len(all_dates_robot_rot) == len(all_dates_robot_pos)
            old_pos = [0,0]
            adjusted_positions = []
            adjusted_rotations = []

            # compare each pos to previous pos and skip if basically unchanged
            for pos, rot in zip(all_dates_robot_pos, all_dates_robot_rot):
                if distance(pos, old_pos) < 0.1:
                    pass
                else:
                    adjusted_positions.append(pos)
                    adjusted_rotations.append(rot)
                old_pos = pos
            all_dates_robot_pos = adjusted_positions
            all_dates_robot_rot = adjusted_rotations

        # plot overall rotation
        fig = plt.figure(figsize=(10,10))
        fig.suptitle(f'[{start_date}]-[{end_date}]\nRobot: Average rotation histogram', fontsize=14)
        ax1 = plt.subplot(111, projection='polar')
        ax1.set_title("binned robot rotation")
        circular_hist(ax1, np.radians(all_dates_robot_rot),bins=16, density=polar_density, offset=0, gaps=True)
        plt.savefig(f"./plots/{id_fast_runs_percentile}_{start_date}_{end_date}_rotation_histogram")
        plt.show()

        # plot overall heatmap
        fig = plt.figure(figsize=(10,10))
        # this figure used to carry the copy-pasted title of the rotation histogram
        fig.suptitle(f'[{start_date}]-[{end_date}]\nRobot: Position heatmap', fontsize=14)
        ax2 = plt.subplot(111)
        robot_xy = np.array(all_dates_robot_pos)
        x = robot_xy[:,0]
        y = robot_xy[:,1]

        plt.xlim(0,2100)
        plt.xlabel("x pixel coordinate")
        plt.ylim(0,2100)
        plt.ylabel("y pixel coordinate")

        plt.hexbin(x,y, gridsize=100, bins='log', cmap='inferno') # use log bins to ignore positions where robot stands still
        plt.title("hexbin plot of all loaded robot positions")

        #show goal zone on plot
        goal_rect = patches.Rectangle((goal_min_x,goal_min_y), goal_width, goal_height, linewidth=2, edgecolor='k', facecolor='none')
        plt.gca().add_patch(goal_rect)
        plt.savefig(f"./plots/{id_fast_runs_percentile}_{start_date}_{end_date}_hexbin_positions2")
        plt.show()

        # mean position and direction vectors
        # (legend label, colour, print label position, print label direction, print label std direction, positions, directions)
        groups = [("overall", "k", "overall mean position", "overall mean direction", "overall mean dir", all_dates_robot_pos, all_dates_robot_dir)]
        for legend_label, colour, positions, directions in difficulty_samples:
            groups.append((legend_label, colour, f"{legend_label}_mean_pos", f"{legend_label}_mean_dir", f"{legend_label}_mean_dir", positions, directions))

        group_stats = []
        for legend_label, colour, pos_label, dir_label, std_dir_label, positions, directions in groups:
            # A group without samples has no mean; indexing the resulting NaN
            # scalar used to abort the whole plot with an IndexError.
            if len(positions) == 0 or len(directions) == 0:
                print(f"{legend_label}: no samples in this selection - skipped")
                continue
            group_stats.append({
                "legend_label": legend_label,
                "colour": colour,
                "pos_label": pos_label,
                "dir_label": dir_label,
                "std_dir_label": std_dir_label,
                "mean_pos": np.mean(positions, axis=0),
                "std_pos": np.std(positions, axis=0),
                "mean_dir": np.mean(directions, axis=0),
                "std_dir": np.std(directions, axis=0),
            })

        for stats in group_stats:
            print(f"{stats['pos_label']}: {stats['mean_pos']}")
        for stats in group_stats:
            print(f"std {stats['pos_label']}: {stats['std_pos']}")
        for stats in group_stats:
            print(f"{stats['dir_label']}: {stats['mean_dir']}; magnitude: {np.linalg.norm(stats['mean_dir'])}")
        for stats in group_stats:
            print(f"std {stats['std_dir_label']}: {stats['std_dir']}")

        fig = plt.figure(figsize=(10,10))
        fig.suptitle(f'[{start_date}]-[{end_date}]\nRobot: Average positions and direction vectors', fontsize=14)

        plt.xlim(-5,1250)
        plt.xlabel("x pixel coordinate")
        plt.ylim(750,2100)
        plt.ylabel("y pixel coordinate")
        goal_rect = patches.Rectangle((goal_min_x,goal_min_y), goal_width, goal_height, linewidth=2, edgecolor='b', facecolor='none', hatch='/')
        arena_rect = patches.Rectangle((0,0), 2000, 2000, linewidth=4, edgecolor='k', facecolor='none')
        plt.gca().add_patch(goal_rect)
        plt.gca().add_patch(arena_rect)

        for stats in group_stats:
            plt.scatter(stats["mean_pos"][0], stats["mean_pos"][1], s=20, c=stats["colour"])
        for stats in group_stats:
            plt.quiver(stats["mean_pos"][0], stats["mean_pos"][1], stats["mean_dir"][0], stats["mean_dir"][1], color=stats["colour"], scale=3, width=0.004, label=stats["legend_label"])

        plt.legend()
        plt.savefig(f"./plots/{id_fast_runs_percentile}_{start_date}_{end_date}_scatter_avg_pos_dir")
        plt.show()

    return all_returns
    
def collect_all_positions(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Per-run callback: gather robot and target-fish trajectories plus run metadata.

    Parameters
    ----------
    run : sequence
        ``run[0]`` is the first and ``run[1]`` the last (inclusive) timestep index.
    id_run : int
        Index of the run inside ``date_dict['run_lengths']`` / ``date_dict['difficulties']``.
    date_dict : mapping
        Data of one day; the keys ``'fish'``, ``'positions'``, ``'rotation'``,
        ``'orientation'``, ``'run_lengths'`` and ``'difficulties'`` are read.
    run_arguments, plot, verbose
        Accepted for the common callback signature, not used here.

    Returns
    -------
    list of numpy.ndarray
        ``[robot positions, robot rotations, robot orientations, fish positions,
        fish orientations, run length, difficulty]``; the fish values are taken
        from index 1 of every timestep's fish list.
    """
    # timesteps belonging to this run (the end index is inclusive)
    window = slice(run[0], run[1] + 1)

    target_fish_per_ts = [all_fish_in_ts[1] for all_fish_in_ts in date_dict['fish'][window]]
    fish1_pos_run = np.array([fish['position'] for fish in target_fish_per_ts])
    fish1_dir_run = np.array([fish['orientation'] for fish in target_fish_per_ts])

    robot_pos_run, robot_rot_run, robot_dir_run = (
        np.array(date_dict[key][window]) for key in ('positions', 'rotation', 'orientation')
    )

    run_length, difficulty = (
        np.array(date_dict[key][id_run]) for key in ('run_lengths', 'difficulties')
    )

    return [robot_pos_run, robot_rot_run, robot_dir_run, fish1_pos_run, fish1_dir_run, run_length, difficulty]
    

In [None]:
# rotation histogram, position heatmap and mean position/direction plots for the given week
# NOTE(review): this overwrites the notebook-wide start_date / end_date for all later cells
start_date = "2021-11-19"
end_date = "2021-11-25"
#fast_runs_percentiles = [100,25,10]

all_returns = plot_position_hexmap_and_orientation_histogram_of_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

In [None]:
# quick sanity figures for the first percentile entry of all_returns:
# total number of collected robot position samples
print(len([pos for run_pos in np.array(all_returns[0],dtype='object')[:,0] for pos in run_pos]))
# mean number of position samples per run
print(np.mean([len(run) for run in np.array(all_returns[0],dtype='object')[:,0]]))
# mean of all robot rotation samples (plain arithmetic mean of the stored values)
print(np.mean([rot for run_rot in np.array(all_returns[0],dtype='object')[:,1] for rot in run_rot]))


### Plot individual runs **

In [None]:
# plot the successful challenge runs of one hard-coded day
# NOTE(review): ignores start_date / end_date; presumably dates_dict has to contain 2022-09-01 - confirm
plot_runs(dates_dict, start_date="2022-09-01", end_date="2022-09-01", challenges=True, only_successful=True)

### Plot start and end points of runs **

In [None]:
# plot start and end points for all successful challenge runs in dates_dict
# (both dates are None here - presumably no date filtering; TODO confirm against plot_starts_ends)
plot_starts_ends(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True)

### Plot run length histogram **

In [None]:
# plot run length histogram of the successful challenge runs in dates_dict
# (only start_date is passed; end_date=None presumably leaves the range open-ended - TODO confirm)
plot_run_length_hist(dates_dict, start_date=start_date, end_date=None, bin_size=5, challenges=True, only_successful=True)

In [None]:
def plot_run_length_of_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Collect the length and difficulty of all runs and plot run-length histograms.

    Every run in the date range is evaluated with ``collect_all_run_lengths``.
    If ``fast_runs_percentiles`` is given, two figures are created and saved
    below ``./plots/`` for each percentile entry: a histogram of all run
    lengths and the same histogram stacked by difficulty (codes 2 / 4 / 6 are
    treated as easy / medium / hard). The dashed vertical line marks the median
    run length, and the medians per difficulty are printed.

    Parameters
    ----------
    start_date, end_date, load_only_challenge_data, challenges, only_successful, plot, verbose
        Forwarded unchanged to ``iteratively_evaluate_dates_files_runwise``.
    fast_runs_percentiles : sequence or None
        Forwarded as well; one pair of figures is produced per entry. Nothing
        is plotted when it is ``None``.

    Returns
    -------
    numpy.ndarray
        Object array with the ``[run length, difficulty]`` pairs collected per
        percentile entry.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=collect_all_run_lengths, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    # number of bin EDGES handed to np.linspace, i.e. the histograms get one bin less
    # (this value used to be called bin_size although it is not a width)
    n_bin_edges = 20

    if fast_runs_percentiles is not None:
        for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
            # one row per run: column 0 = run length, column 1 = difficulty
            run_table = np.array(all_returns[id_fast_runs_percentile])
            all_dates_run_lengths = np.hstack(run_table[:,0])
            all_dates_difficulties = np.hstack(run_table[:,1])

            median_run_length = np.median(all_dates_run_lengths)
            bins = np.linspace(0,np.max(all_dates_run_lengths), n_bin_edges)

            # histogram of all run lengths
            fig = plt.figure(figsize=(15,5))
            fig.suptitle(f'[{start_date}]-[{end_date}] - run length histogram', fontsize=14)
            plt.xticks(bins)
            plt.hist(all_dates_run_lengths, bins=bins,align="left")
            plt.axvline(median_run_length, color='k', linestyle='dashed', linewidth=2)
            plt.xlabel("time for run (s)")
            plt.ylabel("number of runs")
            plt.savefig(f"./plots/{id_fast_runs_percentile}_run_length_histogram")

            easy_run_lengths = all_dates_run_lengths[all_dates_difficulties==2]
            medium_run_lengths = all_dates_run_lengths[all_dates_difficulties==4]
            hard_run_lengths = all_dates_run_lengths[all_dates_difficulties==6]

            # same histogram, stacked by difficulty
            fig = plt.figure(figsize=(15,5))
            fig.suptitle(f'[{start_date}]-[{end_date}] - run length histogram stacked difficulties', fontsize=14)
            plt.xticks(bins)
            plt.hist([easy_run_lengths, medium_run_lengths, hard_run_lengths], bins=bins, align="left", stacked=True, label=["easy", "medium", "hard"])
            plt.axvline(median_run_length, color='k', linestyle='dashed', linewidth=2)
            plt.xlabel("time for run (s)")
            plt.ylabel("number of runs")
            plt.legend()
            plt.savefig(f"./plots/{id_fast_runs_percentile}_run_length_histogram_stacked_difficulties")

            # the printed values are medians (the labels used to say "mean")
            print(f"\nmedian all:{median_run_length}")
            print(f"median easy:{np.median(easy_run_lengths)}")
            print(f"median medium:{np.median(medium_run_lengths)}")
            print(f"median hard:{np.median(hard_run_lengths)}")

    return all_returns
    
def collect_all_run_lengths(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Per-run callback: look up the length and the difficulty of one run.

    Parameters
    ----------
    id_run : int
        Index of the run inside ``date_dict['run_lengths']`` / ``date_dict['difficulties']``.
    date_dict : mapping
        Data of one day; only the two keys above are read.
    run, run_arguments, plot, verbose
        Accepted for the common callback signature, not used here.

    Returns
    -------
    list
        ``[run length, difficulty]`` of the requested run.
    """
    return [date_dict[key][id_run] for key in ('run_lengths', 'difficulties')]
    

In [None]:
# run length histograms over the complete recording period
# NOTE(review): this overwrites the notebook-wide start_date / end_date / fast_runs_percentiles for all later cells
start_date = "2021-11-19"
end_date = "2022-10-25"
fast_runs_percentiles = [200,20.721,15.8846]

all_returns = plot_run_length_of_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

In [None]:
# Split the collected [run length, difficulty] pairs per percentile selection
# (index 0 / 1 / 2 of all_returns feed the _100 / _25 / _10 variables).
all_dates_run_lengths_100, all_dates_run_lengths_25, all_dates_run_lengths_10 = [
    np.hstack(np.array(all_returns[id_selection])[:,0]) for id_selection in range(3)
]
all_dates_difficulties_100, all_dates_difficulties_25, all_dates_difficulties_10 = [
    np.hstack(np.array(all_returns[id_selection])[:,1]) for id_selection in range(3)
]

# percentiles of the run length over the first selection
percentile_10 = np.percentile(all_dates_run_lengths_100, 10)
percentile_25 = np.percentile(all_dates_run_lengths_100, 25)

run_length_selections = (all_dates_run_lengths_100, all_dates_run_lengths_25, all_dates_run_lengths_10)
difficulty_selections = (all_dates_difficulties_100, all_dates_difficulties_25, all_dates_difficulties_10)

# median run length per selection
for selection_run_lengths in run_length_selections:
    print(np.median(selection_run_lengths))

# share (in percent) of the difficulty codes 2, 4 and 6 within each selection
for selection_run_lengths, selection_difficulties in zip(run_length_selections, difficulty_selections):
    print('\n')
    for difficulty_code in (2, 4, 6):
        print(len(selection_run_lengths[selection_difficulties==difficulty_code])/(len(selection_run_lengths))*100)


print(f"All run lengths percentile 25: {percentile_25} and all run lenghts percentile 10: {percentile_10}")
print(f"Number of all runs:{len(all_dates_run_lengths_100)}; Number of runs under 25th percentile: {len(all_dates_run_lengths_25)}; Number of runs under 10th percentile: {len(all_dates_run_lengths_10)}")

### Plot inter-individual distances between robot and target fish for all (challenge) runs **

In [None]:
# plot inter-individual distances between robot and target fish for all successful challenge runs
# held in dates_dict, using 10 bins
plot_inter_individual_distances(dates_dict, start_date, end_date, challenges=True, only_successful=True, bins=10)

In [None]:
def _plot_binned_iid_boxplot(binned_distances, subset_label, id_fast_runs_percentile, start_date, end_date):
    """Draw, save and show one box plot of frame-binned robot / target-fish distances.

    Parameters
    ----------
    binned_distances : list of sequences
        One entry per frame bin; each entry holds one mean distance per run.
    subset_label : str
        Name of the run subset ("overall", "easy", ...); used in title and file name.
    id_fast_runs_percentile : int
        Index of the percentile subset; used as prefix of the saved file name.
    start_date, end_date
        Only used for the figure title.
    """
    # plt.boxplot does not skip NaNs: drop the NaN placeholders that mark
    # empty frame bins (runs with fewer frames than bins)
    cleaned_bins = []
    for bin_values in binned_distances:
        bin_values = np.asarray(bin_values, dtype=float)
        cleaned_bins.append(bin_values[~np.isnan(bin_values)])

    # unique figure id per subset so that no plot is drawn into an earlier figure
    plt.figure(num=f"{start_date}-{end_date} - frame-sliced inter-individual distances between robot and target fish (id 1) - {subset_label} - {id_fast_runs_percentile}", figsize=(15,5))
    plt.boxplot(cleaned_bins, sym="")

    plt.title(f"{start_date}-{end_date} - frame-binned average inter-individual distances between robot and target fish (id 1) - {subset_label}")
    plt.xlabel("i-th part of frames")
    plt.ylabel("distance in px")
    plt.ylim(0,2000)

    # challenge zones: zor = 40, zoo = 100, zoa = 220
    for zone_distance, zone_color, zone_name in ((40, 'r', "repulsion"), (100, 'b', "orientation"), (220, 'g', "attraction")):
        plt.axhline(zone_distance, linewidth=2, color=zone_color, linestyle=(0,(2,2)), label=zone_name)

    plt.legend()
    plt.savefig(f"./plots/{id_fast_runs_percentile}_iid_box_plot3_{subset_label}")
    plt.show()


def plot_inter_individual_distances_of_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Box-plot the frame-binned distance between robot and target fish (id 1) for all runs.

    Every run is evaluated with calculate_inter_individual_distances_for_run via
    iteratively_evaluate_dates_files_runwise. If fast_runs_percentiles is given,
    four box plots (overall and difficulty 2 = easy, 4 = medium, 6 = hard) are
    drawn and saved to ./plots/ for each percentile subset, and the median of
    the pooled per-frame distances of each subset is printed. Subsets without
    runs are reported and skipped.

    Returns
    -------
    The per-run results as returned by iteratively_evaluate_dates_files_runwise,
    converted to an object array.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=calculate_inter_individual_distances_for_run, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    if fast_runs_percentiles is None:
        return all_returns

    difficulty_subsets = (("easy", 2), ("medium", 4), ("hard", 6))

    for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
        # each row is [per-frame distances, per-bin mean distances, difficulty]
        run_rows = list(all_returns[id_fast_runs_percentile])

        subsets = [("overall", run_rows)]
        for subset_label, difficulty in difficulty_subsets:
            subsets.append((subset_label, [row for row in run_rows if row[2] == difficulty]))

        medians = []
        for subset_label, subset_rows in subsets:
            if len(subset_rows) == 0:
                print(f"No runs for subset '{subset_label}' in percentile subset {id_fast_runs_percentile} - skipping plot")
                continue

            # regroup: one list per frame bin holding the mean distance of every run
            n_bins = len(subset_rows[0][1])
            binned_distances = [[row[1][bin_id] for row in subset_rows] for bin_id in range(n_bins)]
            _plot_binned_iid_boxplot(binned_distances, subset_label, id_fast_runs_percentile, start_date, end_date)

            # pool the per-frame distances of all runs before taking the median;
            # the runs have different lengths, so they cannot be reduced as one array
            pooled_distances = np.hstack([row[0] for row in subset_rows])
            medians.append((subset_label, np.nanmedian(pooled_distances)))

        for subset_label, median_distance in medians:
            print(f"{subset_label} - {median_distance}")

    return all_returns
        
    
def calculate_inter_individual_distances_for_run(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Distance between robot and fish 1 for one run, per frame and averaged over 15 frame bins.

    run: (first_frame, last_frame) indices into the date's time series, both inclusive.
    id_run: index of the run, used to look up its difficulty.
    date_dict: must provide 'fish', 'positions' and 'difficulties'.
    run_arguments, plot, verbose: accepted for the common run-function signature, unused here.

    Returns [per-frame distances, list of per-bin mean distances, difficulty].
    Bins that receive no frame are reported as NaN.
    """
    n_bins = 15
    frames = slice(run[0], run[1] + 1)

    target_track = np.array([fish_in_frame[1]['position'] for fish_in_frame in date_dict['fish'][frames]])
    robot_track = np.array(date_dict['positions'][frames])
    difficulty = date_dict['difficulties'][id_run]

    # sanity check: both tracks must cover the same frames
    assert len(target_track) == len(robot_track), f"Wrong array lengths: fish {len(target_track)} and robot {len(robot_track)}"

    # distance in every frame
    frame_distances = np.linalg.norm(target_track - robot_track, axis=1)

    # histogram over the frame indexes: its counts are the number of frames per bin
    frames_per_bin, _ = np.histogram(np.arange(len(target_track)), bins=n_bins)
    bin_stops = np.cumsum(frames_per_bin)
    bin_starts = bin_stops - frames_per_bin

    # mean distance of each consecutive chunk of frames; NaN for chunks without frames
    mean_iid_per_bin = [
        np.nanmean(frame_distances[begin:stop]) if stop > begin else np.nan
        for begin, stop in zip(bin_starts, bin_stops)
    ]

    return [frame_distances, mean_iid_per_bin, difficulty]
    

In [None]:
# compute and plot the frame-binned inter-individual distances over all dates in the range
start_date = "2021-11-19"
end_date = "2021-11-25"
# NOTE: the assignment below is commented out, so fast_runs_percentiles keeps the value set by an earlier cell
#fast_runs_percentiles = [100,25,10]

all_returns = plot_inter_individual_distances_of_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

In [None]:
# number of per-run results in the third percentile subset (index 2 of fast_runs_percentiles)
len(all_returns[2])

### Plot robot distance to goal over time **

In [None]:
# plot robot distance to goal over time
%matplotlib inline
plot_robot_distance_goal(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True)

In [None]:
from util import equalize_arrays

def plot_dist_to_goal_for_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Plot the robot's distance to the goal over time for all runs.

    Every run is evaluated with calculate_dist_to_goal_for_run via
    iteratively_evaluate_dates_files_runwise. If fast_runs_percentiles is given,
    one figure per percentile subset is drawn: one line per run plus the mean
    over all runs (runs are padded with NaN to equal length by equalize_arrays).
    Subsets without runs are reported and skipped.

    Returns
    -------
    The per-run results as returned by iteratively_evaluate_dates_files_runwise,
    converted to an object array.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=calculate_dist_to_goal_for_run, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    if fast_runs_percentiles is None:
        return all_returns

    for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
        percentile_returns = all_returns[id_fast_runs_percentile]
        # check for emptiness before indexing: an empty result has no column 0 to select
        if len(percentile_returns) == 0:
            print(f"No runs in percentile subset {id_fast_runs_percentile} - skipping plot")
            continue

        all_dates_robot_dist_goal = np.array(percentile_returns, dtype='object')[:,0]

        plt.figure(figsize=(15,5))

        # one line per run
        for run_dist_goal in all_dates_robot_dist_goal:
            plt.plot(run_dist_goal)

        # mean over all runs, ignoring the NaN padding of shorter runs
        equalized_arrays = equalize_arrays(all_dates_robot_dist_goal, np.nan)
        mean_dist_goal = np.nanmean(equalized_arrays, axis=0)
        plt.plot(mean_dist_goal, color='green', linewidth=6, label="mean over runs")

        plt.title(f"{start_date}-{end_date} - robot distance to goal (run subset {fast_runs_percentile})")
        plt.ylabel("distance to goal (px)")
        plt.xlabel("frames")
        plt.legend()
        plt.show()

    return all_returns

def calculate_dist_to_goal_for_run(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Return the robot's distance to the goal for one run, wrapped in a one-element list.

    run: (first_frame, last_frame) indices into date_dict['positions'], both inclusive.
    The other parameters belong to the common run-function signature and are unused here.
    """
    frames = slice(run[0], run[1] + 1)
    return [get_distance_to_goal(date_dict['positions'][frames])]


In [None]:
# find turn in all runs over all dates
start_date = "2021-11-19"
end_date = "2021-11-25"
fast_runs_percentiles = [100,25,10]

all_returns = plot_dist_to_goal_for_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

### Plot following state **


In [None]:
from plot import plot_following1
# plot histogram of leading percentages and correlation plot of run length and leading percentage
plot_following1(dates_dict, start_date=start_date, end_date=end_date, only_successful=True, challenges=True) 

In [None]:
from util import equalize_arrays
from scipy.stats import zscore

def reject_outliers(data, m=2):
    """Keep only the entries of `data` that lie less than m standard deviations from the mean."""
    deviation = abs(data - np.mean(data))
    threshold = m * np.std(data)
    keep = deviation < threshold
    return data[keep]

def plot_following_percentage_for_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Plot the share of run time the target fish follows the robot against the run length.

    Every run is evaluated with calculate_following_for_run via
    iteratively_evaluate_dates_files_runwise. If fast_runs_percentiles is given,
    three figures are drawn and saved to ./plots/ for each percentile subset:
    a histogram of the following share, a scatter plot against run length with
    a linear and a degree-10 polynomial fit, and a box plot of the following
    share in 20 equally wide run-length bins. Runs whose run-length z-score is
    5 or more are excluded from the plots. Subsets without runs are skipped.

    Returns
    -------
    The per-run results as returned by iteratively_evaluate_dates_files_runwise,
    converted to an object array.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=calculate_following_for_run, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    if fast_runs_percentiles is None:
        return all_returns

    n_box_bins = 20

    for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
        if len(all_returns[id_fast_runs_percentile]) == 0:
            print(f"No runs in percentile subset {id_fast_runs_percentile} - skipping plots")
            continue

        percentile_returns = np.array(all_returns[id_fast_runs_percentile], dtype=object)
        all_dates_percentage_of_run_following = percentile_returns[:,0].astype(float)
        all_dates_run_lengths = percentile_returns[:,1].astype(float)

        # filter out long runs from days with low amount of runs
        is_regular_run = zscore(all_dates_run_lengths) < 5
        all_dates_percentage_of_run_following = all_dates_percentage_of_run_following[is_regular_run]
        all_dates_run_lengths = all_dates_run_lengths[is_regular_run]

        # histogram following percentages
        plt.figure(figsize=(15,8))
        plt.title(f"[{start_date}] - [{end_date}] - histogram of leading percentages per run ")
        plt.hist(all_dates_percentage_of_run_following, bins=20)
        plt.xlabel("percentage of run time leading the target fish")
        plt.ylabel("number of runs")
        plt.savefig(f"./plots/{id_fast_runs_percentile}_histogram_run_length_following_percentage")
        plt.show()

        # run length following percentage correlation plot
        plt.figure(figsize=(15,8))
        plt.title(f"[{start_date}] - [{end_date}] - correlation plot of run lengths and leading percentage")
        plt.xlabel("run time in seconds")
        plt.ylabel("percentage of run time leading the target")
        plt.ylim(0,1)

        # linear regression: data points as blue circles, fit as black dashed line
        coef = np.polyfit(all_dates_run_lengths, all_dates_percentage_of_run_following, 1)
        poly1d_fn = np.poly1d(coef)
        plt.plot(all_dates_run_lengths, all_dates_percentage_of_run_following, 'bo', all_dates_run_lengths, poly1d_fn(all_dates_run_lengths), '--k')

        # polynomial regression (degree 10), drawn as red dashed line
        mymodel = np.poly1d(np.polyfit(all_dates_run_lengths, all_dates_percentage_of_run_following, 10))
        myline = np.linspace(np.min(all_dates_run_lengths), np.max(all_dates_run_lengths), 100)
        plt.plot(myline, mymodel(myline), '--r')

        plt.savefig(f"./plots/{id_fast_runs_percentile}_scatter_run_length_following_percentage")
        plt.show()

        # boxplot of the following share per run-length bin
        plt.figure(figsize=(15,8))
        bin_edges_array = np.linspace(np.min(all_dates_run_lengths), np.max(all_dates_run_lengths), n_box_bins + 1)
        # np.digitize treats the last edge as exclusive, which would put the longest
        # run(s) into an extra overflow bin; clip so they land in the last regular bin
        bin_ids = np.clip(np.digitize(all_dates_run_lengths, bin_edges_array) - 1, 0, n_box_bins - 1)
        binned_percentage_of_run_following = [all_dates_percentage_of_run_following[bin_ids == bin_id] for bin_id in range(n_box_bins)]
        labels = [f"{np.round(lower_edge)}-\n{np.round(upper_edge)}" for lower_edge, upper_edge in zip(bin_edges_array[:-1], bin_edges_array[1:])]

        plt.title(f"[{start_date}] - [{end_date}] - box plot of run lengths and leading percentage")
        plt.boxplot(binned_percentage_of_run_following)
        # boxes are placed at positions 1..n; label them with the bin ranges
        plt.xticks(np.arange(1, n_box_bins + 1), labels)
        plt.xlabel("run time in seconds")
        plt.ylabel("percentage of run time leading the target")
        plt.ylim(0,1)
        plt.savefig(f"./plots/{id_fast_runs_percentile}_boxplot_run_length_following_percentage")
        plt.show()

    return all_returns

def calculate_following_for_run(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Collect the 'following' values of fish 1 for one run and the fraction of frames they sum to.

    run: (first_frame, last_frame) indices into date_dict['fish'], both inclusive.
    id_run: index of the run, used to look up its run length and difficulty.
    run_arguments, plot, verbose: accepted for the common run-function signature, unused here.

    Returns [following fraction, run length, difficulty, per-frame following values].
    """
    frames = slice(run[0], run[1] + 1)
    following_per_frame = np.array([fish_in_frame[1]['following'] for fish_in_frame in date_dict['fish'][frames]])

    run_length = date_dict['run_lengths'][id_run]
    difficulty = date_dict['difficulties'][id_run]

    # sum of the following values divided by the number of frames
    n_frames = len(following_per_frame)
    following_fraction = following_per_frame.sum() / n_frames

    return [following_fraction, run_length, difficulty, following_per_frame]

In [None]:
# compute and plot the following share per run over all dates in the range
start_date = "2021-11-19"
end_date = "2022-10-25"
# NOTE: the assignment below is commented out, so fast_runs_percentiles keeps the value set by an earlier cell
#fast_runs_percentiles = [100,25,10]

all_returns = plot_following_percentage_for_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

In [None]:
# Unpack the per-run results of plot_following_percentage_for_all_dates.
# Columns per run: 0 = following share, 1 = run length, 2 = difficulty, 3 = per-frame following values.
# all_returns[0] / [1] / [2] are the three percentile subsets (suffix _100 / _25 / _10).
all_dates_percentage_of_run_following_100 = np.array(all_returns[0],dtype=object)[:,0]
all_dates_run_lengths_100 = np.array(all_returns[0],dtype=object)[:,1]
all_dates_diffculty_100 = np.array(all_returns[0],dtype=object)[:,2]
# "after contact": keep each run only from the first frame holding the maximum following value
# (presumably the first frame in which the fish follows - assumes boolean flags, TODO confirm)
all_dates_following_100_after_contact =  [following_this_run[np.argmax(following_this_run):] for following_this_run in np.array(all_returns[0],dtype=object)[:,3]]
all_dates_percentage_of_run_following_100_after_contact = [np.sum(run) / len(run) for run in all_dates_following_100_after_contact]


all_dates_percentage_of_run_following_25 = np.array(all_returns[1],dtype=object)[:,0]
all_dates_run_lengths_25 = np.array(all_returns[1],dtype=object)[:,1]
all_dates_diffculty_25 = np.array(all_returns[1],dtype=object)[:,2]
all_dates_following_25_after_contact =  [following_this_run[np.argmax(following_this_run):] for following_this_run in np.array(all_returns[1],dtype=object)[:,3]]
all_dates_percentage_of_run_following_25_after_contact = [np.sum(run) / len(run) for run in all_dates_following_25_after_contact]


all_dates_percentage_of_run_following_10 = np.array(all_returns[2],dtype=object)[:,0]
all_dates_run_lengths_10 = np.array(all_returns[2],dtype=object)[:,1]
all_dates_diffculty_10 = np.array(all_returns[2],dtype=object)[:,2]
all_dates_following_10_after_contact =  [following_this_run[np.argmax(following_this_run):] for following_this_run in np.array(all_returns[2],dtype=object)[:,3]]
all_dates_percentage_of_run_following_10_after_contact = [np.sum(run) / len(run) for run in all_dates_following_10_after_contact]

# means per subset (these three values are not split by difficulty, despite the printed header)
print("Mean leading percentage by difficulty of all, quartile and tenth percentile runs")
print(np.mean(all_dates_percentage_of_run_following_100))
print(np.mean(all_dates_percentage_of_run_following_25))
print(np.mean(all_dates_percentage_of_run_following_10))
print("Mean leading percentage by difficulty of all, quartile and tenth percentile runs after first contact")
print(np.mean(all_dates_percentage_of_run_following_100_after_contact))
print(np.mean(all_dates_percentage_of_run_following_25_after_contact))
print(np.mean(all_dates_percentage_of_run_following_10_after_contact))



print("\n")
# medians per subset (again not split by difficulty)
print("Median leading percentage by difficulty of all, quartile and tenth percentile runs")

print(np.median(all_dates_percentage_of_run_following_100))
print(np.median(all_dates_percentage_of_run_following_25))
print(np.median(all_dates_percentage_of_run_following_10))

print("Median leading percentage by difficulty of all, quartile and tenth percentile runs after first contact")
print(np.median(all_dates_percentage_of_run_following_100_after_contact))
print(np.median(all_dates_percentage_of_run_following_25_after_contact))
print(np.median(all_dates_percentage_of_run_following_10_after_contact))


print("\n")

print("\n")
# medians split by difficulty value (2, 4, 6), one group of three lines per subset
print("Median leading percentage by difficulty of all runs")
print(np.median(all_dates_percentage_of_run_following_100[all_dates_diffculty_100==2]))
print(np.median(all_dates_percentage_of_run_following_100[all_dates_diffculty_100==4]))
print(np.median(all_dates_percentage_of_run_following_100[all_dates_diffculty_100==6]))
print("\n")
print("Median leading percentage by difficulty of quartile runs")
print(np.median(all_dates_percentage_of_run_following_25[all_dates_diffculty_25==2]))
print(np.median(all_dates_percentage_of_run_following_25[all_dates_diffculty_25==4]))
print(np.median(all_dates_percentage_of_run_following_25[all_dates_diffculty_25==6]))
print("\n")
print("Median leading percentage by difficulty of tenth percentile runs")
print(np.median(all_dates_percentage_of_run_following_10[all_dates_diffculty_10==2]))
print(np.median(all_dates_percentage_of_run_following_10[all_dates_diffculty_10==4]))
print(np.median(all_dates_percentage_of_run_following_10[all_dates_diffculty_10==6]))

print("\n")

### Plot run length and initial target (fish and zone) dist correlation

In [None]:
plot_runlength_dist_goal_target_corr(dates_dict, start_date=None, end_date=None, only_successful=True, challenges=True, show=True)

In [None]:
def plot_run_length_initial_dist_for_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Plot the mean initial robot-to-target-fish distance against the run length.

    Every run is evaluated with calculate_run_length_initial_dist_for_run via
    iteratively_evaluate_dates_files_runwise. If fast_runs_percentiles is given,
    the run lengths of each percentile subset are split into 20 equally wide
    bins and the mean initial distance to the target fish per bin is drawn as
    one line per subset into a single figure, which is saved to ./plots/.
    Bins without runs show up as gaps in the line; subsets without runs are skipped.

    Returns
    -------
    The per-run results as returned by iteratively_evaluate_dates_files_runwise,
    converted to an object array.
    """
    all_returns = iteratively_evaluate_dates_files_runwise(eval_run_function=calculate_run_length_initial_dist_for_run, start_date=start_date, end_date=end_date, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, run_arguments=None, plot=plot, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    if fast_runs_percentiles is None:
        return all_returns

    n_bins = 20
    perc_labels = ["all", "top quartile", "top tenth percentile"]

    plt.figure(figsize=(16,9))
    for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
        # each row is [initial distance to target fish, initial distance to goal, run length]
        run_rows = all_returns[id_fast_runs_percentile]
        if len(run_rows) == 0:
            print(f"No runs in percentile subset {id_fast_runs_percentile} - skipping")
            continue

        all_dates_initial_distance_target = np.array([row[0] for row in run_rows], dtype=float)
        all_dates_run_lengths = np.array([row[2] for row in run_rows], dtype=float)

        # bin the runs by run length
        bin_edges_array = np.linspace(np.min(all_dates_run_lengths), np.max(all_dates_run_lengths), n_bins + 1)
        # np.digitize treats the last edge as exclusive, which would put the longest
        # run(s) into an extra overflow bin; clip so they land in the last regular bin
        bin_ids = np.clip(np.digitize(all_dates_run_lengths, bin_edges_array) - 1, 0, n_bins - 1)

        # x: centre of each bin, y: mean initial distance of the runs in the bin (NaN if the bin is empty)
        bin_x_centers = (bin_edges_array[:-1] + bin_edges_array[1:]) / 2
        mean_per_bin = []
        for bin_id in range(n_bins):
            bin_dists = all_dates_initial_distance_target[bin_ids == bin_id]
            mean_per_bin.append(np.nanmean(bin_dists) if bin_dists.size > 0 else np.nan)

        # fall back to a generic label when more subsets than predefined labels are requested
        if id_fast_runs_percentile < len(perc_labels):
            subset_label = perc_labels[id_fast_runs_percentile]
        else:
            subset_label = f"run subset {fast_runs_percentile}"
        plt.plot(bin_x_centers, mean_per_bin, label=subset_label)

    plt.title(f"[{start_date}] - [{end_date}] - plot of run lengths and initial distances to target")
    plt.xlabel("run time in seconds")
    plt.ylabel("initial distance to target")
    # the legend has to be drawn before saving, otherwise the saved file lacks it
    plt.legend()
    plt.savefig(f"./plots/plot_run_length_initial_distance_target")
    plt.show()

    return all_returns
    
def calculate_run_length_initial_dist_for_run(run, id_run, date_dict, run_arguments=None, plot=False, verbose=False):
    """Collect the start-of-run distances and the run length for a single run.

    Args:
        run: Indexable whose first two entries are the first and last timestep
            index of the run (the end index is treated as inclusive).
        id_run: Index of this run in ``date_dict['run_lengths']``.
        date_dict: Per-day data; the keys ``'fish'``, ``'positions'`` and
            ``'run_lengths'`` are read.
        run_arguments: Accepted but not used here.
        plot: Accepted but not used here.
        verbose: Accepted but not used here.

    Returns:
        ``[initial_distance_target, initial_dist_goal, run_length]`` where the
        first entry is ``distance(...)`` between the fish and robot positions at
        the first timestep of the run, and the second is
        ``get_distance_to_goal(...)`` of the first robot position.
    """
    run_slice = slice(run[0], run[1] + 1)

    # NOTE(review): this reads the fish at index 1 of every timestep, while other
    # cells in this notebook use index 0 of get_fish_pos_per_run's output —
    # confirm both refer to the same (target) fish.
    fish_track = np.array([fish_at_ts[1]['position'] for fish_at_ts in date_dict['fish'][run_slice]])
    robot_track = np.array(date_dict['positions'][run_slice])
    duration = date_dict['run_lengths'][id_run]

    start_robot_pos = robot_track[0]
    dist_to_target_at_start = distance(fish_track[0], start_robot_pos)
    dist_to_goal_at_start = get_distance_to_goal(start_robot_pos)

    return [dist_to_target_at_start, dist_to_goal_at_start, duration]
    

In [None]:
# Evaluate run length against the initial distances (to target fish and to goal)
# for all runs in the date range below, and draw the plot.
start_date = "2021-11-19"
end_date = "2021-11-25"
# NOTE(review): fast_runs_percentiles is not assigned in this cell, so the call below
# uses whatever value is currently in the kernel — confirm that this is the intended one.
#fast_runs_percentiles = [100,25,10]

all_returns = plot_run_length_initial_dist_for_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

In [None]:
# Split the per-run results of each percentile group into their three columns
# (0: initial distance to target, 1: initial distance to goal, 2: run length)
# and print the group means of both distances.
returns_100, returns_25, returns_10 = (np.array(all_returns[group]) for group in (0, 1, 2))

(all_dates_initial_distance_target_100,
 all_dates_initial_dist_goal_100,
 all_dates_run_lengths_100) = (returns_100[:, column] for column in range(3))

(all_dates_initial_distance_target_25,
 all_dates_initial_dist_goal_25,
 all_dates_run_lengths_25) = (returns_25[:, column] for column in range(3))

(all_dates_initial_distance_target_10,
 all_dates_initial_dist_goal_10,
 all_dates_run_lengths_10) = (returns_10[:, column] for column in range(3))

# first line: mean initial distance to target; second line: mean initial distance to goal
for group_values in (
    (all_dates_initial_distance_target_100, all_dates_initial_distance_target_25, all_dates_initial_distance_target_10),
    (all_dates_initial_dist_goal_100, all_dates_initial_dist_goal_25, all_dates_initial_dist_goal_10),
):
    print(*(np.mean(values) for values in group_values))

### Plot dist to goal(x) and dist to target(y)

In [None]:
# Correlate the robot's distance to the goal with its distance to the target fish:
# once for the first timestep of every run, and once for all timesteps of all runs.
only_successful = True
# Only consulted when only_successful is False. Defined here so that the run filter
# below does not rely on a value left over in the kernel.
challenges = True

dates_keys = dates_dict.keys()

# Without explicit bounds, fall back to the first/last loaded date (also shown in the titles).
if start_date is None and len(dates_keys) > 0:
    start_date = list(dates_keys)[0]
if end_date is None and len(dates_keys) > 0:
    end_date = list(dates_keys)[-1]

# Parse the bounds once instead of once per loaded date.
start_date_dt = datetime.strptime(start_date, '%Y-%m-%d') if start_date is not None else None
end_date_dt = datetime.strptime(end_date, '%Y-%m-%d') if end_date is not None else None

daily_initial_dist_robot_target_fish = []  # one list of per-run values per day
daily_initial_dist_robot_goal = []         # one list of per-run values per day
daily_all_dist_robot_target_fish = []      # flat: every timestep of every run
daily_all_dist_robot_goal = []             # flat: every timestep of every run
for date_key in dates_keys:

    # filter date range
    date = datetime.strptime(date_key, '%Y-%m-%d')
    if start_date_dt is not None and start_date_dt > date:
        continue
    if end_date_dt is not None and end_date_dt < date:
        continue

    # generate data for plots
    date_dict = dates_dict[date_key]

    fish_instance = date_dict["fish"]
    runs = date_dict["runs"]

    # filter runs
    if only_successful:
        runs, ids_runs = get_successful_runs(runs, date_dict["successful"])
    elif challenges:
        runs, ids_runs = get_challenge_runs(runs, date_dict["challenges"])
    else:
        ids_runs = list(range(len(runs)))

    # get pos for all fish for all timesteps
    if len(runs) == 0:
        print(f"No runs in {date_key}")
        continue
    fish_pos_runs = get_fish_pos_per_run(fish_instance, runs)

    day_all_dist_robot_target_fish = []
    day_all_dist_robot_goal = []
    day_initial_dist_robot_target_fish = []
    day_initial_dist_robot_goal = []
    for id_run, run in enumerate(runs):

        # positions of the first listed fish and of the robot for every timestep of the run
        all_fish_pos_this_run = fish_pos_runs[id_run]
        fish1_pos_this_run = np.array([fish[0] for fish in all_fish_pos_this_run])
        robot_pos_run = np.array(date_dict['positions'][run[0]:run[1]+1])

        # sanity check: both tracks must cover the same timesteps
        assert len(fish1_pos_this_run) == len(robot_pos_run), (
            f"Wrong array lengths: fish and robot {run} "
            f"fish:{len(fish1_pos_this_run)}; robot:{len(robot_pos_run)}"
        )

        # get dist to target fish at the start of the run
        initial_distance = distance(fish1_pos_this_run[0], robot_pos_run[0])
        day_initial_dist_robot_target_fish.append(initial_distance)

        # get dist to goal at the start of the run
        inital_robot_pos_run = robot_pos_run[0]
        initial_dist_goal = get_distance_to_goal(inital_robot_pos_run)
        day_initial_dist_robot_goal.append(initial_dist_goal)

        # calc dist of all pos to target and goal
        all_goal_dists = get_distance_to_goal(robot_pos_run)
        all_target_dists = np.linalg.norm(fish1_pos_this_run - robot_pos_run, axis=1)
        assert len(all_goal_dists) == len(all_target_dists)

        day_all_dist_robot_goal.extend(all_goal_dists)
        day_all_dist_robot_target_fish.extend(all_target_dists)

        # per-day example: scatter of the first run only
        if id_run == 0:
            fig3 = plt.figure(figsize=(16,9))
            plt.scatter(all_goal_dists, all_target_dists, s=3)
            plt.xlabel("dist to goal")
            plt.ylabel("distance to target fish")
            plt.title(f"[{date_key}] - run {id_run} - correlation from dists to goal to dists to target fish")

    daily_initial_dist_robot_target_fish.append(day_initial_dist_robot_target_fish)
    daily_initial_dist_robot_goal.append(day_initial_dist_robot_goal)

    daily_all_dist_robot_target_fish.extend(day_all_dist_robot_target_fish)
    daily_all_dist_robot_goal.extend(day_all_dist_robot_goal)

flat_daily_initial_dist_robot_goal = flatten_2d_list(daily_initial_dist_robot_goal)
flat_daily_initial_dist_robot_target_fish = flatten_2d_list(daily_initial_dist_robot_target_fish)

print(np.asarray(daily_all_dist_robot_target_fish).shape)

# plot1: one point per run (first timestep)
# (no plt.legend(): the scatter has no labelled artists, so a legend would only emit a warning)
fig1 = plt.figure(figsize=(16,9))
plt.scatter(flat_daily_initial_dist_robot_goal, flat_daily_initial_dist_robot_target_fish, s=3)
plt.xlabel("initial dist to goal")
plt.ylabel("initial distance to target fish")
plt.title(f"[{start_date}] - [{end_date}] - correlation from initial dist to goal to initial dist to target fish")
plt.show()

# plot2: one point per timestep of every run
fig2 = plt.figure(figsize=(16,9))
plt.scatter(daily_all_dist_robot_goal, daily_all_dist_robot_target_fish, s=3)
plt.xlabel("dist to goal")
plt.ylabel("distance to target fish")
plt.title(f"[{start_date}] - [{end_date}] - correlation from dists to goal to dists to target fish")
plt.show()

### Histogram x distance to target and y turns towards target

### Plot start positions of target fish

In [None]:
%matplotlib inline
only_successful = True
challenges = True

dates_keys = dates_dict.keys()

if start_date is not None:
        start_date_dt = datetime.strptime(start_date, '%Y-%m-%d')
if end_date is not None:
        end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')

        
all_initial_fish_pos = []
for date_key in dates_keys:

    # check date 
    date = datetime.strptime(date_key, '%Y-%m-%d')
    if start_date is not None and start_date_dt > date:
        continue
    if end_date is not None and end_date_dt < date:
        continue

    # generate data for plots
    day_initial_fish_pos = []
    date_dict = dates_dict[date_key]

    fish_instance = date_dict["fish"]
    runs = date_dict["runs"]
    if only_successful:
        runs, _ = get_successful_runs(runs,date_dict["successful"])
    elif challenges:
        runs, _ = get_challenge_runs(runs,date_dict["challenges"])
    else:
        runs = date_dict["runs"]

    if len(runs) > 0:
        fish_pos_runs = get_fish_pos_per_run(fish_instance,runs)
    else:
        continue

    for id_run, run in enumerate(runs):
        fish_pos_this_run = fish_pos_runs[id_run]
        robot_pos_run = np.array(date_dict['positions'][run[0]:run[1]])
        # sanity check
        if len(fish_pos_this_run) != len(robot_pos_run):
            print("Wrong array lengths: fish and robot")
            assert False
        fish1_pos_this_run = np.array([fish[0] for fish in fish_pos_this_run])
        
        day_initial_fish_pos.append(fish1_pos_this_run[0])
    all_initial_fish_pos.append(day_initial_fish_pos)
flat_all_initial_fish_pos = []
for day_initial_fish_pos in all_initial_fish_pos:
    for initial_fish_pos in day_initial_fish_pos:
        flat_all_initial_fish_pos.append(initial_fish_pos)
flat_all_initial_fish_pos = np.asarray(flat_all_initial_fish_pos)

# print(flat_all_initial_fish_pos.shape)

fig = plt.figure(figsize=(15,9))
plt.xlim(0,2000)
plt.ylim(0,2000)
plt.scatter(flat_all_initial_fish_pos[:,0], flat_all_initial_fish_pos[:,1])
plt.show()

### Plot difficulty histogram

In [None]:
import matplotlib.patches as patches

def plot_difficulty_histogram_for_all_dates(start_date=None, end_date=None, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=None, plot=False, verbose=False):
    """Count easy/medium/hard runs over a date range and draw one bar chart per percentile.

    The per-date counts are gathered by ``iteratively_evaluate_dates_files`` using
    ``collect_all_difficulties_in_date_dict`` as the evaluation function.

    Args:
        start_date, end_date: Date bounds, forwarded to the iterator.
        load_only_challenge_data, challenges, only_successful, verbose: Forwarded
            unchanged to the iterator.
        fast_runs_percentiles: Sequence of percentile values; one panel is drawn
            per entry. With ``None`` (or an empty sequence) nothing is plotted.
        plot: Accepted for signature parity with the sibling plot functions; the
            bar charts are drawn whenever ``fast_runs_percentiles`` is given.

    Returns:
        The iterator's return value converted to an object-dtype numpy array.
        The plotting code assumes ``all_returns[i]`` holds one
        ``[easy, medium, hard]`` row per date for the i-th percentile.
    """
    all_returns = iteratively_evaluate_dates_files(start_date=start_date, end_date=end_date, eval_date_function=collect_all_difficulties_in_date_dict, load_only_challenge_data=load_only_challenge_data, challenges=challenges, only_successful=only_successful, fast_runs_percentiles=fast_runs_percentiles, verbose=verbose)
    all_returns = np.asarray(all_returns, dtype='object')

    if fast_runs_percentiles is not None and len(fast_runs_percentiles) > 0:
        # one panel per percentile; squeeze=False keeps axs an array even for a single panel
        n_panels = len(fast_runs_percentiles)
        fig, axs = plt.subplots(nrows=1, ncols=n_panels, figsize=(5 * n_panels, 5), squeeze=False)
        axs = axs.reshape(-1)

        for id_fast_runs_percentile, fast_runs_percentile in enumerate(fast_runs_percentiles):
            per_date_counts = np.array(all_returns[id_fast_runs_percentile], dtype='object')
            # sum the per-date counts of each difficulty column
            n_easy = np.sum(per_date_counts[:, 0])
            n_medium = np.sum(per_date_counts[:, 1])
            n_hard = np.sum(per_date_counts[:, 2])

            print(f"Percentile {fast_runs_percentile}: \n\teasy:{n_easy}, medium:{n_medium}, hard:{n_hard}")

            ax = axs[id_fast_runs_percentile]
            ax.bar(x=range(3), height=[n_easy, n_medium, n_hard], tick_label=["easy","medium","hard"])
            ax.set_title(f"percentile {fast_runs_percentile}")
            ax.set_ylabel("number of runs")
        plt.show()

    return all_returns
    
def collect_all_difficulties_in_date_dict(date_dict,challenges, only_successful, fast_runs_percentile):
    """Count the runs of one day per difficulty level.

    Args:
        date_dict: Per-day data. The keys ``'runs'``, ``'run_lengths'`` and
            ``'difficulties'`` are read, plus ``'successful'`` or ``'challenges'``
            depending on the selected filter. A falsy value (``None``/empty)
            yields ``[0, 0, 0]``.
        challenges: If true (and ``only_successful`` is false), keep only the runs
            selected by ``get_challenge_runs``.
        only_successful: If true, keep only the runs selected by
            ``get_successful_runs``; takes precedence over ``challenges``.
        fast_runs_percentile: Passed as ``percentile`` to ``get_fast_runs`` to
            further restrict the runs; ``None`` disables this filter.

    Returns:
        ``[easy, medium, hard]``: the number of remaining runs whose difficulty
        value is 2, 4 and 6 respectively.
    """
    if not date_dict:
        return [0,0,0]

    runs = date_dict["runs"]
    run_lengths = date_dict["run_lengths"]

    if only_successful:
        runs, ids_runs = get_successful_runs(runs,date_dict["successful"])
        run_lengths = np.asarray(run_lengths)[ids_runs]
    elif challenges:
        runs, ids_runs = get_challenge_runs(runs,date_dict["challenges"])
        run_lengths = np.asarray(run_lengths)[ids_runs]
    else:
        ids_runs = list(range(len(runs)))

    # filter good runs
    # Test this function's own parameter, not the notebook-level list
    # `fast_runs_percentiles` (which may be undefined or unrelated to this call).
    if fast_runs_percentile is not None:
        runs, ids_runs = get_fast_runs(runs, ids_runs, run_lengths, percentile=fast_runs_percentile)

    all_filtered_run_difficulties = np.array(date_dict["difficulties"])[ids_runs]

    # difficulty codes as stored in the data: 2 = easy, 4 = medium, 6 = hard
    easy = int(np.count_nonzero(all_filtered_run_difficulties == 2))
    medium = int(np.count_nonzero(all_filtered_run_difficulties == 4))
    hard = int(np.count_nonzero(all_filtered_run_difficulties == 6))

    return [easy, medium, hard]

In [None]:
# Count easy/medium/hard runs over the date range below and plot one histogram per percentile.
# Note: this cell rebinds the notebook-level start_date, end_date and fast_runs_percentiles,
# so cells executed afterwards see these values.
start_date = "2021-11-19"
end_date = "2022-12-25"
fast_runs_percentiles = [100,25,10]

all_returns = plot_difficulty_histogram_for_all_dates(start_date=start_date, end_date=end_date, load_only_challenge_data=True, challenges=True, only_successful=True, fast_runs_percentiles=fast_runs_percentiles, plot=True, verbose=False)

### Plot usage statistics

In [None]:
%matplotlib inline
# Time-of-day histogram of the runs in the currently loaded dates_dict,
# called with only_successful=True and challenges=False.
# NOTE(review): binning and aggregation are defined in plot_usage_statistics — confirm there.
fig = plot_time_of_day_histogram(dates_dict, challenges=False, only_successful=True)

In [None]:
%matplotlib inline
# Bar plot of the number of runs per day for the currently loaded dates_dict.
# NOTE(review): the original comment described this as "(unique) visitors"; presumably
# runs are used as a proxy for visitors — confirm in plot_usage_statistics.
fig = plot_daily_number_runs(dates_dict, show=True)
# written relative to the current working directory
fig.savefig("num_runs.png")

In [None]:
%matplotlib inline
# Plot daily use times and operational times (in hours) for the currently loaded
# dates_dict; to_pdf=False is passed, presumably to skip PDF export — confirm in plot_usage_statistics.
fig = plot_daily_use_times_and_operational_times(dates_dict, to_pdf=False)

In [None]:
# Plot the daily start and end times for the currently loaded dates_dict.
# The return value (if any) is not kept, unlike in the neighbouring usage-statistics cells.
%matplotlib inline
plot_daily_start_end_times(dates_dict)

In [None]:
# Weekday "business" plot for the currently loaded dates_dict only; the cell under
# "overall weekday business plot" recomputes the same kind of statistic day by day from disk.
fig = plot_weekday_business(dates_dict, show=True)

### overall weekday business plot

In [None]:
from util import daterange

use_only_challenge_data = True

# Load the days one by one and collect, per weekday, the percentage of operational
# time the system was in use and the estimated number of visitors.
# NOTE(review): this cell rebinds the notebook-level start_date/end_date to datetime
# objects and dates_dict to a single-day dict; cells executed afterwards see the
# datetime bounds and only the last day loaded here.
start_date = datetime.strptime("2021-11-19", "%Y-%m-%d")
end_date = datetime.strptime("2022-10-25", "%Y-%m-%d")

weekday_perc_use_times = [[] for i in range(7)] #setup weekday array; 0 is monday
weekday_visitors = [[] for i in range(7)]
for single_date in daterange(start_date, end_date):
    current_date_str = single_date.strftime("%Y-%m-%d")
    # Determine the weekday up front: it is needed for days with AND without data.
    # (Computing it only for days with data made empty days reuse the weekday of
    # the previous day, or fail if the very first day was empty.)
    date_weekday = single_date.weekday()

    # load day
    dates_dict = load_dates_from_npz(current_date_str, current_date_str, only_challenges=False, verbose=False)
    # extend day data
    dates_dict = extend_robot_data(dates_dict)
    # generate stats
    date_dict = dates_dict.get(current_date_str,{})
    if date_dict:
        date_ts = date_dict["timestamps"]
        date_run_lengths = date_dict["run_lengths"]
        date_runs = date_dict["runs"]

        # percentual use time (day length and run lengths are converted from seconds to hours)
        day_length = date_dict.get("day_length",0)/60/60
        day_use_time = np.sum(date_run_lengths)/60/60
        perc_use_time = day_use_time / day_length * 100 if day_length != 0 else 0.0

        # estimated num of visitors: all runs that are not challenge runs
        estimated_num_visitors = len(date_dict['runs']) - len(get_challenge_runs(date_runs,date_dict["challenges"])[0])

        # fill weekday arrays
        weekday_perc_use_times[date_weekday].append(perc_use_time)
        weekday_visitors[date_weekday].append(estimated_num_visitors)

    else:
        # day without data counts as zero use / zero visitors
        weekday_perc_use_times[date_weekday].append(0)
        weekday_visitors[date_weekday].append(0)

# get means (0 for weekdays that never occurred in the range)
mean_weekday_perc_use_times = [
    np.nanmean(weekday) if len(weekday) > 0 else 0
    for weekday in weekday_perc_use_times
]
mean_weekday_visitors = [
    np.nanmean(weekday) if len(weekday) > 0 else 0
    for weekday in weekday_visitors
]


# plot
with sns.axes_style("darkgrid"):
    title = f"average weekday use times and visitors \n{start_date} - {end_date}"
    fig, ax1 = plt.subplots(num=title, figsize=(13,7))

    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # bars (left axis): visitors; dots (right axis): percentual use time
    ax2 = ax1.twinx()
    num_vis_plot = ax1.bar(weekdays, mean_weekday_visitors, color='g', label='mean number of estimated visitors per weekday')
    perc_plot = ax2.plot(mean_weekday_perc_use_times, 'bo', ms=10, label='mean percentual use time of operational time')

    import matplotlib.ticker as mtick
    ax2.yaxis.set_major_formatter(mtick.PercentFormatter())


    ax1.set_xlabel('weekdays')
    ax1.set_ylabel('number of estimated visitors', color='g')
    ax2.set_ylabel('percentual use time of running time (in %)', color='b')

    from matplotlib import rcParams
    rcParams.update({'figure.autolayout': True})
    ax1.grid(None)
    ax2.grid(None)

    # both artists live on different axes, so the legend handles are passed explicitly
    ax1.legend(handles=[num_vis_plot,perc_plot[0]])


    plt.show()
    fig.savefig("overall_weekday_business.png")


# Filter challenge data

In [None]:
# remove all non challenge data from dates_dict
# NOTE(review): the return value is discarded, so this presumably filters dates_dict
# in place — confirm in util.filter_dates_dict_for_challenge_runs.
filter_dates_dict_for_challenge_runs(dates_dict)

# Save data

### npy

In [None]:
# save all loaded dates to npz (called with only_challenges=True)
save_dates_to_npz(dates_dict, only_challenges=True)

### compressed npz

In [None]:
# Save every loaded day to its own compressed .npz file in ./loaded_data/.
# The day dict is stored as a pickled object array under the default key "arr_0".

for key in dates_dict.keys():
    date = dates_dict[key]
    # os.path.join instead of a backslash literal: "\l" and "\d" are invalid escape
    # sequences, and the backslash form only addresses a sub-directory on Windows
    file_name = os.path.join(".", "loaded_data", f"dates_dict_{key}_compressed.npz")
    print(f"Saving {key} to {file_name}")
    # savez_compressed actually compresses the archive; np.savez stores it uncompressed
    np.savez_compressed(file_name, date)

### json

In [None]:
import json


def _json_default(value):
    """Fallback for json.dump: turn numpy arrays and scalars into plain Python objects."""
    if isinstance(value, (np.ndarray, np.generic)):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# save day by day into json files
for key in dates_dict.keys():
    date = dates_dict[key]
    # os.path.join instead of a backslash literal (invalid escapes, Windows-only)
    file_name = os.path.join(".", "loaded_data", f"dates_dict_{key}.json")
    print(f"Saving {key} to {file_name}")
    with open(file_name, 'w') as fp:
        # `default` is only consulted for values json cannot serialize itself
        json.dump(date, fp, default=_json_default)


### hdf5

In [None]:
import h5py


def _create_dataset_or_report(parent, name, data, **kwargs):
    """Create an HDF5 dataset; on failure print the dataset name and the error, then re-raise."""
    try:
        return parent.create_dataset(name, data=data, **kwargs)
    except Exception as e:
        print(name)
        print(e)
        raise


# Save day by day into HDF5 files: one file per day, one dataset per key of the
# day dict; 'fish' becomes nested groups fish/<timestep>/<fish index>/<field>.
for key in dates_dict.keys():
    date_dict = dates_dict[key]
    # os.path.join instead of a backslash literal (invalid escapes, Windows-only)
    file_name = os.path.join(".", "loaded_data", f"dates_dict_{key}_compressed.h5")
    print(f"Saving {key} to {file_name}")
    # the context manager closes the file even if writing a dataset fails
    with h5py.File(file_name, 'w') as hf:
        for dset_name in date_dict:
            if dset_name == 'fish':
                all_fish_grp = hf.create_group('fish')
                for ts, fish_ts in enumerate(date_dict['fish']):
                    ts_grp = all_fish_grp.create_group(str(ts))
                    for i, fish in enumerate(fish_ts):
                        fish_grp = ts_grp.create_group(str(i))
                        for dset_name_fish in fish:
                            _create_dataset_or_report(fish_grp, dset_name_fish, fish[dset_name_fish])
            elif dset_name == 'day_length':
                # stored without compression — presumably because it is a scalar,
                # for which HDF5 filters are not applicable
                _create_dataset_or_report(hf, dset_name, date_dict[dset_name])
            else:
                _create_dataset_or_report(hf, dset_name, date_dict[dset_name], compression="gzip")

## deta base

In [None]:
import toml
from deta import Deta  # Import Deta


# read deta key from secrets
# (os.path.join instead of ".streamlit\secrets.toml": "\s" is an invalid escape
# sequence and the backslash form only resolves to a sub-directory on Windows)
project_key = toml.load(os.path.join(".streamlit", "secrets.toml"))["deta_key"]

# Initialize with a Project Key
deta = Deta(project_key)

# This how to connect to or create a database.
#db = deta.Base("human_leadership_data")
test = deta.Base("test")

# Signature of Base.put, for reference:
#   put(
#     data: typing.Union[dict, list, str, int, float, bool],
#     key: str = None,
#     *,
#     expire_in: int = None,
#     expire_at: typing.Union[int, float, datetime.datetime] = None
#   )

# store the timestamps of every loaded day under the date as key and read them back
for date_key in dates_dict:
    print(date_key)
    test.put({"timestamps":dates_dict[date_key]["timestamps"]}, date_key)
    print(test.get(date_key).keys())


## deta drive

In [None]:
# upload files
from deta import Deta  # Import Deta

# read deta key from secrets
# (os.path.join instead of a backslash literal: invalid escape, Windows-only)
project_key = toml.load(os.path.join(".streamlit", "secrets.toml"))["deta_key"]

# Initialize with a Project Key
deta = Deta(project_key)

# connect to (or create) the drive and upload every challenge file found in ./loaded_data/
drive = deta.Drive("human_leadership_data_HF")
date_files = glob.glob(os.path.join(".", "loaded_data", "challenges_dates_dict_*.npy"))
for file in date_files:
    print(f"Uploading {file}")
    # the file is stored in the drive under its base name
    drive.put(os.path.basename(file), path=file)

In [None]:
# load files
from deta import Deta
deta = Deta(toml.load(os.path.join(".streamlit", "secrets.toml"))["deta_key"])        # Initialize with a Project Key
drive = deta.Drive("human_leadership_data_HF")

all_files = drive.list(limit=1000)["names"] #https://docs.deta.sh/docs/drive/sdk#list
if not all_files:
    raise FileNotFoundError("No files found in Deta Drive 'human_leadership_data_HF'")

local_temp_path = "local_temp_file.npy"
# remove a leftover temp file from a previous run; on a fresh checkout there is none
if os.path.exists(local_temp_path):
    os.remove(local_temp_path)

# download the first listed file chunk by chunk
date_file = drive.get(all_files[0])
try:
    with open(local_temp_path, "wb+") as f:
        for chunk in date_file.iter_chunks(4096):
            f.write(chunk)
finally:
    # release the remote stream even if the download fails half-way
    date_file.close()

# NOTE(review): allow_pickle=True unpickles the downloaded file, which can execute
# arbitrary code — only use this with a drive whose contents are trusted.
loaded_dict = np.load(local_temp_path,allow_pickle=True).item()
loaded_dict.keys()
#os.remove("local_temp_file.npy")

In [None]:
# create images drive and write a small text file to it as a smoke test
from deta import Deta
# os.path.join instead of ".streamlit\secrets.toml" (invalid escape, Windows-only)
deta = Deta(toml.load(os.path.join(".streamlit", "secrets.toml"))["deta_key"])        # Initialize with a Project Key
drive = deta.Drive("images_HF")
drive.put('hello.txt', 'Hello world')