In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os,sys,glob,random, re, copy, time, itertools
from tqdm.notebook import trange, tqdm
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()
sns.set()

# from load_data import load_robot_data, load_fish_data, load_behavior_data
from extend_robot_data import extend_robot_data
from plot import plot_all_positions, plot_runs, circular_hist, plot_starts_ends, plot_rotations_and_heatmap, plot_inter_individual_distances, plot_run_length_hist, plot_robot_distance_goal, plot_runlength_dist_goal_target_corr
from plot_usage_statistics import plot_weekday_business, plot_daily_number_runs, plot_time_of_day_histogram, plot_daily_use_times_and_operational_times, plot_daily_start_end_times
from util import filter_dates_dict_for_challenge_runs, flatten_2d_list, get_fish_following_per_run, distance, get_fish_pos_per_run, get_challenge_runs, get_successful_runs, get_distance_to_goal, save_dates_to_npz, load_dates_from_npz, get_hours_minutes_seconds_from_decimal_hours, calculate_run_velocity_speed_acceleration
from loading_GUI import LoadingGUI

from load_data import load_robot_data, load_behavior_data, load_fish_data
from clean_data import clean_data
    
# from importlib import reload  
# reload(load_robot_data)


# Load data

In [None]:
# specify start and end date (min: 2021-11-19 ; max: 2022-10-25)
start_date = "2021-11-19"
end_date = "2021-11-25"

#### load already parsed files

In [None]:
dates_dict = load_dates_from_npz(start_date, end_date, only_challenges=True)

#### parse from logs

In [None]:
# Parse the raw log files for the configured date range into dates_dict.
debug = True  # forwarded to clean_data below

# directories
base_dir = "E:/tubCloud/HF_logs/"
# base_dir="C:/Users/admin/repos/thesis/logs_16_03_22/"

fish_dir = base_dir + "fish"
robot_dir = base_dir + "robot"

behavior_dir = base_dir + "behavior_prints"
user_input_dir = base_dir + "user_input"  # defined for completeness; not used in this cell

# Load robot data
dates_dict_robot = load_robot_data(robot_dir, start_date, end_date)

# The former `dates_dict_robot[end_date].keys()` line was removed: its result
# was discarded (only a cell's last expression is displayed) and the lookup
# could only fail when no entry exists for end_date.

# Continue on an independent deep copy so dates_dict_robot stays untouched.
dates_dict = copy.deepcopy(dates_dict_robot)

# load behavior print data
dates_dict = load_behavior_data(behavior_dir, dates_dict, start_date, end_date)

# load fish data
dates_dict = load_fish_data(fish_dir, dates_dict, start_date, end_date)

# clean data: check if number of fish stay the same in challenge runs and check if target fish starts in correct pos
dates_dict = clean_data(dates_dict, debug=debug)

print(f"\nDone!")

### Extend data

In [None]:
# Extend robot data
dates_dict = extend_robot_data(dates_dict)

### sanity checks

In [None]:
# challenges and runs same length


In [None]:
# detect position jumps (= high acceleration)
# Absolute acceleration above this value is treated as a jump.
acceleration_jump_threshold = 200

dates_keys = dates_dict.keys()

for date_key in dates_keys:
    date_dict = dates_dict[date_key]

    runs = date_dict["runs"]
    for id_run, run in enumerate(runs):
        # presumably one acceleration series per run, indexed like "runs" — TODO confirm
        run_accelerations = date_dict["accelerations"][id_run]
        # plt.plot(run_accelerations)
        # plt.show()

        # find accelerations over the threshold
        ids_over_accelerations = np.where(np.abs(run_accelerations) > acceleration_jump_threshold)

        # Report the hits: the indices used to be computed and then discarded,
        # so this sanity check never surfaced anything.
        n_jumps = ids_over_accelerations[0].size
        if n_jumps > 0:
            print(f"{date_key} run {id_run} {run}: {n_jumps} samples with |acceleration| > {acceleration_jump_threshold}")

In [None]:
# detect rotation jumps


In [None]:
# print(dates_dict['2022-02-02']["runs"])
# Preview the first 100 rows of the per-sample columns of one day.
# NOTE(review): the date is hard-coded; the lookup fails unless that day is loaded.
day = dates_dict['2022-02-02']
test = {column: day[column] for column in ('timestamps', 'positions', 'fish')}
pd.DataFrame.from_dict(test).head(100)

In [None]:
### check for id skips in run timestamps
# Each run is expected to start at the index right after the previous run's
# end index (run[1] + 1); the first run of a day is expected to start at 0.
for date_key in dates_dict.keys():
    date_dict = dates_dict[date_key]
    date_dict_runs = date_dict["runs"]
    date_dict_ts = date_dict["timestamps"]

    start_pointer = 0
    for id_run, run in enumerate(date_dict_runs):
        if start_pointer != run[0]:
            # For the first run there is no predecessor; indexing with -1 would
            # silently report the day's LAST run as "prev-run".
            prev_run = date_dict_runs[id_run - 1] if id_run > 0 else None
            print(f"timestamps skipped in run {run}: prev-run: {prev_run}, end last +1: {date_dict_ts[start_pointer]}; run start:{date_dict_ts[run[0]]}")
            print(f"\t{start_pointer - run[0]}")
        start_pointer = run[1]+1

In [None]:
### challenge runs cannot be longer than ~180 seconds
for date_key in dates_dict.keys():
    date_dict = dates_dict[date_key]
    date_dict_runs = date_dict["runs"]

    # days without runs have nothing to check
    if len(date_dict_runs) == 0:
        continue

    # Mask every non-challenge run and fill the masked slots with 0, so only
    # challenge runs can exceed the limit tested below.
    not_challenge = np.invert(date_dict["challenges"])
    date_run_lengths_masked = np.ma.array(date_dict["run_lengths"], mask=not_challenge, copy=True, fill_value=0)
    date_run_lengths_zerofilled = date_run_lengths_masked.filled()

    # 185 leaves a small margin above the nominal ~180 limit
    long_c_runs = np.nonzero(date_run_lengths_zerofilled > 185)[0]

    # manage too long runs
    if len(long_c_runs) == 0:
        continue

    print(date_run_lengths_zerofilled[long_c_runs])
    print(long_c_runs)
    day_timestamps = date_dict["timestamps"]
    for long_run in long_c_runs:
        run_bounds = date_dict_runs[long_run]
        long_run_start = day_timestamps[run_bounds[0]]
        long_run_end = day_timestamps[run_bounds[1]]
        print(f"long run at: {long_run_start} : {long_run_end}")


***
# Plot

### Plot robot positions

In [None]:
%matplotlib inline
# plot all positions
plot_all_positions(dates_dict, start_date=start_date, end_date=end_date, challenges=True, only_successful=True)

### Plot average rotation and position heatmap

In [None]:
# plot average rotation and position heatmap
%matplotlib inline
plot_rotations_and_heatmap(dates_dict, start_date=start_date, end_date=end_date, challenges=True, only_successful=True, ignore_robot_standing=True, polar_density=True)

### Plot individual runs

In [None]:
# plot (challenge) runs
# NOTE(review): the date is hard-coded here instead of using start_date/end_date —
# confirm that this day is part of the loaded data.
plot_runs(dates_dict, start_date="2022-02-10", end_date="2022-02-10", challenges=True, only_successful=True)

### Plot start and end points of runs

In [None]:
# plot start and end points for all (challenge) runs
plot_starts_ends(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True)

### Plot run length histogram

In [None]:
# plot run length histogram
plot_run_length_hist(dates_dict, start_date=start_date, end_date=None, bin_size=5, challenges=True, only_successful=True)

### Plot inter-individual distances between robot and target fish for all (challenge) runs

In [None]:
# plot inter-individual distances between robot and target fish for all (challenge) runs
plot_inter_individual_distances(dates_dict, start_date, end_date, challenges=True, only_successful=True, bins=10)

### Plot robot distance to goal over time

In [None]:
# plot robot distance to goal over time
plot_robot_distance_goal(dates_dict, start_date=None, end_date=None, challenges=True, only_successful=True)

### Plot following state


In [None]:
from plot import plot_following1
# plot histogram of leading percentages and correlation plot of run length and leading percentage
plot_following1(dates_dict, start_date=start_date, end_date=end_date, only_successful=True, challenges=True) 

### Plot run length and initial target (fish and zone) dist correlation

In [None]:
plot_runlength_dist_goal_target_corr(dates_dict, start_date=None, end_date=None, only_successful=True, challenges=True, show=True)

### Plot dist to goal(x) and dist to target(y)

In [None]:
from util import equalize_arrays
# ragged test input: rows of length 1, 2 and 5
a = [[1],[2,2],[3,3,3,3,3]]

# NOTE(review): the return value is discarded, so the lines below only make
# sense if equalize_arrays modifies `a` in place — confirm against util.py.
equalize_arrays(a,0)
print(a)
# column-wise mean; presumably requires all rows to have equal length by now
np.mean(a,axis=0)

### Histogram x distance to target and y turns towards target

### Plot start positions of target fish

In [None]:
%matplotlib inline
only_successful = True
challenges = True

dates_keys = dates_dict.keys()

if start_date is not None:
        start_date_dt = datetime.strptime(start_date, '%Y-%m-%d')
if end_date is not None:
        end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')

        
all_initial_fish_pos = []
for date_key in dates_keys:

    # check date 
    date = datetime.strptime(date_key, '%Y-%m-%d')
    if start_date is not None and start_date_dt > date:
        continue
    if end_date is not None and end_date_dt < date:
        continue

    # generate data for plots
    day_initial_fish_pos = []
    date_dict = dates_dict[date_key]

    fish_instance = date_dict["fish"]
    runs = date_dict["runs"]
    if only_successful:
        runs, _ = get_successful_runs(runs,date_dict["successful"])
    elif challenges:
        runs, _ = get_challenge_runs(runs,date_dict["challenges"])
    else:
        runs = date_dict["runs"]

    if len(runs) > 0:
        fish_pos_runs = get_fish_pos_per_run(fish_instance,runs)
    else:
        continue

    for id_run, run in enumerate(runs):
        fish_pos_this_run = fish_pos_runs[id_run]
        robot_pos_run = np.array(date_dict['positions'][run[0]:run[1]])
        # sanity check
        if len(fish_pos_this_run) != len(robot_pos_run):
            print("Wrong array lengths: fish and robot")
            assert False
        fish1_pos_this_run = np.array([fish[0] for fish in fish_pos_this_run])
        
        day_initial_fish_pos.append(fish1_pos_this_run[0])
    all_initial_fish_pos.append(day_initial_fish_pos)
flat_all_initial_fish_pos = []
for day_initial_fish_pos in all_initial_fish_pos:
    for initial_fish_pos in day_initial_fish_pos:
        flat_all_initial_fish_pos.append(initial_fish_pos)
flat_all_initial_fish_pos = np.asarray(flat_all_initial_fish_pos)

# print(flat_all_initial_fish_pos.shape)

fig = plt.figure(figsize=(15,9))
plt.xlim(0,2000)
plt.ylim(0,2000)
plt.scatter(flat_all_initial_fish_pos[:,0], flat_all_initial_fish_pos[:,1])
plt.show()

### Plot number of fish over time

### Plot usage statistics

In [None]:
%matplotlib inline
# bar plot average time of day for each run
fig = plot_time_of_day_histogram(dates_dict, challenges=False, only_successful=True)

In [None]:
%matplotlib inline
# bar plot number of (unique) visitors for each day
plot_daily_number_runs(dates_dict)

In [None]:
%matplotlib inline
# plot daily use times and operational times in hours
fig = plot_daily_use_times_and_operational_times(dates_dict, to_pdf=False)

In [None]:
# plot daily start and end times
%matplotlib inline
plot_daily_start_end_times(dates_dict)

In [None]:
fig = plot_weekday_business(dates_dict, show=True)

# Filter challenge data

In [None]:
# remove all non challenge data from dates_dict
filter_dates_dict_for_challenge_runs(dates_dict)

# Save data

### npy

In [None]:
# save all loaded dates to npz
save_dates_to_npz(dates_dict, only_challenges=True)

### compressed npz

In [None]:
# Save each day to its own compressed .npz archive.
# The path is built with os.path.join: the former ".\loaded_data\..." literal
# relied on invalid escape sequences (\l, \d) and on Windows separators.
out_dir = os.path.join(".", "loaded_data")
os.makedirs(out_dir, exist_ok=True)

for key in dates_dict.keys():
    date = dates_dict[key]
    file_name = os.path.join(out_dir, f"dates_dict_{key}_compressed.npz")
    print(f"Saving {key} to {file_name}")
    # np.savez writes an uncompressed archive; savez_compressed matches the
    # "_compressed" file name. The day is passed positionally, so it is stored
    # as a single entry (named "arr_0" by numpy).
    np.savez_compressed(file_name, date)

### json

In [None]:
import json

# Save day by day into json files.
# The path is built with os.path.join: the former ".\loaded_data\..." literal
# relied on invalid escape sequences (\l, \d) and on Windows separators.
out_dir = os.path.join(".", "loaded_data")
os.makedirs(out_dir, exist_ok=True)

for key in dates_dict.keys():
    date = dates_dict[key]
    file_name = os.path.join(out_dir, f"dates_dict_{key}.json")
    print(f"Saving {key} to {file_name}")
    # NOTE(review): json.dump only accepts plain Python containers and scalars —
    # confirm the day dicts hold no numpy arrays or datetime objects.
    with open(file_name, 'w') as fp:
        json.dump(date, fp)


### hdf5

In [None]:
import h5py

# Save day by day into HDF5 files.
# Layout per file: one dataset per top-level key of the day dict, except
# 'fish', which becomes nested groups fish/<sample index>/<fish index>/<field>.
out_dir = os.path.join(".", "loaded_data")
os.makedirs(out_dir, exist_ok=True)

for key in dates_dict.keys():
    date_dict = dates_dict[key]
    file_name = os.path.join(out_dir, f"dates_dict_{key}_compressed.h5")
    print(f"Saving {key} to {file_name}")
    # The context manager closes the file even when a dataset fails to write.
    # (The former leading `hf.close()` raised NameError on a fresh kernel.)
    with h5py.File(file_name, 'w') as hf:
        for dset_name in date_dict:
            if dset_name == 'fish':
                all_fish_grp = hf.create_group('fish')
                for ts, fish_ts in enumerate(date_dict['fish']):
                    ts_grp = all_fish_grp.create_group(str(ts))
                    for i, fish in enumerate(fish_ts):
                        fish_grp = ts_grp.create_group(str(i))
                        for dset_name_fish in fish:
                            try:
                                fish_grp.create_dataset(dset_name_fish, data=fish[dset_name_fish])
                            except Exception as e:
                                # name the offending field before re-raising
                                print(dset_name_fish)
                                print(e)
                                raise
            else:
                # 'day_length' is written without compression (presumably a
                # scalar, which cannot take filters — TODO confirm); everything
                # else is gzip-compressed.
                dset_options = {} if dset_name == 'day_length' else {"compression": "gzip"}
                try:
                    dset = hf.create_dataset(dset_name, data=date_dict[dset_name], **dset_options)
                except Exception as e:
                    # name the offending dataset before re-raising
                    print(dset_name)
                    print(e)
                    raise

## deta base

In [None]:
import toml
from deta import Deta  # Import Deta


# read deta key from secrets
# (os.path.join replaces ".streamlit\secrets.toml", whose "\s" is an invalid
# escape sequence and whose separator only works on Windows)
project_key = toml.load(os.path.join(".streamlit", "secrets.toml"))["deta_key"]

# Initialize with a Project Key
deta = Deta(project_key)

# This is how to connect to or create a database.
#db = deta.Base("human_leadership_data")
test = deta.Base("test")

# Signature of put(), kept for reference:
#   put(
#     data: typing.Union[dict, list, str, int, float, bool],
#     key: str = None,
#     *,
#     expire_in: int = None,
#     expire_at: typing.Union[int, float, datetime.datetime] = None
#   )
# Store each day's timestamps under its date key, then read the entry back.
for date_key in dates_dict:
    print(date_key)
    test.put({"timestamps":dates_dict[date_key]["timestamps"]}, date_key)
    print(test.get(date_key).keys())


## deta drive

In [None]:
import toml  # imported here as well so this cell does not rely on an earlier cell having run
from deta import Deta  # Import Deta

# read deta key from secrets
# (os.path.join replaces ".streamlit\secrets.toml", whose "\s" is an invalid
# escape sequence and whose separator only works on Windows)
project_key = toml.load(os.path.join(".streamlit", "secrets.toml"))["deta_key"]

# Initialize with a Project Key
deta = Deta(project_key)

# This is how to connect to or create a drive.
drive = deta.Drive("human_leadership_data_HF")

# Upload every saved challenge file under its bare file name.
date_files = glob.glob(os.path.join(".", "loaded_data", "challenges_dates_dict_*.npy"))
for file in date_files:
    print(f"Uploading {file}")
    drive.put(os.path.basename(file), path=file)

# GUI

In [None]:
LoadingGUI(base_dir="C:/Users/admin/repos/thesis/logs_16_03_22/")

# Testing

In [None]:
a = list(range(100))
b = [[0,3],[10,14], [17,21]]

c=[]
for i in b:
    c.append(a[i[0]:i[1]+1])
c

In [None]:
a=[1,2,3,4]
a[5:]

In [None]:
dates_dict['2022-02-01'].keys()

In [None]:
def filter_date_dict_for_challenge_runs(date_dict, challenge_runs, ids_challenge_runs):
    """Build a copy of one day's data that contains only the given runs.

    Args:
        date_dict: day dictionary with the per-sample lists 'timestamps',
            'positions', 'orientation', 'rotation' and 'fish', the per-run
            lists 'run_lengths', 'difficulties', 'challenges' and
            'successful', and the value 'day_length'.
        challenge_runs: sequence of [start, end] sample indices into the
            per-sample lists, with ``end`` inclusive.
        ids_challenge_runs: for each entry of ``challenge_runs``, its index
            into the per-run lists of ``date_dict``.

    Returns:
        A new dict with the same keys. Per-sample lists hold the samples of the
        kept runs back to back, 'runs' holds re-based [start, end] pairs
        (inclusive) into those lists, per-run lists hold the kept entries and
        'day_length' is carried over unchanged.
    """
    per_sample_keys = ('timestamps', 'positions', 'orientation', 'rotation', 'fish')
    per_run_keys = ('run_lengths', 'difficulties', 'challenges', 'successful')

    # same key order as before
    filtered_date_dict = {
        'timestamps': [],
        'positions': [],
        'orientation': [],
        'rotation': [],
        'runs': [],
        'day_length': date_dict['day_length'],
        'run_lengths': [],
        'difficulties': [],
        'fish': [],
        'challenges': [],
        'successful': [],
    }

    run_pointer = 0
    for run, id_run in zip(challenge_runs, ids_challenge_runs):
        run_start, run_end = run[0], run[1]

        # Run ends are inclusive, so the slice must reach run_end + 1.
        # Slicing to run_end dropped the last sample of every run while the
        # re-based indices below still counted it, so they drifted past the
        # copied data.
        for sample_key in per_sample_keys:
            filtered_date_dict[sample_key].extend(date_dict[sample_key][run_start:run_end + 1])

        n_samples = run_end - run_start + 1
        filtered_date_dict['runs'].append([run_pointer, run_pointer + n_samples - 1])
        run_pointer += n_samples

        for run_key in per_run_keys:
            filtered_date_dict[run_key].append(date_dict[run_key][id_run])

    return filtered_date_dict

def filter_dates_dict_for_challenge_runs(dates_dict):
    """Replace every day in ``dates_dict`` by its challenge-only version.

    The dictionary is modified in place (each key is re-assigned) and nothing
    is returned.
    """
    for day_key, day_data in dates_dict.items():
        # select the challenge runs and their indices into the per-run lists
        selected_runs, selected_ids = get_challenge_runs(day_data['runs'], day_data['challenges'])

        # re-assigning an existing key is safe while iterating over the dict
        dates_dict[day_key] = filter_date_dict_for_challenge_runs(day_data, selected_runs, selected_ids)
    