# Locomotion analysis v3.0
This script works with the pre-assembled dataset, skipping most of the data-acquisition steps, which results in a cleaner and more reliable analysis (the dataset is checked manually, separately from this script).

### About the data
* `speed`: the speed as measured by the sensor attached to one wheel of the treadmill
* `totdist`: the total distance directly calculated in LabView. Whenever `speed` is negative, `totdist` is reduced! This is why we need...
* `totdist_abs`: the i-th value is calculated as `totdist_abs[i-1] + abs(speed[i]*(t[i] - t[i-1]))`, i.e. integrated absolute distance covered.
* `running`: a binary value, 0 if the mouse is not running (the `speed` is below a threshold, `40` by default), 1 otherwise. The original 100 Hz data, from which the downsampling happens, already includes an algorithm to connect two data points/intervals when the time spent not moving between them is short. The threshold for two `running` periods to still be counted as one is 250 bins@100 Hz
* `running%`: the sum of `running` (i.e. number of frames where `running` is 1) divided by the length of the segment.

In [None]:
STAT_METRICS = ["totdist_abs_norm", "running%", "avg_speed", "running_episodes", "running_episodes_mean_length"]  # metrics to test for

In [None]:
AMPL_THRESHOLD = 0.2  # amplitude threshold: the maximum of the speed trace within a candidate running episode has to reach this value, otherwise the episode is discarded
TEMP_THRESHOLD = 15  # in number of frames: minimum length a candidate episode has to reach to not be discarded (at 15 Hz, this amounts to 1 s)
EPISODE_MERGE_THRESHOLD_FRAMES = 15  # merge two consecutive running episodes if the later one begins at most this many frames after the earlier one ends (15 Hz!)

# Set version

### Set up export figure parameters

In [None]:
export_results = True

In [None]:
save_figs = True  # set to True to save the figures created
save_as_eps = False
save_as_pdf = True
# Output format priority: PDF wins over EPS; JPG is used when neither flag is set.
file_format = ".pdf" if save_as_pdf else (".eps" if save_as_eps else ".jpg")
if save_figs:
    print(f"Going to save figures as {file_format} files.")

# Import libraries

In [None]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [None]:
import labrotation.file_handling as fh
import h5py
from time import time
import matplotlib.pyplot as plt
import numpy as np
import os
from labrotation import file_handling as fh
from copy import deepcopy
import pandas as pd
import labrotation.two_photon_session as tps
import seaborn as sns
import uuid  # for unique labeling of sessions and coupling arrays (mouse velocity, distance, ...) to sessions in dataframe 
from matplotlib import cm  # colormap
import datadoc_util
from labrotation import two_photon_session as tps
from datetime import datetime
import seaborn as sns
from math import floor
import matlab.engine  # for saving data to workspace
from scipy.stats import ttest_rel

# Set seaborn parameters

In [None]:
sns.set(font_scale=3)
sns.set_style("whitegrid")

# If exists, load environmental variables from .env file

In [None]:
def parse_env_lines(lines):
    """
    Parse KEY=VALUE lines (as found in a .env file) into a dictionary.

    Blank lines, lines starting with "#" and lines without "=" are skipped.
    Only the first "=" separates key from value, so values may themselves
    contain "=" characters.

    Parameters
    ----------
    lines : iterable of str
        The raw lines, with or without trailing newline.

    Returns
    -------
    dict
        Mapping of key to value (both str).
    """
    parsed = dict()
    for raw_line in lines:
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, value = entry.split("=", 1)  # split on the first "=" only
        parsed[key] = value
    return parsed


env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        env_dict = parse_env_lines(f)
print(env_dict.keys())

# Set up data documentation directory

In [None]:
# assumption: inside the documentation folder, the subfolders carry the id of each mouse (not exact necessarily, but they 
# can be identified by the name of the subfolder). 
# Inside the subfolder xy (for mouse xy), xy_grouping.xlsx and xy_segmentation.xlsx can be found.
# xy_grouping.xlsx serves the purpose of finding the recordings belonging together, and has columns:
# folder, nd2, labview, lfp, face_cam_last, nikon_meta, experiment_type, day
# xy_segmentation.xlsx contains frame-by-frame (given by a set of disjoint intervals forming a cover for the whole recording) 
# classification of the events in the recording ("normal", seizure ("sz"), sd wave ("sd_wave") etc.). The columns:
# folder, interval_type, frame_begin, frame_end.

# TODO: write documentation on contents of xlsx files (what the columns are etc.)
if "DATA_DOCU_FOLDER" in env_dict.keys():
    docu_folder = env_dict["DATA_DOCU_FOLDER"]
else:
    docu_folder = fh.open_dir("Choose folder containing folders for each mouse!")
print(f"Selected folder:\n\t{docu_folder}")

In [None]:
if "documentation" in os.listdir(docu_folder):
    mouse_folder = os.path.join(docu_folder, "documentation")
else:
    mouse_folder = docu_folder
mouse_names = os.listdir(mouse_folder)
print(f"Mice detected:")
for mouse in mouse_names:
    print(f"\t{mouse}")

In [None]:
def get_datetime_for_fname():
    """Return the current local date and time as a YYYYMMDD-HHMMSS string, for use in file names."""
    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
# Prefer the .env entry; otherwise ask the user, like the other folder settings in this notebook.
# Indexing env_dict directly would raise a KeyError when .env is missing or lacks DOWNLOADS_FOLDER.
if "DOWNLOADS_FOLDER" in env_dict.keys():
    output_folder = env_dict["DOWNLOADS_FOLDER"]
else:
    output_folder = fh.open_dir("Choose folder to save output files to!")
print(f"Output files will be saved to {output_folder}")

### Load matlab-2p

In [None]:
if "MATLAB_2P_FOLDER" in env_dict.keys():
    matlab_2p_folder = env_dict["MATLAB_2P_FOLDER"]
else:
    matlab_2p_folder = fh.open_dir("Choose matlab-2p folder")
print(f"matlab-2p folder set to:\n\t{matlab_2p_folder}")

### Load data documentation

In [None]:
ddoc = datadoc_util.DataDocumentation(docu_folder)
ddoc.loadDataDoc()

### Set up color coding
For now, it is only possible to assign a color to each mouse. Later, when event uuids are available, each event uuid will need to be mapped to a color code.

In [None]:
df_colors = ddoc.getColorings()

In [None]:
dict_colors_mouse = df_colors[["mouse_id", "color"]].to_dict(orient="list")
dict_colors_mouse = dict(zip(dict_colors_mouse["mouse_id"], dict_colors_mouse["color"]))

In [None]:
#dict_colors_mouse["T413"] = "#000000"  # set one to black

### Load events_list dataset

In [None]:
events_list_fpath = os.path.join(docu_folder, "events_list.xlsx")
assert os.path.exists(events_list_fpath)

df_events_list = pd.read_excel(events_list_fpath)

## Load dataset

In [None]:
assembled_traces_fpath = fh.open_file("Open assembled_traces h5 file!")

In [None]:
is_chr2 = False
if "chr2" in assembled_traces_fpath.lower():
    is_chr2 = True
    print("ChR2 dataset detected")

In [None]:
if is_chr2:
    used_mouse_ids = ["OPI-2239", "WEZ-8917", "WEZ-8924", "WEZ-8922"]

In [None]:
traces_dict = dict()
traces_meta_dict = dict()
# first keys are event uuids, inside the following dataset names:
# 'lfp_mov_t', 'lfp_mov_y', 'lfp_t', 'lfp_y', 'lv_dist', 'lv_rounds', 
# 'lv_running', 'lv_speed', 'lv_t_s', 'lv_totdist', 'mean_fluo'
with h5py.File(assembled_traces_fpath, "r") as hf:
    # The loop variable must not be named `uuid`: that would overwrite the imported uuid module.
    for event_uuid in hf.keys():
        event_group = hf[event_uuid]
        # For ChR2 datasets, only events of the selected mice are loaded.
        if (not is_chr2) or (event_group.attrs["mouse_id"] in used_mouse_ids):
            # copy every dataset of the event into memory as a numpy array
            traces_dict[event_uuid] = {
                dataset_name: np.array(event_group[dataset_name]) for dataset_name in event_group.keys()
            }
            # copy every attribute (metadata entry) of the event
            traces_meta_dict[event_uuid] = {
                attr_name: event_group.attrs[attr_name] for attr_name in event_group.attrs
            }

### Get locomotion amplitude by finding min and max LabView speed entries

In [None]:
min_speed = np.inf
max_speed = -np.inf
for event_uuid in traces_dict.keys():
    speed = traces_dict[event_uuid]["lv_speed"]
    min_candidate = np.min(speed)
    max_candidate = np.max(speed)
    if min_candidate < min_speed:
        min_speed = min_candidate
    if max_candidate > max_speed:
        max_speed = max_candidate

In [None]:
LV_SPEED_AMPL = max_speed - min_speed

In [None]:
# Find the global minimum and maximum of the mean fluorescence over all loaded events.
# For ChR2 recordings, the stimulation frames are excluded from the search.
min_fluo = np.inf
max_fluo = -np.inf
for event_uuid in traces_dict.keys():
    mean_fluo = traces_dict[event_uuid]["mean_fluo"]
    if is_chr2:
        if traces_meta_dict[event_uuid]["mouse_id"] not in used_mouse_ids:
            # no candidates for this event; skip instead of re-using values of a previous event
            continue
        if "i_stim_begin_frame" not in traces_meta_dict[event_uuid].keys():
            print(f"{event_uuid} missing i_stim_begin_frame!")
            # without stim frames there are no valid candidates; do not fall through with stale values
            continue
        # get 0-indexing, inclusive first and last frames of stim
        i_begin_stim = traces_meta_dict[event_uuid]["i_stim_begin_frame"]
        i_end_stim = traces_meta_dict[event_uuid]["i_stim_end_frame"]
        mean_fluo_except_stim = np.concatenate([mean_fluo[:i_begin_stim], mean_fluo[i_end_stim+1:]])
        min_candidate = np.min(mean_fluo_except_stim)
        max_candidate = np.max(mean_fluo_except_stim)
    else:
        min_candidate = np.min(mean_fluo)
        max_candidate = np.max(mean_fluo)
    if min_candidate < min_fluo:
        min_fluo = min_candidate
    if max_candidate > max_fluo:
        max_fluo = max_candidate
print(f"{min_fluo} to {max_fluo}")

# Calculate locomotion statistics

In [None]:
def get_episodes(segment, merge_episodes=False, merge_threshold_frames=EPISODE_MERGE_THRESHOLD_FRAMES, return_begin_end_frames = False):
    """
    Find the episodes (uninterrupted runs of 1) in a binary trace.

    Parameters
    ----------
    segment : sequence of 0/1 values
        The binary ("running") trace. A frame belongs to an episode if its value equals 1.
        May be empty, in which case an empty list is returned.
    merge_episodes : bool
        If True, two consecutive episodes are merged into one when the later one begins at most
        merge_threshold_frames frames after the earlier one ends (begin - previous end <= threshold).
    merge_threshold_frames : int
        The merge threshold in frames, only used if merge_episodes is True.
    return_begin_end_frames : bool
        If True, return a list of (i_begin, i_end) tuples, both inclusive in 0-indexing.
        If False, return the list of episode lengths in frames (its len() is the number of episodes).

    Returns
    -------
    list
        Episodes in order of appearance, as (i_begin, i_end) tuples or as lengths (see above).
    """
    n_frames = len(segment)
    episodes = []
    episode_begin = None  # first frame of the currently open episode, None if no episode is open

    # detect episode begin and episode end, record each closed episode in the list
    for i_frame in range(n_frames):
        if segment[i_frame] == 1:
            if episode_begin is None:  # first frame of a new episode (also if the segment starts with one)
                episode_begin = i_frame
        elif episode_begin is not None:  # previous frame was the last one of the open episode
            episodes.append((episode_begin, i_frame - 1))
            episode_begin = None
    if episode_begin is not None:  # the last episode lasts until the end of the segment
        episodes.append((episode_begin, n_frames - 1))

    if merge_episodes and len(episodes) > 1:  # single (or zero) episode cannot be merged
        episodes_merged = [episodes[0]]
        for current_begin, current_end in episodes[1:]:
            previous_begin, previous_end = episodes_merged[-1]
            if current_begin - previous_end <= merge_threshold_frames:
                # close enough: extend the previous episode to the end of the current one
                episodes_merged[-1] = (previous_begin, current_end)
            else:
                episodes_merged.append((current_begin, current_end))
        episodes = episodes_merged

    if return_begin_end_frames:
        return episodes
    return [ep[1] - ep[0] + 1 for ep in episodes]

In [None]:
def apply_threshold(speed_trace, episodes, temporal_threshold, amplitude_threshold):
    """
    Filter a list of episodes, given as (i_begin_frame, i_end_frame) tuples (both inclusive), using a trace.
    An episode is dropped if
    a.) it contains fewer frames than temporal_threshold,
    OR
    b.) the maximum of speed_trace within the episode stays below amplitude_threshold.
    Returns the remaining episodes in their original order.
    """

    def _is_discarded(episode):
        # both indices are inclusive, hence the +1 for slicing (see get_episodes())
        episode_trace = speed_trace[episode[0]:episode[1]+1]
        too_short = len(episode_trace) < temporal_threshold
        too_weak = max(episode_trace) < amplitude_threshold
        return too_short or too_weak

    return [episode for episode in episodes if not _is_discarded(episode)]

In [None]:
# If True, baseline (bl) and aftermath (am) are cut to a fixed number of frames directly before/after the
# middle (sz) segment; events with fewer bl or am frames are skipped.
use_manual_bl_am_length = True
bl_manual_length = 4500
am_manual_length = 4500


# each entry (row) should have columns: 
# uuid of event, mouse id, window type, segment type (bl/sz/am), segment length in frames, totdist, running, speed
list_statistics = []  
dict_episodes = {}
loco_binary_traces = {}  # contains the post-filtering "running" trace, of which the running% is calculated (divided by segment length)
loco_episodes = {}  # contains the first and last indices of the locomotion episodes
begin_end_frames_dict = {}

for event_uuid in traces_dict.keys():
    mouse_id = traces_meta_dict[event_uuid]["mouse_id"]
    win_type = traces_meta_dict[event_uuid]["window_type"]
    # get segment lengths
    n_bl_frames = traces_meta_dict[event_uuid]["n_bl_frames"]
    n_am_frames = traces_meta_dict[event_uuid]["n_am_frames"]
    n_frames = traces_meta_dict[event_uuid]["n_frames"]
    n_sz_frames = n_frames - n_am_frames - n_bl_frames
    
    if use_manual_bl_am_length:
        if (bl_manual_length > n_bl_frames) or (am_manual_length > n_am_frames):
            print(f"{mouse_id} {event_uuid}:\n\tNot enough bl ({n_bl_frames}, {bl_manual_length} required) or am ({n_am_frames}, {am_manual_length} required) frames available. Skipping...")
            continue
        else:
            # define baseline as last frame before sz segment, and starting bl_manual_length frames before
            last_frame_bl = n_bl_frames - 1  # 0 indexing: last bl frame, inclusive
            first_frame_bl = last_frame_bl - bl_manual_length + 1  # inclusive
            # 0 is a valid first frame: it occurs when exactly bl_manual_length baseline frames are available
            assert first_frame_bl >= 0
            # define aftermath as first frame after sz segment, and ending am_manual_length frames after
            first_frame_am = n_bl_frames+n_sz_frames  # inclusive
            assert first_frame_am == n_frames - n_am_frames
            
            last_frame_am = first_frame_am + am_manual_length - 1  # inclusive
            
            #convert to [begin, end), i.e. left inclusive, right exclusive, for numpy indexing
            last_frame_bl += 1
            last_frame_am += 1
            
    else:
        first_frame_bl = 0  # inclusive
        last_frame_bl = n_bl_frames  # exclusive
        
        first_frame_am = n_bl_frames+n_sz_frames  # inclusive
        last_frame_am = n_frames  # exclusive
    
    # segment borders, as [begin, end) pairs for bl and am; the sz segment lies between last_frame_bl and first_frame_am
    begin_end_frames_dict[event_uuid] = [first_frame_bl, last_frame_bl, first_frame_am, last_frame_am]
    
    # print(f"{ddoc.getNikonFileNameForUuid(event_uuid)}:\n\t{n_bl_frames} bl, {n_sz_frames} mid, {n_am_frames} am")
    # get movement data
    lv_totdist = traces_dict[event_uuid]["lv_totdist"]
    lv_totdist_abs = traces_dict[event_uuid]["lv_totdist_abs"]
    lv_running = traces_dict[event_uuid]["lv_running"]
    lv_speed = traces_dict[event_uuid]["lv_speed"]
    
    # cut up data into segments
    lv_totdist_bl = lv_totdist[first_frame_bl:last_frame_bl]
    lv_totdist_sz = lv_totdist[last_frame_bl:first_frame_am]
    lv_totdist_am = lv_totdist[first_frame_am:last_frame_am]
    if not use_manual_bl_am_length:
        # without manual lengths, the three segments cover the whole recording
        assert len(lv_totdist_bl) + len(lv_totdist_sz) + len(lv_totdist_am) == len(lv_totdist)
    else:
        assert len(lv_totdist_bl) == bl_manual_length
        assert len(lv_totdist_am) == am_manual_length 
    
    lv_totdist_abs_bl = lv_totdist_abs[first_frame_bl:last_frame_bl]
    lv_totdist_abs_sz = lv_totdist_abs[last_frame_bl:first_frame_am]
    lv_totdist_abs_am = lv_totdist_abs[first_frame_am:last_frame_am]
    
    lv_running_bl = lv_running[first_frame_bl:last_frame_bl]
    lv_running_sz = lv_running[last_frame_bl:first_frame_am]
    lv_running_am = lv_running[first_frame_am:last_frame_am]
    
    lv_speed_bl = lv_speed[first_frame_bl:last_frame_bl]
    lv_speed_sz = lv_speed[last_frame_bl:first_frame_am]
    lv_speed_am = lv_speed[first_frame_am:last_frame_am]
    
    
    # calculate statistics
    # the distance traces are cumulative, so the distance within a segment is last value minus first value
    totdist_bl = lv_totdist_bl[-1] - lv_totdist_bl[0]
    totdist_sz = lv_totdist_sz[-1] - lv_totdist_sz[0]
    totdist_am = lv_totdist_am[-1] - lv_totdist_am[0]
    totdist_abs_bl = lv_totdist_abs_bl[-1] - lv_totdist_abs_bl[0]
    totdist_abs_sz = lv_totdist_abs_sz[-1] - lv_totdist_abs_sz[0]
    totdist_abs_am = lv_totdist_abs_am[-1] - lv_totdist_abs_am[0]

    # "speed" statistic: sum of the speed values over the segment
    speed_bl = sum(lv_speed_bl)
    speed_sz = sum(lv_speed_sz)
    speed_am = sum(lv_speed_am)
    # calculate average speed over the frames where the (unfiltered) running signal is > 0.
    # If there is no such frame, the result is NaN.
    lv_speed_bl = np.array(lv_speed_bl)
    lv_speed_sz = np.array(lv_speed_sz)
    lv_speed_am = np.array(lv_speed_am)
    lv_running_bl = np.array(lv_running_bl)
    lv_running_sz = np.array(lv_running_sz)
    lv_running_am = np.array(lv_running_am)
    avg_speed_bl = np.mean(lv_speed_bl[lv_running_bl > 0])
    avg_speed_sz = np.mean(lv_speed_sz[lv_running_sz > 0])
    avg_speed_am = np.mean(lv_speed_am[lv_running_am > 0])
    # "max speed" is currently the plain maximum of the segment. The commented-out alternative
    # (median of the top 5% of values) would be less sensitive to outliers.
    max_speed_bl = np.max(lv_speed_bl) #np.median(np.sort(lv_speed_bl)[floor(0.95*len(lv_speed_bl)):])
    max_speed_sz = np.max(lv_speed_sz) #np.median(np.sort(lv_speed_sz)[floor(0.95*len(lv_speed_sz)):])
    max_speed_am = np.max(lv_speed_am) #np.median(np.sort(lv_speed_am)[floor(0.95*len(lv_speed_am)):])
    
    
    
    # number of running episodes, length
    # episodes are (i_begin, i_end) tuples, inclusive, with indices relative to the start of the segment
    list_episodes_bl = get_episodes(lv_running_bl, True, EPISODE_MERGE_THRESHOLD_FRAMES, return_begin_end_frames=True )  # 15 frames in 15 Hz is 1 s.
    list_episodes_sz = get_episodes(lv_running_sz, True, EPISODE_MERGE_THRESHOLD_FRAMES, return_begin_end_frames=True)
    list_episodes_am = get_episodes(lv_running_am, True, EPISODE_MERGE_THRESHOLD_FRAMES,  return_begin_end_frames=True)
    
    # apply a filter to episodes, discard those that do not fulfill the criteria
    list_episodes_bl = apply_threshold(lv_speed_bl, list_episodes_bl, TEMP_THRESHOLD, AMPL_THRESHOLD, )
    list_episodes_sz = apply_threshold(lv_speed_sz, list_episodes_sz, TEMP_THRESHOLD, AMPL_THRESHOLD, )
    list_episodes_am = apply_threshold(lv_speed_am, list_episodes_am, TEMP_THRESHOLD, AMPL_THRESHOLD, )
    
    # get the episode lengths and number of episodes
    list_episode_lengths_bl = [ep[1]-ep[0] + 1 for ep in list_episodes_bl]
    n_episodes_bl = len(list_episodes_bl)
    
    list_episode_lengths_sz = [ep[1]-ep[0] + 1 for ep in list_episodes_sz]
    n_episodes_sz = len(list_episode_lengths_sz)
    
    list_episode_lengths_am = [ep[1]-ep[0] + 1 for ep in list_episodes_am]
    n_episodes_am = len(list_episode_lengths_am)
    
    # apply filtering to "running" signal: 1 inside the episodes that passed the filter, 0 elsewhere
    
    filtered_running_bl = np.zeros(len(lv_running_bl), dtype=lv_running_bl.dtype)
    filtered_running_sz = np.zeros(len(lv_running_sz), dtype=lv_running_sz.dtype)
    filtered_running_am = np.zeros(len(lv_running_am), dtype=lv_running_am.dtype)
    # add zeros before and after segments to match original recording length
    filtered_running_prebl = np.zeros(first_frame_bl, dtype=lv_running_bl.dtype)
    filtered_running_postam = np.zeros(len(lv_totdist) - last_frame_am, dtype=lv_running_am.dtype)
    
    for episode in list_episodes_bl:
        filtered_running_bl[episode[0]:episode[1]+1] = 1
    for episode in list_episodes_sz:
        filtered_running_sz[episode[0]:episode[1]+1] = 1
    for episode in list_episodes_am:
        filtered_running_am[episode[0]:episode[1]+1] = 1
    
    
    # create "running" statistic (number of running frames), using filtered data
    running_bl = np.sum(filtered_running_bl)  # np.sum(lv_running_bl)
    running_sz = np.sum(filtered_running_sz)  # np.sum(lv_running_sz)
    running_am = np.sum(filtered_running_am)  # np.sum(lv_running_am)
    
    loco_binary_traces[event_uuid] = np.concatenate([filtered_running_prebl, filtered_running_bl, filtered_running_sz, filtered_running_am, filtered_running_postam])
    assert len(loco_binary_traces[event_uuid]) == len(lv_totdist)
    
    # as running already has a built-in merging (see Matlab beltAddRunningProperties.m), we can count the leading edges in that data
    #n_episodes_bl2 = sum((lv_running_bl[1:] - lv_running_bl[:-1]) > 0)
    #n_episodes_sz2 = sum((lv_running_sz[1:] - lv_running_sz[:-1]) > 0)
    #n_episodes_am2 = sum((lv_running_am[1:] - lv_running_am[:-1]) > 0)
    
    #print(f"bl: {n_episodes_bl} vs {n_episodes_bl2}, sz: {n_episodes_sz} vs {n_episodes_sz2}, am: {n_episodes_am} vs {n_episodes_am2}")
    
    
    # add to episodes dict
    if mouse_id not in dict_episodes.keys():
        dict_episodes[mouse_id] = dict()
    dict_episodes[mouse_id][event_uuid] = dict()

    list_episode_lengths_bl = np.array(list_episode_lengths_bl)
    list_episode_lengths_sz = np.array(list_episode_lengths_sz)
    list_episode_lengths_am = np.array(list_episode_lengths_am)
    
    dict_episodes[mouse_id][event_uuid]["bl"] = list_episode_lengths_bl
    dict_episodes[mouse_id][event_uuid]["sz"] = list_episode_lengths_sz
    dict_episodes[mouse_id][event_uuid]["am"] = list_episode_lengths_am
    
    # calculate mean episode length, std
    # without episodes, the mean length is defined as 0
    bl_episode_mean_len = list_episode_lengths_bl.mean() if len(list_episode_lengths_bl) > 0 else 0
    sz_episode_mean_len = list_episode_lengths_sz.mean() if len(list_episode_lengths_sz) > 0 else 0
    am_episode_mean_len = list_episode_lengths_am.mean() if len(list_episode_lengths_am) > 0 else 0
    
    # without episodes, the std stays NaN (same value .std() gives for an empty array, but without the RuntimeWarning)
    bl_episode_std = list_episode_lengths_bl.std() if len(list_episode_lengths_bl) > 0 else np.nan
    sz_episode_std = list_episode_lengths_sz.std() if len(list_episode_lengths_sz) > 0 else np.nan
    am_episode_std = list_episode_lengths_am.std() if len(list_episode_lengths_am) > 0 else np.nan
    
    
    # events without an "exp_type" attribute are treated as "tmev"
    if "exp_type" in traces_meta_dict[event_uuid].keys():
        exp_type = traces_meta_dict[event_uuid]["exp_type"]
    else:
        exp_type = "tmev"
        
    segment_length_bl = last_frame_bl - first_frame_bl
    segment_length_sz = first_frame_am - last_frame_bl
    segment_length_am = last_frame_am - first_frame_am
        
    # add to data list: one row per segment (bl, sz, am) of the event
    list_statistics.append([event_uuid, mouse_id, win_type, exp_type, "bl", segment_length_bl, totdist_bl, totdist_abs_bl, running_bl, speed_bl, avg_speed_bl, n_episodes_bl, bl_episode_mean_len, bl_episode_std, max_speed_bl, ])
    list_statistics.append([event_uuid, mouse_id, win_type, exp_type, "sz", segment_length_sz, totdist_sz, totdist_abs_sz, running_sz, speed_sz, avg_speed_sz, n_episodes_sz, sz_episode_mean_len, sz_episode_std, max_speed_sz, ])
    list_statistics.append([event_uuid, mouse_id, win_type, exp_type, "am", segment_length_am, totdist_am, totdist_abs_am, running_am, speed_am, avg_speed_am, n_episodes_am, am_episode_mean_len, am_episode_std, max_speed_am, ])

In [None]:
df_stats = pd.DataFrame(data=list_statistics, columns=["event_uuid", "mouse_id", "window_type", "exp_type", "segment_type",  "segment_length", "totdist", "totdist_abs", "running", "speed", "avg_speed", "running_episodes", "running_episodes_mean_length", "running_episodes_length_std", "max_speed", ])

In [None]:
# set NaN to 0 (running_episodes_mean_length: if no episodes, then mean segment length is 0)
df_stats["running_episodes_mean_length"] = df_stats["running_episodes_mean_length"].fillna(value=0)

In [None]:
# pick a scale factor for better readability: 0.000513 -> 51.3, for example
if use_manual_bl_am_length:
    # every baseline segment has exactly bl_manual_length frames, so this is the bl segment length.
    # (n_bl_frames would be whatever the last iterated event left behind, possibly a skipped one.)
    scale_factor = bl_manual_length
elif "n_bl_frames" in locals():
    # NOTE(review): baseline lengths differ per event here; this uses the value of the last iterated event
    scale_factor = n_bl_frames  # scale up to bl segment length 
else:
    scale_factor = 10000

# per-frame values (statistic divided by segment length), multiplied by the scale factor
df_stats["totdist_norm"] = scale_factor*df_stats["totdist"]/df_stats["segment_length"]
df_stats["totdist_abs_norm"] = scale_factor*df_stats["totdist_abs"]/df_stats["segment_length"]
df_stats["running_norm"] = scale_factor*df_stats["running"]/df_stats["segment_length"]
df_stats["speed_norm"] = scale_factor*df_stats["speed"]/df_stats["segment_length"]

### Add % of time spent running

In [None]:
# % of time spent running
df_stats["running%"] = 100.*df_stats["running"]/df_stats["segment_length"]  # get value as true % instead of [0, 1] float

### Replace NaN by 0 in average speed

In [None]:
df_stats["avg_speed"] = df_stats["avg_speed"].fillna(0)

In [None]:
assert df_stats["avg_speed"].isna().sum() == 0

### Add  color codes to entries

In [None]:
df_stats["color"] = df_stats.apply(lambda row: dict_colors_mouse[row["mouse_id"]], axis=1)

In [None]:
dict_colors_event = df_stats[["event_uuid", "color"]].to_dict(orient="list")
dict_colors_event = dict(zip(dict_colors_event["event_uuid"], dict_colors_event["color"]))

## Standardize window type

In [None]:
df_stats["window_type"] = df_stats["window_type"].replace({"Cx" : "NC", "ca1": "CA1"})

## Create per-mouse means

In [None]:
df_stats_per_mouse_mean = df_stats.drop(columns=["event_uuid", "window_type", "color"], axis=0).groupby(["mouse_id", "exp_type", "segment_type"]).agg(func="mean").reset_index()

In [None]:
df_stats_per_mouse_mean["window_type"] = df_stats_per_mouse_mean.apply(lambda row: ddoc.getMouseWinInjInfo(row["mouse_id"]).iloc[0].window_type, axis=1)
df_stats_per_mouse_mean["color"] = df_stats_per_mouse_mean.apply(lambda row: df_colors[df_colors["mouse_id"] == row["mouse_id"]].iloc[0].color, axis=1)

### Experiment type-related quantities

In [None]:
n_exp_types = len(df_stats.exp_type.unique())

In [None]:
exp_types = df_stats.exp_type.unique()
exp_types.sort()

# 1. TMEV

# Plot results

## Introduce mapping shorthand notation to proper names

In [None]:
if not is_chr2:
    # shorthand segment names -> names used in the figures
    value_mapping = {"bl":"baseline", "sz": "seizure", "am":"post-seizure"}
    # Values that are not shorthand keys (e.g. already renamed ones) are kept as they are,
    # so running this cell a second time does not raise a KeyError.
    df_stats["segment_type"] = df_stats["segment_type"].map(lambda x: value_mapping.get(x, x))
    df_stats_ca1 = df_stats[df_stats["window_type"] == "CA1"]
    df_stats_nc = df_stats[df_stats["window_type"] == "NC"]

In [None]:
if not is_chr2:
    fig = plt.figure(figsize=(10,10))
    sns.violinplot(x="segment_type", y="running_norm", data=df_stats)
    #sns.stripplot(data=df_stats[df_stats["window_type"]=="CA1"], x="speed_norm", y="segment_type", hue="mouse_id", dodge=True, zorder=1, legend=False)
    plt.show()

In [None]:
if not is_chr2:
    loco_statistic = "totdist_abs"

    fig, axs = plt.subplots(2, 3, figsize=(22,12), sharey=False)
    sns.pointplot(data=df_stats, x="segment_type", y=loco_statistic, ax=axs[0][0])
    sns.pointplot(data=df_stats, x="segment_type", y="running", ax=axs[0][1])
    sns.pointplot(data=df_stats, x="segment_type", y="speed", ax=axs[0][2])

    sns.pointplot(data=df_stats[df_stats["segment_type"] == value_mapping["bl"]], x="window_type", y=loco_statistic, join=False, capsize=.1, ax=axs[1][0])
    sns.pointplot(data=df_stats[df_stats["segment_type"] ==value_mapping["sz"]], x="window_type", y=loco_statistic, join=False, capsize=.1, ax=axs[1][1])
    sns.pointplot(data=df_stats[df_stats["segment_type"] == value_mapping["am"]], x="window_type", y=loco_statistic, join=False, capsize=.1, ax=axs[1][2])
    if save_figs and False:
        fig_fpath = os.path.join(output_folder, f'loco_mean_per_segment_{get_datetime_for_fname()}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

In [None]:
if not is_chr2:
    loco_statistic = "totdist_abs"

    fig, axs = plt.subplots(2, 3, figsize=(22,12))
    sns.lineplot(data=df_stats, x="segment_type", y=loco_statistic, hue="event_uuid", palette=dict_colors_event, ax=axs[0][0], legend=False)
    sns.lineplot(data=df_stats, x="segment_type", y="running", hue="event_uuid", palette=dict_colors_event, ax=axs[0][1], legend=False)
    sns.lineplot(data=df_stats, x="segment_type", y="speed", hue="event_uuid", palette=dict_colors_event, ax=axs[0][2], legend=False)


    sns.stripplot(data=df_stats[df_stats["segment_type"] == value_mapping["bl"]], x="window_type", hue="event_uuid", palette=dict_colors_event, y=loco_statistic, size=8, ax=axs[1][0], legend=False)
    sns.stripplot(data=df_stats[df_stats["segment_type"] == value_mapping["sz"]], x="window_type", hue="event_uuid", palette=dict_colors_event, y=loco_statistic, size=8, ax=axs[1][1], legend=False)
    sns.stripplot(data=df_stats[df_stats["segment_type"] == value_mapping["am"]], x="window_type", hue="event_uuid", palette=dict_colors_event, y=loco_statistic, size=8, ax=axs[1][2], legend=False)


    sns.violinplot(
        data=df_stats[df_stats["segment_type"] == value_mapping["bl"]],
        x="window_type", y=loco_statistic, 
        hue_order=[True, False], split=True,
        hue=True,
        palette=["lightgrey"],
        ax=axs[1][0]
    )
    axs[1][0].legend_=None

    sns.violinplot(
        data=df_stats[df_stats["segment_type"] == value_mapping["sz"]],
        x="window_type", y=loco_statistic, 
        hue_order=[True, False], split=True,
        hue=True,
        palette=["lightgrey"],
        ax=axs[1][1]
    )
    axs[1][1].legend_=None

    sns.violinplot(
        data=df_stats[df_stats["segment_type"] == value_mapping["am"]],
        x="window_type", y=loco_statistic, 
        hue_order=[True, False], split=True,
        hue=True,
        palette=["lightgrey"],
        ax=axs[1][2]
    )
    axs[1][2].legend_=None


    axs[1][0].set_title(value_mapping["bl"])
    axs[1][1].set_title(value_mapping["sz"])
    axs[1][2].set_title(value_mapping["am"])


    #axs[1][0].set_alpha(0.5)
    # found violin plot surfaces by trial and error below, for making them transparent:
    plt.setp(axs[1][0].collections[-2], alpha=.3)
    plt.setp(axs[1][0].collections[-4], alpha=.3)
    plt.setp(axs[1][1].collections[-2], alpha=.3)
    plt.setp(axs[1][1].collections[-4], alpha=.3)
    plt.setp(axs[1][2].collections[-2], alpha=.3)
    plt.setp(axs[1][2].collections[-4], alpha=.3)

    plt.tight_layout()

    if save_figs and False:
        fig_fpath = os.path.join(output_folder, f'loco_per_segment_{get_datetime_for_fname()}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")



    plt.show()

## Plot individual recordings, color-coded by mouse ID

### Plot all possible metrics

In [None]:
if not is_chr2:
    df_stats_only_bl_am = df_stats[df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])]

In [None]:
if not is_chr2:
    df_stats_only_bl_am[df_stats_only_bl_am["segment_type"].isin([value_mapping["am"]])]

In [None]:
if not is_chr2:
    # 2x3 overview: one line per event_uuid across segment types, one metric per panel.
    fig, axs = plt.subplots(2, 3, figsize=(22,16))
    overview_metrics = (
        "totdist_abs", "running%", "speed",
        "running_episodes", "avg_speed", "running_episodes_mean_length",
    )
    # axs.flat walks the grid row by row, which matches the metric order above.
    for panel_ax, panel_metric in zip(axs.flat, overview_metrics):
        sns.lineplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, estimator=None,
            ax=panel_ax, marker="o", markersize=20, legend=False,
        )

    plt.tight_layout()

    # The hard-coded `and False` keeps this figure from ever being written to disk.
    if save_figs and False:
        fig_fpath = os.path.join(
            output_folder,
            f'loco_per_segment_tmev_all_sources_sz_excluded_{get_datetime_for_fname()}{file_format}',
        )
        plt.savefig(fig_fpath, format=file_format.rsplit(".", 1)[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

In [None]:
if not is_chr2:
    # Labelled 2x3 figure: one line per event_uuid across segment types.
    # Each panel pairs a metric column with the y-axis label that describes it.
    # Fix: the last two panels previously plotted "speed" and
    # "running_episodes_mean_length" under the labels "Mean length of running
    # episodes" and "Max velocity of locomotion"; the columns now match the labels
    # (same pairing as the per-mouse figure further down in this notebook).
    fig, axs = plt.subplots(2, 3, figsize=(32,20))
    labelled_panels = (
        ("totdist_abs", 'Total (absolute) distance, a.u.'),
        ("running%", '% of time spent with locomotion'),
        ("running_episodes", 'Number of running episodes'),
        ("avg_speed", 'Average of locomotion velocity'),
        ("running_episodes_mean_length", 'Mean length of running episodes, a.u.'),
        ("max_speed", 'Max velocity of locomotion, a.u.'),
    )
    # axs.flat walks the grid row by row, matching the panel order above.
    for panel_ax, (panel_metric, panel_ylabel) in zip(axs.flat, labelled_panels):
        sns.lineplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, estimator=None,
            ax=panel_ax, linestyle="-", marker="o", markersize=13, linewidth=1,
            legend=False,
        )
        panel_ax.set(xlabel='Segment', ylabel=panel_ylabel)

    # Called without an axis argument, so it acts on the current figure.
    sns.despine(left=True, bottom=True, top=True, right=True)

    plt.tight_layout()

    if save_figs:
        fig_fpath = os.path.join(output_folder, f'loco_tmev_{get_datetime_for_fname()}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

### Plot 3 metrics along with individual points, violin plot

In [None]:
if not is_chr2:
    loco_statistic = "totdist_abs"

    fig, axs = plt.subplots(2, 3, figsize=(22,12))

    # Top row: one line per event_uuid for three metrics.
    for panel_ax, panel_metric in zip(axs[0], (loco_statistic, "running", "speed")):
        sns.lineplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, estimator=None,
            ax=panel_ax, legend=False,
        )

    # Bottom row, step 1: individual events as points (same data on every panel).
    # NOTE(review): the points come from the bl/am subset on all three panels,
    # while the violins below are drawn per segment - confirm this is intended.
    for panel_ax in axs[1]:
        sns.stripplot(
            data=df_stats_only_bl_am, x="window_type", y=loco_statistic,
            hue="event_uuid", palette=dict_colors_event, size=8,
            ax=panel_ax, legend=False,
        )

    # Bottom row, step 2: one light-grey violin panel per segment type.
    segment_panels = (("bl", "baseline"), ("sz", "seizure"), ("am", "aftermath"))
    for panel_ax, (segment_key, panel_title) in zip(axs[1], segment_panels):
        sns.violinplot(
            data=df_stats[df_stats["segment_type"] == value_mapping[segment_key]],
            x="window_type", y=loco_statistic,
            hue_order=[True, False], split=True,
            hue=True,
            palette=["lightgrey"],
            ax=panel_ax,
        )
        panel_ax.legend_ = None  # drop the legend reference on this axis
        panel_ax.set_title(panel_title)

    plt.tight_layout()

    # The hard-coded `and False` keeps this figure from ever being written to disk.
    if save_figs and False:
        fig_fpath = os.path.join(
            output_folder,
            f'loco_per_segment_sz-excluded_{get_datetime_for_fname()}{file_format}',
        )
        plt.savefig(fig_fpath, format=file_format.rsplit(".", 1)[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

## Aggregate by mouse
`estimator='mean'` and `errorbar=('ci', 95)` are the seaborn `lineplot` defaults; they apply wherever `estimator` is not set explicitly.

In [None]:
if not is_chr2:
    # Translate the segment codes through value_mapping.
    # Values that are not keys of value_mapping (for instance labels already
    # translated by an earlier run of this cell) are kept unchanged, so the cell
    # can be re-executed without raising KeyError.
    df_stats_per_mouse_mean["segment_type"] = df_stats_per_mouse_mean["segment_type"].map(
        lambda segment: value_mapping.get(segment, segment)
    )

In [None]:
if not is_chr2:
    # Per-mouse means restricted to the "bl" and "am" labels of value_mapping.
    df_stats_per_mouse_mean_only_bl_am = df_stats_per_mouse_mean.loc[
        df_stats_per_mouse_mean["segment_type"].isin(
            [value_mapping[key] for key in ("bl", "am")]
        )
    ]

In [None]:
if not is_chr2:
    # Order rows by mouse, then experiment type, then segment label.
    df_stats_per_mouse_mean_only_bl_am = df_stats_per_mouse_mean_only_bl_am.sort_values(
        ["mouse_id", "exp_type", "segment_type"]
    )

In [None]:
if not is_chr2:
    # Per-mouse mean figure: one line per mouse_id across segment types.
    # Layout:
    #   * several experiment types -> one row of 6 panels per experiment type
    #   * a single experiment type -> 2x3 grid with descriptive axis labels
    # Fix: the number of experiment types is now computed here, before it is used
    # to choose the layout, and from the same dataframe the loop iterates over.
    # Previously the layout test read whatever value n_exp_types held from an
    # earlier cell, so the grid shape could disagree with how axs is indexed.
    mouse_mean_panels = (
        ("totdist_abs_norm", 'Total (absolute) distance, a.u.'),
        ("running%", '% of time spent with locomotion'),
        ("running_episodes", 'Number of running episodes'),
        ("avg_speed", 'Average of locomotion velocity'),
        ("running_episodes_mean_length", 'Mean length of running episodes, a.u.'),
        ("max_speed", 'Max velocity of locomotion, a.u.'),
    )
    exp_types_to_plot = df_stats_per_mouse_mean_only_bl_am.exp_type.unique()
    n_exp_types = len(exp_types_to_plot)
    one_row_per_exp_type = n_exp_types > 1

    if one_row_per_exp_type:
        fig, axs = plt.subplots(n_exp_types, 6, figsize=(32,10*n_exp_types))
    else:
        fig, axs = plt.subplots(2, 3, figsize=(32,20))

    for i_exp_type, exp_type in enumerate(exp_types_to_plot):
        df_exp_type = df_stats_per_mouse_mean_only_bl_am[(df_stats_per_mouse_mean_only_bl_am["exp_type"] == exp_type)]
        # Row of the grid for this experiment type, or the whole 2x3 grid
        # (row by row) when there is only one experiment type.
        panel_axes = axs[i_exp_type] if one_row_per_exp_type else axs.flat
        for panel_ax, (panel_metric, panel_ylabel) in zip(panel_axes, mouse_mean_panels):
            panel_ax.set_title(f"{exp_type}")
            sns.lineplot(
                data=df_exp_type, x="segment_type", y=panel_metric,
                hue="mouse_id", palette=dict_colors_mouse, estimator=None,
                ax=panel_ax, marker="o", markersize=20, legend=False,
            )
            if not one_row_per_exp_type:
                # Descriptive labels are only applied in the single-type layout.
                panel_ax.set(xlabel='Segment', ylabel=panel_ylabel)
    plt.tight_layout()

    if save_figs:
        fig_fpath = os.path.join(output_folder, f'loco_tmev_mouse_means_{get_datetime_for_fname()}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

In [None]:
if not is_chr2:
    loco_statistic = "totdist_abs"

    fig, axs = plt.subplots(2, 3, figsize=(22,12))

    # Top row: per-mouse lines; no estimator is passed, so seaborn's default
    # aggregation is used for repeated x values.
    for panel_ax, panel_metric in zip(axs[0], (loco_statistic, "running", "speed")):
        sns.lineplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="mouse_id", palette=dict_colors_mouse,
            ax=panel_ax, legend=False,
        )

    # Bottom row, step 1: individual events as points (same data on every panel).
    for panel_ax in axs[1]:
        sns.stripplot(
            data=df_stats_only_bl_am, x="window_type", y=loco_statistic,
            hue="event_uuid", palette=dict_colors_event, size=8,
            ax=panel_ax, legend=False,
        )

    # Bottom row, step 2: one light-grey violin panel per segment type.
    for panel_ax, segment_key in zip(axs[1], ("bl", "sz", "am")):
        sns.violinplot(
            data=df_stats[df_stats["segment_type"] == value_mapping[segment_key]],
            x="window_type", y=loco_statistic,
            hue_order=[True, False], split=True,
            hue=True,
            palette=["lightgrey"],
            ax=panel_ax,
        )
        panel_ax.legend_ = None  # drop the legend reference on this axis

    # Make the violin surfaces transparent. The collection indices were found by
    # trial and error (per the original author), so they depend on the draw order above.
    for panel_ax in axs[1]:
        for collection_index in (-2, -4):
            plt.setp(panel_ax.collections[collection_index], alpha=.3)

    for panel_ax, segment_key in zip(axs[1], ("bl", "sz", "am")):
        panel_ax.set_title(value_mapping[segment_key])

    plt.tight_layout()

    # The hard-coded `and False` keeps this figure from ever being written to disk.
    if save_figs and False:
        fig_fpath = os.path.join(
            output_folder,
            f'loco_per_segment_sz-excluded_mean_95ci_{get_datetime_for_fname()}{file_format}',
        )
        plt.savefig(fig_fpath, format=file_format.rsplit(".", 1)[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

## Plot results with illustrative example of data in top row

In [None]:
if not is_chr2:
    fig, axs = plt.subplots(2, 3, figsize=(22,14))
    # Hard-coded recording used as the illustrative example.
    dset_example = traces_dict["d158cd12ad77489a827dab1173a933f9"]

    # First row: example traces against time, all restricted to the same time window.
    example_panels = (
        ("lv_totdist", "Total distance (a.u.)"),
        ("lv_running", "Running? (binary)"),
        ("lv_speed", "Velocity (a.u.)"),
    )
    for panel_ax, (trace_key, trace_label) in zip(axs[0], example_panels):
        panel_ax.plot(dset_example["lv_t_s"], dset_example[trace_key])
        panel_ax.set_xlim((350, 450))
        panel_ax.set_ylabel(trace_label, fontsize=22)

    # Fixed y ranges for the velocity and distance panels.
    axs[0][2].set_ylim((-0.2, 0.7))
    axs[0][0].set_ylim((0, 1400))

    # Only the middle panel carries the shared time label.
    axs[0][1].set_xlabel("Time (s)", fontsize=22)

    # Second row: statistics per mouse. loco_statistic is not set in this cell;
    # it must have been defined by a previously executed cell.
    for panel_ax, panel_metric in zip(axs[1], (loco_statistic, "running", "speed")):
        sns.lineplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="mouse_id", palette=dict_colors_mouse,
            ax=panel_ax, legend=False,
        )

    plt.tight_layout()
    plt.show()

In [None]:
if not is_chr2:
    # Work on an independent (deep) copy so df_stats itself is left untouched.
    df_stats2 = df_stats.copy(deep=True)

In [None]:
if not is_chr2:
    # Translate segment codes through value_mapping; values that are not keys stay as they are.
    df_stats2["segment_type"] = df_stats2["segment_type"].replace(to_replace=value_mapping)

In [None]:
if not is_chr2:
    # Restrict the relabelled copy to the "bl" and "am" labels of value_mapping.
    df_stats2_only_bl_am = df_stats2.loc[
        df_stats2["segment_type"].isin([value_mapping[key] for key in ("bl", "am")])
    ]

In [None]:
if not is_chr2:
    loco_statistic = "totdist_abs"
    sns.set_style("whitegrid")
    fig, axs = plt.subplots(2, 3, figsize=(22,12))

    panel_metrics = (loco_statistic, "running", "speed")

    # Top row: one line per event_uuid; spines are removed after every panel is drawn.
    for panel_ax, panel_metric in zip(axs[0], panel_metrics):
        sns.lineplot(
            data=df_stats2_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, estimator=None,
            ax=panel_ax, linestyle="-", marker="o", markersize=13, linewidth=1,
            legend=False,
        )
        sns.despine(left=True, bottom=True, top=True, right=True)
    # Only the first panel gets descriptive axis labels.
    axs[0][0].set(xlabel='Segment', ylabel='Total (absolute) distance, a.u.')

    # Bottom row, step 1: individual events as points.
    for panel_ax, panel_metric in zip(axs[1], panel_metrics):
        sns.stripplot(
            data=df_stats2_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, size=8,
            ax=panel_ax, legend=False,
        )

    # Bottom row, step 2: light-grey violins over the same data.
    for panel_ax, panel_metric in zip(axs[1], panel_metrics):
        sns.violinplot(
            data=df_stats2_only_bl_am, x="segment_type", y=panel_metric,
            hue_order=[True, False], split=True,
            hue=True,
            palette=["lightgrey"],
            ax=panel_ax,
        )
        panel_ax.legend_ = None  # drop the legend reference on this axis

    plt.tight_layout()

    # The hard-coded `and False` keeps this figure from ever being written to disk.
    if save_figs and False:
        fig_fpath = os.path.join(
            output_folder,
            f'loco_per_segment_sz-excluded_line_and_violin_{get_datetime_for_fname()}{file_format}',
        )
        plt.savefig(fig_fpath, format=file_format.rsplit(".", 1)[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

In [None]:
if not is_chr2:
    def get_y(metric="totdist_abs"):
        """Return one ``[event_uuid, bl value, am value]`` triplet per event.

        The rows of ``df_stats`` whose ``segment_type`` is "bl" or "am" are grouped
        by ``event_uuid``; for each event the first "bl" row and the first "am"
        row of column ``metric`` are taken.

        Fix: the ``metric`` argument is now actually used. Previously the
        ``totdist_abs`` column was read regardless of the argument, so every
        metric returned distance values.

        Raises IndexError (from ``.iloc[0]``) if an event has no "bl" or no "am" row.
        """
        # NOTE(review): rows are selected with the raw "bl"/"am" codes here, while
        # other cells filter df_stats through value_mapping - confirm which labels
        # df_stats holds at this point.
        df_bl_am = df_stats[df_stats["segment_type"].isin(["bl", "am"])]
        return [
            [
                uuid,
                g[g["segment_type"] == "bl"][metric].iloc[0],
                g[g["segment_type"] == "am"][metric].iloc[0],
            ]
            for uuid, g in df_bl_am.groupby("event_uuid")
        ]

    y_totdist_abs = get_y("totdist_abs")
    y_running = get_y("running")
    y_speed = get_y("speed")
    x = [0, 1]  # x positions of the two segments for the manual line plots

    fig, axs = plt.subplots(2, 3, figsize=(22,12))

    # Top row: one manually drawn line per event. The three lists come from the
    # same groupby, so they are in the same event order.
    for event_totdist, event_running, event_speed in zip(y_totdist_abs, y_running, y_speed):
        # Colour taken from the first df_stats row of this event.
        c = df_stats[df_stats["event_uuid"] == event_totdist[0]].color.iloc[0]
        axs[0][0].plot(x, event_totdist[1:], "o-", color=c)
        axs[0][1].plot(x, event_running[1:], "o-", color=c)
        axs[0][2].plot(x, event_speed[1:], "o-", color=c)

    # loco_statistic is not set in this cell; it must have been defined by a
    # previously executed cell.
    panel_metrics = (loco_statistic, "running", "speed")

    # Bottom row, step 1: individual events as points.
    for panel_ax, panel_metric in zip(axs[1], panel_metrics):
        sns.stripplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, size=8,
            ax=panel_ax, legend=False,
        )

    # Bottom row, step 2: light-grey violins over the same data.
    for panel_ax, panel_metric in zip(axs[1], panel_metrics):
        sns.violinplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue_order=[True, False], split=True,
            hue=True,
            palette=["lightgrey"],
            ax=panel_ax,
        )
        panel_ax.legend_ = None  # drop the legend reference on this axis

    plt.tight_layout()

    # The hard-coded `and False` keeps this figure from ever being written to disk.
    if save_figs and False:
        fig_fpath = os.path.join(output_folder, f'loco_per_segment_sz-excluded_line_and_violin_{get_datetime_for_fname()}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

# 2. ChR2 (bl - stim - (Sz) - am protocol)

### Rename bl -> baseline, sz -> stimulation, am -> post-stimulation

In [None]:
if is_chr2:
    # Display labels for the three segment codes in the ChR2 protocol.
    value_mapping = dict(
        bl="baseline",
        sz="stimulation",
        am="post-stimulation",
    )

In [None]:
if is_chr2:
    # Translate the segment codes of both statistics tables through value_mapping.
    # Values that are not keys of value_mapping (for instance labels already
    # translated by an earlier run of this cell) are kept unchanged, so the cell
    # can be re-executed without raising KeyError.
    for stats_frame in (df_stats, df_stats_per_mouse_mean):
        stats_frame["segment_type"] = stats_frame["segment_type"].map(
            lambda segment: value_mapping.get(segment, segment)
        )

In [None]:
if is_chr2:
    # Keep only the rows whose segment label is the "bl" or "am" entry of value_mapping.
    df_stats_only_bl_am = df_stats.loc[
        df_stats["segment_type"].isin([value_mapping[key] for key in ("bl", "am")])
    ]

# Plot results

In [None]:
if is_chr2:
    # 2x3 overview: one line per event_uuid across segment types, one metric per panel.
    fig, axs = plt.subplots(2, 3, figsize=(22,16))

    chr2_overview_metrics = (
        "totdist_abs", "running", "speed",
        "running%", "running_episodes", "running_episodes_mean_length",
    )
    # axs.flat walks the grid row by row, which matches the metric order above.
    for panel_ax, panel_metric in zip(axs.flat, chr2_overview_metrics):
        sns.lineplot(
            data=df_stats_only_bl_am, x="segment_type", y=panel_metric,
            hue="event_uuid", palette=dict_colors_event, estimator=None,
            ax=panel_ax, marker="o", markersize=20, legend=False,
        )

    plt.tight_layout()

    # Deliberately never saved (`and False`): the figure would first need to be
    # separated by experiment type (ctl, sz, szsd...).
    if save_figs and False:
        fig_fpath = os.path.join(
            output_folder,
            f'loco_per_segment_{get_datetime_for_fname()}{file_format}',
        )
        plt.savefig(fig_fpath, format=file_format.rsplit(".", 1)[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

### Plot each category

### Plot for CA1

In [None]:
if is_chr2:
    # CA1 figure: one row of 6 metric panels per experiment type, one line per event_uuid.
    df_stats_ca1 = df_stats[df_stats["window_type"] == "CA1"]
    n_exp_types = len( df_stats_ca1.exp_type.unique())

    # Rows are drawn in the order given by exp_types (defined elsewhere in the
    # notebook; presumably the experiment types in display order - verify).
    # Fix: the grid gets enough rows for every entry of exp_types, and
    # squeeze=False keeps axs two-dimensional even with a single row. Previously
    # a single experiment type, or more entries in exp_types than experiment
    # types present in the CA1 data, made the axs[row][col] indexing fail.
    exp_type_order = list(exp_types)
    n_rows = max(n_exp_types, len(exp_type_order), 1)
    fig, axs = plt.subplots(n_rows, 6, figsize=(42,8*n_rows), squeeze=False)

    ca1_panel_metrics = (
        "totdist_abs_norm", "running%", "running_episodes",
        "avg_speed", "running_episodes_mean_length", "max_speed",
    )
    for i_exp_type, exp_type in enumerate(exp_type_order):
        df_exp_type = df_stats_only_bl_am[(df_stats_only_bl_am["exp_type"] == exp_type) & (df_stats_only_bl_am["window_type"] == "CA1")]
        for panel_ax, panel_metric in zip(axs[i_exp_type], ca1_panel_metrics):
            panel_ax.set_title(f"{exp_type}")
            sns.lineplot(
                data=df_exp_type, x="segment_type", y=panel_metric,
                hue="event_uuid", palette=dict_colors_event, estimator=None,
                ax=panel_ax, marker="o", markersize=20, legend=False,
            )

    plt.tight_layout()

    if save_figs:
        fig_fpath = os.path.join(output_folder, f'loco_per_segment_chr2_ca1_{get_datetime_for_fname()}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

### Plot for NC

In [None]:
if is_chr2:
    # NC figure: one row of 6 metric panels per experiment type, one line per
    # event_uuid. Skipped entirely when there are no NC rows.
    df_stats_nc = df_stats[df_stats["window_type"] == "NC"]
    if len(df_stats_nc) > 0:
        n_exp_types = len(df_stats_nc.exp_type.unique())

        # Rows are drawn in the order given by exp_types (defined elsewhere in the
        # notebook; presumably the experiment types in display order - verify).
        # Fix: the grid gets enough rows for every entry of exp_types, and
        # squeeze=False keeps axs two-dimensional even with a single row.
        # Previously a single experiment type, or more entries in exp_types than
        # experiment types present in the NC data, made axs[row][col] fail.
        exp_type_order = list(exp_types)
        n_rows = max(n_exp_types, len(exp_type_order), 1)
        fig, axs = plt.subplots(n_rows, 6, figsize=(42,8*n_rows), squeeze=False)

        nc_panel_metrics = (
            "totdist_abs_norm", "running%", "running_episodes",
            "avg_speed", "running_episodes_mean_length", "max_speed",
        )
        for i_exp_type, exp_type in enumerate(exp_type_order):
            df_exp_type = df_stats_only_bl_am[(df_stats_only_bl_am["exp_type"] == exp_type) & (df_stats_only_bl_am["window_type"] == "NC")]
            for panel_ax, panel_metric in zip(axs[i_exp_type], nc_panel_metrics):
                panel_ax.set_title(f"{exp_type}")
                sns.lineplot(
                    data=df_exp_type, x="segment_type", y=panel_metric,
                    hue="event_uuid", palette=dict_colors_event, estimator=None,
                    ax=panel_ax, marker="o", markersize=20, legend=False,
                )

        plt.tight_layout()

        if save_figs:
            fig_fpath = os.path.join(output_folder, f'loco_per_segment_chr2_nc_{get_datetime_for_fname()}{file_format}')
            plt.savefig(fig_fpath, format=file_format.split(".")[-1])
            print(f"Saved to {fig_fpath}")
        plt.show()

# Mouse-aggregate
i.e. mean per mouse

## CA1

In [None]:
# Per-mouse means for the CA1 window only. segment_type is not relabelled here
# (the original kept that step commented out).
df_stats_per_mouse_mean_ca1 = df_stats_per_mouse_mean.loc[
    df_stats_per_mouse_mean["window_type"].eq("CA1")
]

In [None]:
# Keep only the rows of the two segments labelled value_mapping["bl"] and value_mapping["am"].
bl_am_labels = [value_mapping["bl"], value_mapping["am"]]
in_bl_or_am = df_stats_per_mouse_mean_ca1["segment_type"].isin(bl_am_labels)
df_stats_per_mouse_mean_ca1_only_bl_am = df_stats_per_mouse_mean_ca1.loc[in_bl_or_am]

In [None]:
# Order the rows by mouse, then experiment type, then segment type.
sort_columns = ["mouse_id", "exp_type", "segment_type"]
df_stats_per_mouse_mean_ca1_only_bl_am = df_stats_per_mouse_mean_ca1_only_bl_am.sort_values(by=sort_columns)

In [None]:
# CA1, per-mouse means: one row of six panels per experiment type, one panel per metric,
# one line per mouse (coloured via dict_colors_mouse) across the segment types.
panel_metrics = ["totdist_abs_norm", "running%", "running_episodes", "avg_speed", "running_episodes_mean_length", "max_speed"]
n_exp_types = len(df_stats_per_mouse_mean_ca1.exp_type.unique())
fig, axs = plt.subplots(n_exp_types, 6, figsize=(42, 8*n_exp_types))
for i_exp_type, exp_type in enumerate(exp_types):
    rows_of_exp_type = (df_stats_per_mouse_mean_ca1_only_bl_am["exp_type"] == exp_type) & (df_stats_per_mouse_mean_ca1_only_bl_am["window_type"] == "CA1")
    df_exp_type = df_stats_per_mouse_mean_ca1_only_bl_am[rows_of_exp_type]
    # plt.subplots returns a 2-D array of axes for several rows, but a 1-D array for a single row
    row_axs = axs[i_exp_type] if n_exp_types > 1 else axs
    for i_metric, metric in enumerate(panel_metrics):
        ax = row_axs[i_metric]
        if i_metric == 0:
            # only the first panel of each row gets the enlarged title
            ax.set_title(f"{exp_type}", fontsize=30)
        else:
            ax.set_title(f"{exp_type}")
        sns.lineplot(data=df_exp_type, x="segment_type", y=metric, hue="mouse_id", palette=dict_colors_mouse, estimator=None, ax=ax, marker="o", markersize=20, legend=False)

plt.tight_layout()

if save_figs:
    fname_exp_type = "chr2" if is_chr2 else "tmev"
    fig_fname = f'loco_per_segment_ca1_mouse_means_{fname_exp_type}_{get_datetime_for_fname()}{file_format}'
    fig_fpath = os.path.join(output_folder, fig_fname)
    # file_format starts with a dot (e.g. ".pdf"); savefig expects the bare format name
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

## NC

In [None]:
# Only build the NC per-mouse table when there are NC rows at all.
if len(df_stats_nc) > 0:
    is_nc_window = df_stats_per_mouse_mean["window_type"] == "NC"
    df_stats_per_mouse_mean_nc = df_stats_per_mouse_mean.loc[is_nc_window]
    # Relabelling of "segment_type" through value_mapping is disabled (kept for reference):
    # df_stats_per_mouse_mean_nc["segment_type"] = df_stats_per_mouse_mean_nc["segment_type"].apply(lambda x: value_mapping[x])

In [None]:
if len(df_stats_nc) > 0:
    # Keep only the rows of the two segments labelled value_mapping["bl"] and value_mapping["am"].
    bl_am_labels = [value_mapping["bl"], value_mapping["am"]]
    in_bl_or_am = df_stats_per_mouse_mean_nc["segment_type"].isin(bl_am_labels)
    df_stats_per_mouse_mean_nc_only_bl_am = df_stats_per_mouse_mean_nc.loc[in_bl_or_am]

In [None]:
if len(df_stats_nc) > 0:
    # Order the rows by mouse, then experiment type, then segment type.
    sort_columns = ["mouse_id", "exp_type", "segment_type"]
    df_stats_per_mouse_mean_nc_only_bl_am = df_stats_per_mouse_mean_nc_only_bl_am.sort_values(by=sort_columns)

In [None]:
# NC, per-mouse means: one row of six panels per experiment type, one panel per metric,
# one line per mouse (coloured via dict_colors_mouse) across the segment types.
if len(df_stats_nc) > 0:
    panel_metrics = ["totdist_abs_norm", "running%", "running_episodes", "avg_speed", "running_episodes_mean_length", "max_speed"]
    n_exp_types = len(df_stats_per_mouse_mean_nc.exp_type.unique())
    fig, axs = plt.subplots(n_exp_types, 6, figsize=(42, 8*n_exp_types))
    for i_exp_type, exp_type in enumerate(exp_types):
        # NOTE(review): unlike the CA1 figure, this plots df_stats_per_mouse_mean_nc (all segment types,
        # unsorted) and not df_stats_per_mouse_mean_nc_only_bl_am - confirm that this is intended.
        rows_of_exp_type = (df_stats_per_mouse_mean_nc["exp_type"] == exp_type) & (df_stats_per_mouse_mean_nc["window_type"] == "NC")
        df_exp_type = df_stats_per_mouse_mean_nc[rows_of_exp_type]
        # plt.subplots returns a 2-D array of axes for several rows, but a 1-D array for a single row
        row_axs = axs[i_exp_type] if n_exp_types > 1 else axs
        for ax, metric in zip(row_axs, panel_metrics):
            ax.set_title(f"{exp_type}")
            sns.lineplot(data=df_exp_type, x="segment_type", y=metric, hue="mouse_id", palette=dict_colors_mouse, estimator=None, ax=ax, marker="o", markersize=20, legend=False)

    plt.tight_layout()

    if save_figs:
        fname_exp_type = "chr2" if is_chr2 else "tmev"
        # The file name is tagged "nc": this is the NC figure, and a "ca1" tag would mislabel it.
        fig_fpath = os.path.join(output_folder, f'loco_per_segment_{fname_exp_type}_nc_mouse_means_{get_datetime_for_fname()}{file_format}')
        # file_format starts with a dot (e.g. ".pdf"); savefig expects the bare format name
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

# Statistical testing

In [None]:
def paired_t_test(df, column_name="totdist_abs", one_sided=False, greater_expected=value_mapping["am"], pair_by="event_uuid"):
    """Run a paired t-test of the "am" against the "bl" segment values of one column.

    The rows of ``df`` are grouped by ``pair_by``; every group must contain exactly one row whose
    ``segment_type`` equals ``value_mapping["bl"]`` and exactly one row whose ``segment_type`` equals
    ``value_mapping["am"]``. The two values of ``column_name`` of each group form one pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``segment_type``, ``pair_by`` and ``column_name``.
    column_name : str
        Column holding the values to compare.
    one_sided : bool
        If False (default), a two-sided test is run and ``greater_expected`` is ignored.
        If True, a one-sided test is run in the direction given by ``greater_expected``.
    greater_expected : value_mapping["am"] or value_mapping["bl"]
        Segment expected to have the greater values; only used when ``one_sided`` is True.
    pair_by : str
        Column that identifies which "bl" and "am" rows belong together.

    Returns
    -------
    The result object of ``scipy.stats.ttest_rel(am_vals, bl_vals, ...)`` (has ``statistic`` and ``pvalue``).

    Raises
    ------
    ValueError
        If ``one_sided`` is True and ``greater_expected`` is neither of the two segment labels.
    AssertionError
        If a group does not contain exactly one "bl" and one "am" row.
    """
    bl_label = value_mapping["bl"]
    am_label = value_mapping["am"]

    # Translate the requested test into the `alternative` argument of ttest_rel(am, bl).
    if not one_sided:
        alternative = "two-sided"
    elif greater_expected == am_label:
        alternative = "greater"  # first dataset (am) expected to be greater
    elif greater_expected == bl_label:
        alternative = "less"  # second dataset (bl) expected to be greater
    else:
        raise ValueError(f"paired_t_test(): invalid greater_expected value {greater_expected}")

    am_vals = []
    bl_vals = []
    for pair_key, g in df.groupby(pair_by):
        bl_rows = g[g["segment_type"] == bl_label]
        am_rows = g[g["segment_type"] == am_label]
        assert (len(bl_rows) == 1) and (len(am_rows) == 1), \
            f"paired_t_test(): {pair_by}={pair_key} needs exactly one '{bl_label}' and one '{am_label}' row"
        bl_vals.append(bl_rows[column_name].values[0])
        am_vals.append(am_rows[column_name].values[0])
    am_vals = np.array(am_vals)
    bl_vals = np.array(bl_vals)

    return ttest_rel(am_vals, bl_vals, alternative=alternative)




## CA1

In [None]:
# Session-level rows used for the CA1 statistics: "bl"/"am" segments of CA1-window recordings.
is_bl_or_am = df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
is_ca1_window = df_stats["window_type"] == "CA1"
stat_data_ca1 = df_stats[is_bl_or_am & is_ca1_window]

In [None]:
# For every metric, print a paired t-test (paired_t_test with its default settings) per experiment type.
print("CA1")
for statistic in STAT_METRICS:
    print(statistic)
    groups_by_exp_type = stat_data_ca1.sort_values(by="exp_type").groupby("exp_type")
    for exp_type, exp_g in groups_by_exp_type:
        ttest_result = paired_t_test(exp_g, statistic)
        # number of pairs = number of "bl" rows in the group
        n_pairs = len(exp_g[exp_g["segment_type"] == value_mapping["bl"]])
        print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={ttest_result.statistic}, p={ttest_result.pvalue}\n')


## NC

In [None]:
# All session-level rows whose window_type is "NC" (also used as the "is there any NC data" switch).
is_nc_window = df_stats["window_type"] == "NC"
df_stats_nc = df_stats.loc[is_nc_window]

In [None]:
# Session-level rows used for the NC statistics: "bl"/"am" segments of NC-window recordings.
is_bl_or_am = df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
is_nc_window = df_stats["window_type"] == "NC"
stat_data_nc = df_stats[is_bl_or_am & is_nc_window]

In [None]:
# For every metric, print a paired t-test (paired_t_test with its default settings) per experiment type.
if len(df_stats_nc) > 0:
    print("NC")
    for statistic in STAT_METRICS:
        print(statistic)
        groups_by_exp_type = stat_data_nc.sort_values(by="exp_type").groupby("exp_type")
        for exp_type, exp_g in groups_by_exp_type:
            ttest_result = paired_t_test(exp_g, statistic)
            # number of pairs = number of "bl" rows in the group
            n_pairs = len(exp_g[exp_g["segment_type"] == value_mapping["bl"]])
            print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={ttest_result.statistic}, p={ttest_result.pvalue}\n')


## Mouse-aggregate

### CA1

In [None]:
# Per-mouse mean rows used for the CA1 statistics: "bl"/"am" segments with window_type "CA1".
is_bl_or_am = df_stats_per_mouse_mean_ca1["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
is_ca1_window = df_stats_per_mouse_mean_ca1["window_type"] == "CA1"
stat_data_mouse_means_ca1 = df_stats_per_mouse_mean_ca1[is_bl_or_am & is_ca1_window]

In [None]:
# Same report as for the session-level data, but the pairs are formed per mouse (pair_by="mouse_id").
print("CA1")
for statistic in STAT_METRICS:
    print(statistic)
    groups_by_exp_type = stat_data_mouse_means_ca1.sort_values(by="exp_type").groupby("exp_type")
    for exp_type, exp_g in groups_by_exp_type:
        ttest_result = paired_t_test(exp_g, statistic, pair_by="mouse_id")
        # number of pairs = number of "bl" rows in the group
        n_pairs = len(exp_g[exp_g["segment_type"] == value_mapping["bl"]])
        print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={ttest_result.statistic}, p={ttest_result.pvalue}\n')


### NC

In [None]:
if len(df_stats_nc) > 0:
    # Per-mouse mean rows used for the NC statistics: "bl"/"am" segments with window_type "NC".
    is_bl_or_am = df_stats_per_mouse_mean_nc["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
    is_nc_window = df_stats_per_mouse_mean_nc["window_type"] == "NC"
    stat_data_mouse_means_nc = df_stats_per_mouse_mean_nc[is_bl_or_am & is_nc_window]

In [None]:
# Same report as for the session-level data, but the pairs are formed per mouse (pair_by="mouse_id").
if len(df_stats_nc) > 0:
    print("NC")
    for statistic in STAT_METRICS:
        print(statistic)
        groups_by_exp_type = stat_data_mouse_means_nc.sort_values(by="exp_type").groupby("exp_type")
        for exp_type, exp_g in groups_by_exp_type:
            ttest_result = paired_t_test(exp_g, statistic, pair_by="mouse_id")
            # number of pairs = number of "bl" rows in the group
            n_pairs = len(exp_g[exp_g["segment_type"] == value_mapping["bl"]])
            print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={ttest_result.statistic}, p={ttest_result.pvalue}\n')


# Waterfall plot & sanity check 

In [None]:
# Nested lookup: experiment type -> window type -> mouse id -> [uuid1, uuid2, ...]
exptype_wintype_id_dict = {}
for uuid in traces_meta_dict.keys():
    meta = traces_meta_dict[uuid]
    # outside of chr2 datasets every recording is filed under the single experiment type "tmev"
    exp_type = meta["exp_type"] if is_chr2 else "tmev"
    win_type = meta["window_type"]
    mouse_id = meta["mouse_id"]
    # setdefault creates each missing level on first use and returns the existing one otherwise
    uuids_of_mouse = (
        exptype_wintype_id_dict
        .setdefault(exp_type, dict())
        .setdefault(win_type, dict())
        .setdefault(mouse_id, [])
    )
    uuids_of_mouse.append(uuid)

In [None]:
# Replace missing avg_speed values with 0 so that the formatting/plotting code below never sees NaN.
avg_speed_without_nan = df_stats_only_bl_am["avg_speed"].fillna(0)
df_stats_only_bl_am["avg_speed"] = avg_speed_without_nan

In [None]:
# Sanity check: no NaN may be left in avg_speed.
assert not df_stats_only_bl_am["avg_speed"].isna().any()

In [None]:
def plotNikLoco(exp_type, cut_to_segments=True, bl_equal_post = True,  show_debug=False, show_fig=True, n_bl_frames=bl_manual_length, ):
    """Plot a waterfall of locomotion speed and mean fluorescence for all recordings of one experiment type.

    For every recording listed in ``exptype_wintype_id_dict[exp_type]`` two traces are stacked:
    the LabView speed (black, shifted so that its minimum sits on the current offset, not rescaled) and,
    one step higher, the mean fluorescence rescaled to ``LV_SPEED_AMPL`` in the colour that ``df_colors``
    assigns to the mouse. Orange vertical lines mark the times of the frames in ``begin_end_frames_dict``.
    The figure is saved if the global ``save_figs`` is set.

    Parameters
    ----------
    exp_type
        Key of ``exptype_wintype_id_dict`` selecting the recordings to plot.
    cut_to_segments : bool
        If True, limit the x axis to the range from the earliest baseline start to the latest
        post-segment end over all plotted recordings.
    bl_equal_post : bool
        If True, the post segment spans ``n_bl_frames`` frames (clamped to the last frame);
        otherwise it runs until the last frame of the recording.
    show_debug : bool
        If True, annotate both segments with the metrics stored in ``df_stats_only_bl_am`` and mark the
        episodes of ``loco_binary_traces`` with red horizontal bars. Also switches the output file name.
    show_fig : bool
        If True, call ``plt.show()`` at the end.
    n_bl_frames : int
        Requested baseline length in frames (clamped to the frames available before the stimulation).
    """
    AMPLITUDE = LV_SPEED_AMPL
    expected_n_frames = 13483  # recordings whose fluorescence trace has a different length are reported
    offset = 0
    # plotting limits, only updated/used when cutting to the segments
    t_min = np.inf
    t_max = -1

    # flatten window type -> mouse -> uuids into one list, keeping the nested iteration order
    recordings = [
        (mouse_id, event_uuid)
        for mice_of_window in exptype_wintype_id_dict[exp_type].values()
        for mouse_id, event_uuids in mice_of_window.items()
        for event_uuid in event_uuids
    ]
    n_recordings_with_type = len(recordings)
    plt.figure(figsize=(18,n_recordings_with_type*3))

    for mouse_id, event_uuid in recordings:
        df_quantities = df_stats_only_bl_am[df_stats_only_bl_am["event_uuid"] == event_uuid]
        print(event_uuid)
        bl_rows = df_quantities[df_quantities["segment_type"] == value_mapping["bl"]]
        am_rows = df_quantities[df_quantities["segment_type"] == value_mapping["am"]]
        bl_totdist_abs_norm = bl_rows.totdist_abs_norm.iloc[0]
        am_totdist_abs_norm = am_rows.totdist_abs_norm.iloc[0]
        bl_running_episodes = bl_rows.running_episodes.iloc[0]
        am_running_episodes = am_rows.running_episodes.iloc[0]
        bl_running_percent = bl_rows["running%"].iloc[0]
        am_running_percent = am_rows["running%"].iloc[0]
        bl_avg_speed = bl_rows.avg_speed.iloc[0]
        am_avg_speed = am_rows.avg_speed.iloc[0]
        bl_running_episodes_mean_length = bl_rows.running_episodes_mean_length.iloc[0]
        am_running_episodes_mean_length = am_rows.running_episodes_mean_length.iloc[0]

        if is_chr2:  # chr2 experiments contain the whole session in one file
            df_segments = ddoc.getSegmentsForUUID(event_uuid)
            stim_rows = df_segments[df_segments["interval_type"] == "stimulation"]
            i_frame_stim_begin = stim_rows.frame_begin.iloc[0] - 1
            i_frame_stim_end = stim_rows.frame_end.iloc[0] - 1  # in 1 indexing, inclusive
        else:  # in tmev recordings, there is no stim, but it is the seizure segment (see value_mapping)
            metadata_dict = traces_meta_dict[event_uuid]
            i_frame_stim_begin = metadata_dict["n_bl_frames"]
            i_frame_stim_end = metadata_dict["n_frames"] - metadata_dict["n_am_frames"]

        # Work on a copy: the stimulation clipping below must not alter the trace stored in traces_dict.
        nik_trace = np.array(traces_dict[event_uuid]["mean_fluo"], copy=True)

        if is_chr2:
            nik_trace[i_frame_stim_begin:i_frame_stim_end] = 1.2*max(nik_trace[i_frame_stim_end+1:])  # reduce stim amplitude

        min_nik = min(nik_trace)
        max_nik = max(nik_trace)

        t = traces_dict[event_uuid]["lv_t_s"]

        # get begin and end frames of baseline and post-stim segments
        i_frame_bl_end = i_frame_stim_begin
        i_frame_post_begin = i_frame_stim_end
        i_frame_bl_begin = max(0, i_frame_bl_end - n_bl_frames)
        # clamp to the last valid index of t, since i_frame_post_end is used to index t below
        i_last_frame = len(t) - 1
        if bl_equal_post:
            i_frame_post_end = min(i_frame_post_begin + n_bl_frames, i_last_frame)
        else:
            i_frame_post_end = i_last_frame

        # mark segment borders
        plt.vlines(x=t[begin_end_frames_dict[event_uuid]], ymin = offset, ymax = offset+2.2*AMPLITUDE, color="orange")

        if cut_to_segments:
            t_min = min(t_min, t[i_frame_bl_begin])
            t_max = max(t_max, t[i_frame_post_end])

        labview_trace = traces_dict[event_uuid]["lv_speed"]

        if show_debug:
            # add the metric values for bl and post-stim
            plt.text(t[i_frame_bl_begin] + (t[i_frame_bl_end] - t[i_frame_bl_begin])*0.1, offset+1.5*AMPLITUDE, f"d={bl_totdist_abs_norm:.3f}, {bl_running_percent:.2f}%, eps={bl_running_episodes} mean {bl_running_episodes_mean_length:.2f},\nv={bl_avg_speed:.3f}", fontsize=20, color="red")
            plt.text(t[i_frame_post_begin] + (t[i_frame_post_end] - t[i_frame_post_begin])*0.1, offset+1.5*AMPLITUDE, f"d={am_totdist_abs_norm:.3f}, {am_running_percent:.2f}%, eps={am_running_episodes} mean {am_running_episodes_mean_length:.2f},\nv={am_avg_speed:.3f}, uuid: {event_uuid}", fontsize=20, color="red")

            # draw a horizontal bar wherever the binary locomotion trace shows locomotion
            labview_running_merged = loco_binary_traces[event_uuid]
            episodes = get_episodes(labview_running_merged, merge_episodes=False, merge_threshold_frames=0, return_begin_end_frames = True)
            # convert [(i_begin, i_end), ...] to [i_begin1, i_begin2, ...], [i_end1, i_end2, ...]
            episode_begin_frames = np.array([episode[0] for episode in episodes])
            episode_end_frames = np.array([episode[1] for episode in episodes])
            if len(episode_begin_frames) > 0:
                bar_heights = [offset-0.1*AMPLITUDE] * len(episode_begin_frames)
                plt.hlines(xmin = t[episode_begin_frames], xmax = t[episode_end_frames], y=bar_heights, linewidth=6, color="red")

        min_lv = min(labview_trace)
        # amplitude is matched to labview speed raw values, so no scaling here
        plt.plot(t, labview_trace - min_lv + offset, color="black")
        offset +=1.1*AMPLITUDE

        color = df_colors[df_colors["mouse_id"] == mouse_id].color.iloc[0]
        plt.plot(t, AMPLITUDE*(nik_trace- min_nik)/(max_nik - min_nik)+offset, color=color)
        offset+=1.1*AMPLITUDE

        n_fluo_frames = len(traces_dict[event_uuid]["mean_fluo"])
        if n_fluo_frames != expected_n_frames:
            print(f'{event_uuid}: {n_fluo_frames}, {mouse_id}, {exp_type}')

    if cut_to_segments:
        ax = plt.gca()
        ax.set_xlim((t_min, t_max))
    print(f"Total: {n_recordings_with_type} traces")
    plt.suptitle(exp_type, fontsize=22)
    plt.yticks([])
    plt.xlabel("Time (s)", fontsize=14)
    plt.tight_layout()
    if save_figs:
        # NOTE(review): hard-coded, machine-specific folder; other cells save to output_folder - confirm.
        if not show_debug:
            out_fpath = f"D:\\Downloads\\waterfall_{exp_type}_{get_datetime_for_fname()}{file_format}"
        else:
            out_fpath = f"D:\\Downloads\\waterfall_sanity_check_{exp_type}_{get_datetime_for_fname()}{file_format}"
        plt.savefig(out_fpath,bbox_inches='tight', dpi=300)
        print(f"Saved as {out_fpath}")
    if show_fig:
        plt.show()


In [None]:
def compareLoco(exp_type):
    """Stack the LFP movement trace and the LabView speed trace of every recording of ``exp_type``.

    Per recording, the LFP movement trace is drawn in black, rescaled to ``LV_SPEED_AMPL``, and the LabView
    speed is drawn one step higher in the colour that ``df_colors`` assigns to the mouse (shifted by its
    minimum, not rescaled). The figure is saved if the global ``save_figs`` is set, then shown.
    """
    AMPLITUDE = LV_SPEED_AMPL
    offset = 0

    # flatten window type -> mouse -> uuids into one list, keeping the nested iteration order
    recordings = [
        (mouse_id, event_uuid)
        for mice_of_window in exptype_wintype_id_dict[exp_type].values()
        for mouse_id, event_uuids in mice_of_window.items()
        for event_uuid in event_uuids
    ]
    n_recordings_with_type = len(recordings)
    fig = plt.figure(figsize=(18,n_recordings_with_type*3))

    for mouse_id, event_uuid in recordings:
        print(event_uuid)
        # NOTE(review): the stimulation frame indices are looked up here but not used further below.
        if is_chr2:
            df_segments = ddoc.getSegmentsForUUID(event_uuid)
            stim_rows = df_segments[df_segments["interval_type"] == "stimulation"]
            i_frame_stim_begin = stim_rows.frame_begin.iloc[0] - 1
            i_frame_stim_end = stim_rows.frame_end.iloc[0] - 1  # in 1 indexing, inclusive
        else:
            metadata_dict = traces_meta_dict[event_uuid]
            i_frame_stim_begin = metadata_dict["n_bl_frames"]
            i_frame_stim_end = metadata_dict["n_frames"] - metadata_dict["n_am_frames"]

        recording = traces_dict[event_uuid]
        lfp_mov_trace = recording["lfp_mov_y"]
        t_lfp = recording["lfp_mov_t"]
        lfp_min = min(lfp_mov_trace)
        lfp_max = max(lfp_mov_trace)

        t = recording["lv_t_s"]
        labview_trace = recording["lv_speed"]
        # the minimum is taken without the first 10 samples
        min_lv = min(labview_trace[10:])

        # scale LFP to match LabView in amplitude, at least roughly
        lfp_scaled = AMPLITUDE*(lfp_mov_trace - lfp_min)/(lfp_max - lfp_min)
        plt.plot(t_lfp, lfp_scaled+offset, color="black")
        offset += 1.1*AMPLITUDE

        # report recordings whose LFP time axis starts more than 1000 after the LabView one
        if t[0] + 1000 < t_lfp[0]:
            print(f"{event_uuid} {mouse_id}")

        color = df_colors[df_colors["mouse_id"] == mouse_id].color.iloc[0]
        # do not scale, keep original amplitude
        plt.plot(t, labview_trace - min_lv + offset, color=color)
        offset += 1.1*AMPLITUDE

        # report recordings whose fluorescence trace does not have 13483 frames
        n_fluo_frames = len(recording["mean_fluo"])
        if n_fluo_frames != 13483:
            print(f'{event_uuid}: {n_fluo_frames}, {mouse_id}, {exp_type}')

    plt.suptitle(exp_type, fontsize=22)
    plt.yticks([])
    plt.xlabel("Time (s)", fontsize=14)
    plt.tight_layout()
    if save_figs:
        out_fpath = f"D:\\Downloads\\traces_normalized_{get_datetime_for_fname()}{file_format}"
        plt.savefig(out_fpath,bbox_inches='tight', dpi=300)
        print(f"Saved as {out_fpath}")
    plt.show()

In [None]:
def waterfallLoco(exp_type, show_segments=False, bl_equal_post=True, n_bl_frames=bl_manual_length):
    """Plot the LabView speed traces of all recordings of ``exp_type`` stacked above each other.

    Each trace is shifted so that its minimum sits on the current offset and is drawn in the colour that
    ``df_colors`` assigns to the mouse. With ``show_segments`` the times of the frames stored in
    ``begin_end_frames_dict`` are marked with orange vertical lines and the computed last frame of the
    post segment is printed. The figure is saved if the global ``save_figs`` is set, then shown.

    Parameters
    ----------
    exp_type
        Key of ``exptype_wintype_id_dict`` selecting the recordings to plot.
    show_segments : bool
        Whether to draw the segment borders.
    bl_equal_post : bool
        If True, the post segment spans ``n_bl_frames`` frames (clamped to the last frame);
        otherwise it runs until the last frame. Only affects the printed frame index.
    n_bl_frames : int
        Requested baseline length in frames.
    """
    AMPLITUDE = LV_SPEED_AMPL
    offset = 0

    # flatten window type -> mouse -> uuids into one list, keeping the nested iteration order
    recordings = [
        (mouse_id, event_uuid)
        for mice_of_window in exptype_wintype_id_dict[exp_type].values()
        for mouse_id, event_uuids in mice_of_window.items()
        for event_uuid in event_uuids
    ]
    n_recordings_with_type = len(recordings)
    fig = plt.figure(figsize=(18,n_recordings_with_type*3))

    for mouse_id, event_uuid in recordings:
        if is_chr2:  # chr2 experiments contain the whole session in one file
            df_segments = ddoc.getSegmentsForUUID(event_uuid)
            stim_rows = df_segments[df_segments["interval_type"] == "stimulation"]
            i_frame_stim_begin = stim_rows.frame_begin.iloc[0] - 1
            i_frame_stim_end = stim_rows.frame_end.iloc[0] - 1  # in 1 indexing, inclusive
        else:  # in tmev recordings, there is no stim, but it is the seizure segment (see value_mapping)
            metadata_dict = traces_meta_dict[event_uuid]
            i_frame_stim_begin = metadata_dict["n_bl_frames"]
            i_frame_stim_end = metadata_dict["n_frames"] - metadata_dict["n_am_frames"]

        recording = traces_dict[event_uuid]
        t = recording["lv_t_s"]

        if show_segments:
            # begin of baseline and end of post-stim segment, both clamped to the recording
            i_frame_bl_begin = max(0, i_frame_stim_begin - n_bl_frames)
            i_last_frame = len(t) - 1
            if bl_equal_post:
                i_frame_post_end = min(i_frame_stim_end + n_bl_frames, i_last_frame)
            else:
                i_frame_post_end = i_last_frame
            print(i_frame_post_end)
            # the drawn borders come from begin_end_frames_dict, not from the indices computed above
            plt.vlines(x=t[begin_end_frames_dict[event_uuid]], ymin = offset, ymax = offset+AMPLITUDE, color="orange")

        labview_trace = recording["lv_speed"]
        min_lv = min(labview_trace)
        color = df_colors[df_colors["mouse_id"] == mouse_id].color.iloc[0]
        plt.plot(t, labview_trace - min_lv+offset, color=color)

        offset += 1.3*AMPLITUDE

        # report recordings whose fluorescence trace does not have 13483 frames
        n_fluo_frames = len(recording["mean_fluo"])
        if n_fluo_frames != 13483:
            print(f'{event_uuid}: {n_fluo_frames}, {mouse_id}, {exp_type}')

    print(f"Total: {n_recordings_with_type} traces")
    plt.suptitle(exp_type, fontsize=22)
    plt.yticks([])
    plt.xlabel("Time (s)", fontsize=14)
    plt.tight_layout()
    if save_figs:
        out_fpath = f"D:\\Downloads\\waterfall_loco_{exp_type}_{get_datetime_for_fname()}{file_format}"
        plt.savefig(out_fpath,bbox_inches='tight', dpi=300)
        print(f"Saved as {out_fpath}")
    plt.show()

### Plot one of the categories

In [None]:
# Show the experiment types present in the dataset (valid arguments for the plotting functions).
df_stats["exp_type"].unique()

In [None]:
save_sanity_check = False  # make sure to set save_figs to True as well
if save_sanity_check:
    # include all information useful for debugging; do not show the figures, only create (and save) them
    sanity_check_kwargs = dict(cut_to_segments=False, bl_equal_post=True, show_debug=True, show_fig=False)
    for exp_type in df_stats.exp_type.unique():
        plotNikLoco(exp_type, **sanity_check_kwargs)

In [None]:
save_waterfall = False
if save_waterfall:
    # plain waterfall: x axis cut to the segments, no debug annotations, figures not shown
    waterfall_kwargs = dict(cut_to_segments=True, bl_equal_post=True, show_debug=False, show_fig=False)
    for exp_type in df_stats.exp_type.unique():
        plotNikLoco(exp_type, **waterfall_kwargs)

In [None]:
# TODO: if debug, set x limits to the segments (i.e. 4500 + stim + 4500 frames, for example)

In [None]:
# Stack, for every recording, the mean fluorescence between the frames stored at positions 1 and 2 of
# begin_end_frames_dict (with 5 extra frames on either side), each rescaled to the same amplitude.
fig = plt.figure(figsize=(18,42))
AMP = 100.0
offset = 0.0
for event_uuid in traces_dict.keys():
    segment_frames = begin_end_frames_dict[event_uuid]
    i_bl_end = segment_frames[1]
    i_am_begin = segment_frames[2]
    tr = traces_dict[event_uuid]["mean_fluo"][i_bl_end-5:i_am_begin+5]
    min_tr = min(tr)
    max_tr = max(tr)
    tr_scaled = AMP*(tr - min_tr)/(max_tr - min_tr)
    plt.plot(tr_scaled + offset)
    # label each trace with its uuid and mouse id
    trace_label = f"{event_uuid}, {traces_meta_dict[event_uuid]['mouse_id']}"
    plt.text(0.0, offset, trace_label, fontsize=10)
    offset += 1.1*AMP
plt.show()

In [None]:
# Sanity-check plot (debug annotations on, full time axis) for one experiment type of the loaded dataset.
exp_type_to_plot = "chr2_sd" if is_chr2 else "tmev"
plotNikLoco(exp_type_to_plot, cut_to_segments=False, bl_equal_post=True, show_debug=True, show_fig=True)
    

In [None]:
# Locomotion-only waterfall with segment borders for one experiment type of the loaded dataset.
exp_type_to_plot = "chr2_sd" if is_chr2 else "tmev"
waterfallLoco(exp_type_to_plot, True)
    

In [None]:
#compareLoco("tmev")

# Export results

## Export all metrics (individual session data) to Excel

In [None]:
# Session-level export table: keep only rows whose segment type is the one mapped
# from "bl" or from "am" in value_mapping.
df_to_save = df_stats.loc[
    df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
]

In [None]:
# Write the session-level table to a time-stamped Excel file in output_folder.
# The file name contains "chr2" or "tmev" depending on is_chr2.
if export_results:
    output_fpath = os.path.join(
        output_folder,
        f"loco_data_chr2_{get_datetime_for_fname()}.xlsx"
        if is_chr2
        else f"loco_data_tmev_{get_datetime_for_fname()}.xlsx",
    )
    df_to_save.to_excel(output_fpath, index=False)
    print(f"Results exported to {output_fpath}")
    

## Export mouse aggregates

In [None]:
# Mouse-aggregate export table: keep only rows whose segment type is the one mapped
# from "bl" or from "am", ordered by mouse, experiment type and segment type.
df_to_save_aggregate = (
    df_stats_per_mouse_mean
    .loc[df_stats_per_mouse_mean["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])]
    .sort_values(by=["mouse_id", "exp_type", "segment_type"])
)

In [None]:
# Write the mouse-aggregate table to a time-stamped Excel file in output_folder.
# The file name contains "chr2" or "tmev" depending on is_chr2.
if export_results:
    output_fpath = os.path.join(
        output_folder,
        f"loco_data_aggregate_chr2_{get_datetime_for_fname()}.xlsx"
        if is_chr2
        else f"loco_data_aggregate_tmev_{get_datetime_for_fname()}.xlsx",
    )
    df_to_save_aggregate.to_excel(output_fpath, index=False)
    print(f"Results exported to {output_fpath}")

# Export to Matlab

## Individual session data

In [None]:
# Target .mat file (time-stamped, in output_folder) for the session-level data;
# it is used by the MATLAB export in the next cell.
output_fpath = os.path.join(
    output_folder,
    f"loco_data_chr2_{get_datetime_for_fname()}.mat"
    if is_chr2
    else f"loco_data_tmev_{get_datetime_for_fname()}.mat",
)
print(f"Saving session-level data to workspace\n\t{output_fpath}")

In [None]:
# Export the session-level table to MATLAB: every column of df_to_save becomes one
# variable in the workspace of a freshly started MATLAB engine, and that workspace is
# then saved to the .mat file at output_fpath.
if export_results:
    eng = matlab.engine.start_matlab()
    try:
        for colname in df_to_save.columns:
            dtype = df_to_save[colname].dtype
            # "%" cannot be part of a MATLAB variable name (e.g. "running%" -> "runningpercent").
            colname_matlab = colname.replace("%", "percent")
            if dtype == np.object_:  # strings are represented as object_ in np array
                eng.workspace[colname_matlab] = list(np.array(df_to_save[colname]))
            elif dtype == np.int64:
                eng.workspace[colname_matlab] = matlab.int64(list(df_to_save[colname]))
            elif dtype == np.int32:
                eng.workspace[colname_matlab] = matlab.int32(list(df_to_save[colname]))
            elif dtype == np.float64:
                eng.workspace[colname_matlab] = matlab.double(list(df_to_save[colname]))
            else:
                raise NotImplementedError(f"{dtype} not implemented yet!")

        # A single quote inside a MATLAB character vector has to be doubled, otherwise a
        # path containing "'" would terminate the string early and break the save command.
        matlab_fpath = output_fpath.replace("'", "''")
        eng.eval(f"save('{matlab_fpath}')", nargout=0)
        print("Saved successfully.")
    finally:
        # Always shut the engine down, also when a column type is unsupported or saving fails;
        # otherwise the MATLAB process started above keeps running in the background.
        eng.quit()

## Save aggregate data to workspace

In [None]:
# Target .mat file (time-stamped, in output_folder) for the mouse-aggregate data;
# it is used by the MATLAB export in the next cell.
output_fpath = os.path.join(
    output_folder,
    f"loco_data_aggregate_chr2_{get_datetime_for_fname()}.mat"
    if is_chr2
    else f"loco_data_aggregate_tmev_{get_datetime_for_fname()}.mat",
)
print(f"Saving mouse aggregate data to workspace\n\t{output_fpath}")

In [None]:
# Export the mouse-aggregate table to MATLAB: every column of df_to_save_aggregate becomes
# one variable in the workspace of a freshly started MATLAB engine, and that workspace is
# then saved to the .mat file at output_fpath.
if export_results:
    eng = matlab.engine.start_matlab()
    try:
        for colname in df_to_save_aggregate.columns:
            dtype = df_to_save_aggregate[colname].dtype
            # "%" cannot be part of a MATLAB variable name (e.g. "running%" -> "runningpercent").
            colname_matlab = colname.replace("%", "percent")
            if dtype == np.object_:  # strings are represented as object_ in np array
                eng.workspace[colname_matlab] = list(np.array(df_to_save_aggregate[colname]))
            elif dtype == np.int64:
                eng.workspace[colname_matlab] = matlab.int64(list(df_to_save_aggregate[colname]))
            elif dtype == np.int32:
                # Same dtype coverage as the session-level MATLAB export.
                eng.workspace[colname_matlab] = matlab.int32(list(df_to_save_aggregate[colname]))
            elif dtype == np.float64:
                eng.workspace[colname_matlab] = matlab.double(list(df_to_save_aggregate[colname]))
            else:
                raise NotImplementedError(f"{dtype} not implemented yet!")

        # A single quote inside a MATLAB character vector has to be doubled, otherwise a
        # path containing "'" would terminate the string early and break the save command.
        matlab_fpath = output_fpath.replace("'", "''")
        eng.eval(f"save('{matlab_fpath}')", nargout=0)
        print("Saved successfully.")
    finally:
        # Always shut the engine down, also when a column type is unsupported or saving fails;
        # otherwise the MATLAB process started above keeps running in the background.
        eng.quit()

# Save statistical test results

## Individual sessions as samples

In [None]:
# Column layout of the statistical-test result tables. Every result row is a list with
# one entry per column, in exactly this order:
#   [window_type, metric, experiment_type, n_samples, t_statistic, p]
stat_colnames = [
    "window_type",
    "metric",
    "experiment_type",
    "n_samples",
    "t_statistic",
    "p",
]

In [None]:
# Accumulator for the session-level test results (one list per row, see stat_colnames).
stat_data = list()

In [None]:
# Session-level statistics: for each window type (CA1, NC), each metric in STAT_METRICS and
# each experiment type, call paired_t_test on the group and append one result row
# [window_type, metric, experiment_type, n_samples, t_statistic, p] to stat_data.
# n_samples is the number of rows of the group whose segment type is the one mapped from "bl".
if export_results:
    # start with CA1
    if not (df_stats["window_type"] == "CA1").any():
        print("Skipping CA1, no samples found...")
    else:
        for statistic in STAT_METRICS:
            for exp_type, exp_g in stat_data_ca1.sort_values(by="exp_type").groupby("exp_type"):
                ttest_result = paired_t_test(exp_g, statistic)
                n_bl = int((exp_g["segment_type"] == value_mapping["bl"]).sum())
                stat_data.append(
                    ["CA1", statistic, exp_type, n_bl, ttest_result.statistic, ttest_result.pvalue]
                )
    # then NC
    if not (df_stats["window_type"] == "NC").any():
        print("Skipping NC, no samples found...")
    else:
        for statistic in STAT_METRICS:
            for exp_type, exp_g in stat_data_nc.sort_values(by="exp_type").groupby("exp_type"):
                ttest_result = paired_t_test(exp_g, statistic)
                n_bl = int((exp_g["segment_type"] == value_mapping["bl"]).sum())
                stat_data.append(
                    ["NC", statistic, exp_type, n_bl, ttest_result.statistic, ttest_result.pvalue]
                )

In [None]:
# Write the session-level test results to a time-stamped Excel file in output_folder.
# The file name contains "chr2" or "tmev" depending on is_chr2.
# NOTE: an earlier tab-separated csv.writer export to the same path is no longer used.
if export_results:
    output_fpath = os.path.join(
        output_folder,
        f"loco_stattest_chr2_{get_datetime_for_fname()}.xlsx"
        if is_chr2
        else f"loco_stattest_tmev_{get_datetime_for_fname()}.xlsx",
    )
    df_exp_stat = pd.DataFrame(data=stat_data, columns=stat_colnames)
    df_exp_stat.to_excel(output_fpath, index=False)
    print(f"Saved to\n\t{output_fpath}")

## Mouse aggregate values

In [None]:
# Mouse-aggregate statistics: same procedure as for the individual sessions, but run on the
# per-mouse mean tables and pairing the samples by "mouse_id". One result row
# [window_type, metric, experiment_type, n_samples, t_statistic, p] is appended per test.
# n_samples is the number of rows of the group whose segment type is the one mapped from "bl".
stat_data_aggregate = list()
if export_results:
    # start with CA1
    if not (df_stats["window_type"] == "CA1").any():
        print("Skipping CA1, no samples found...")
    else:
        for statistic in STAT_METRICS:
            for exp_type, exp_g in stat_data_mouse_means_ca1.sort_values(by="exp_type").groupby("exp_type"):
                ttest_result = paired_t_test(exp_g, statistic, pair_by="mouse_id")
                n_bl = int((exp_g["segment_type"] == value_mapping["bl"]).sum())
                stat_data_aggregate.append(
                    ["CA1", statistic, exp_type, n_bl, ttest_result.statistic, ttest_result.pvalue]
                )
    # then NC
    if not (df_stats["window_type"] == "NC").any():
        print("Skipping NC, no samples found...")
    else:
        for statistic in STAT_METRICS:
            for exp_type, exp_g in stat_data_mouse_means_nc.sort_values(by="exp_type").groupby("exp_type"):
                ttest_result = paired_t_test(exp_g, statistic, pair_by="mouse_id")
                n_bl = int((exp_g["segment_type"] == value_mapping["bl"]).sum())
                stat_data_aggregate.append(
                    ["NC", statistic, exp_type, n_bl, ttest_result.statistic, ttest_result.pvalue]
                )

In [None]:
# Write the mouse-aggregate test results to a time-stamped Excel file in output_folder.
if export_results:
    # The dataset type is part of the file name, like in all other exports of this
    # notebook: "chr2" for ChR2 datasets, "tmev" otherwise.
    if is_chr2:
        output_fpath = os.path.join(output_folder, f"loco_stattest_aggregate_chr2_{get_datetime_for_fname()}.xlsx")
    else:
        output_fpath = os.path.join(output_folder, f"loco_stattest_aggregate_tmev_{get_datetime_for_fname()}.xlsx")
    # One row per (window type, metric, experiment type) test; columns as in stat_colnames.
    df_exp_stat_aggregate = pd.DataFrame(data=stat_data_aggregate, columns=stat_colnames)
    df_exp_stat_aggregate.to_excel(output_fpath, index=False)
    print(f"Saved to\n\t{output_fpath}")