# Locomotion analysis v3.0
This script works with the pre-assembled dataset, cutting out most of the acquiring data part, resulting in cleaner and more reliable analysis (the dataset is manually checked separately from this script).

# Set version

### Set up export figure parameters

In [None]:
# Figure export switches: set save_figs to True to write the figures to disk,
# and save_as_pdf to True to export PDF instead of JPEG.
save_figs = False
save_as_pdf = False
#save_as_eps = False

# File extension (leading dot included) appended to every exported figure name.
file_format = ".pdf" if save_as_pdf else ".jpg"

if save_figs:
    print(f"Going to save figures as {file_format} files.")

In [None]:
export_datasets = False  # set to True to export the resulting datasets

In [None]:
output_version = "v1.2"

# Import libraries

In [None]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [None]:
import labrotation.file_handling as fh
import h5py
from time import time
import matplotlib.pyplot as plt
import numpy as np
import os
from labrotation import file_handling as fh
from copy import deepcopy
import pandas as pd
import labrotation.two_photon_session as tps
import seaborn as sns
import uuid  # for unique labeling of sessions and coupling arrays (mouse velocity, distance, ...) to sessions in dataframe 
from matplotlib import cm  # colormap
import datadoc_util
from labrotation import two_photon_session as tps
from datetime import datetime
import seaborn as sns

# Set seaborn parameters

In [None]:
sns.set(font_scale=2)
sns.set_style("whitegrid")

# If it exists, load environment variables from the .env file

In [None]:
def read_env_file(env_path="./.env"):
    """Parse a ``KEY=VALUE`` text file into a dict of strings.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Only the first ``=`` separates key from value, so values may
    themselves contain ``=``. Whitespace around key and value is removed.

    Parameters
    ----------
    env_path : str
        Path of the file to read.

    Returns
    -------
    dict
        Mapping of key to value; empty if the file does not exist.
    """
    parsed = dict()
    if not os.path.exists(env_path):
        print(".env does not exist")
        return parsed
    with open(env_path, "r") as f:
        for line in f:
            entry = line.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            # maxsplit=1: keep any further "=" characters as part of the value
            key, value = entry.split("=", 1)
            parsed[key.strip()] = value.strip()
    return parsed


env_dict = read_env_file("./.env")
print(env_dict.keys())

# Set up data documentation directory

In [None]:
# Expected layout of the documentation folder (an assumption, not checked here):
#   - one subfolder per mouse, whose name identifies the mouse (not necessarily the exact id);
#   - inside the subfolder of mouse xy: xy_grouping.xlsx and xy_segmentation.xlsx.
# xy_grouping.xlsx lists which recordings belong together. Columns:
#   folder, nd2, labview, lfp, face_cam_last, nikon_meta, experiment_type, day
# xy_segmentation.xlsx classifies each recording frame by frame, as a set of disjoint intervals
# covering the whole recording ("normal", seizure ("sz"), sd wave ("sd_wave") etc.). Columns:
#   folder, interval_type, frame_begin, frame_end.

# TODO: write documentation on contents of xlsx files (what the columns are etc.)
docu_folder = env_dict.get("DATA_DOCU_FOLDER")
if docu_folder is None:
    # no entry in .env: ask for the folder interactively
    docu_folder = fh.open_dir("Choose folder containing folders for each mouse!")
print(f"Selected folder:\n\t{docu_folder}")

In [None]:
# The per-mouse folders live either directly in docu_folder or in its "documentation" subfolder.
mouse_folder = docu_folder
if "documentation" in os.listdir(docu_folder):
    mouse_folder = os.path.join(docu_folder, "documentation")

mouse_names = os.listdir(mouse_folder)
print("Mice detected:")
for mouse in mouse_names:
    print(f"\t{mouse}")

In [None]:
def get_datetime_for_fname():
    """Return the current local time as a ``YYYYMMDD-HHMMSS`` string for use in file names."""
    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
# Folder that receives exported figures. Use the .env entry when present; otherwise ask for
# the folder interactively, the same way the other folder-selection cells do, instead of
# failing with a KeyError when .env is missing or lacks the key.
if "DOWNLOADS_FOLDER" in env_dict.keys():
    output_folder = env_dict["DOWNLOADS_FOLDER"]
else:
    output_folder = fh.open_dir("Choose folder for output files")
print(f"Output files will be saved to {output_folder}")

### Load matlab-2p

In [None]:
# Location of the matlab-2p folder: taken from .env when configured, otherwise chosen interactively.
matlab_2p_folder = env_dict.get("MATLAB_2P_FOLDER")
if matlab_2p_folder is None:
    matlab_2p_folder = fh.open_dir("Choose matlab-2p folder")
print(f"matlab-2p folder set to:\n\t{matlab_2p_folder}")

### Load data documentation

In [None]:
ddoc = datadoc_util.DataDocumentation(docu_folder)
ddoc.loadDataDoc()

### Set up color coding
For now, a color can only be assigned to each mouse. Later, once event UUIDs are available, each event UUID will need to be mapped to a color code.

In [None]:
df_colors = ddoc.getColorings()

In [None]:
# Lookup table: mouse id -> color, built from the "mouse_id" and "color" columns of df_colors.
dict_colors_mouse = dict(zip(df_colors["mouse_id"].tolist(), df_colors["color"].tolist()))

In [None]:
#dict_colors_mouse["T413"] = "#000000"  # set one to black

### Load events_list dataset

In [None]:
# events_list.xlsx is expected directly inside the documentation folder.
events_list_fpath = os.path.join(docu_folder, "events_list.xlsx")
# Explicit check instead of assert: assertions are stripped when Python runs with -O,
# and the error message now names the missing file.
if not os.path.exists(events_list_fpath):
    raise FileNotFoundError(f"events_list.xlsx not found: {events_list_fpath}")

df_events_list = pd.read_excel(events_list_fpath)

## Load dataset

In [None]:
assembled_traces_fpath = fh.open_file("Open assembled_traces h5 file!")

In [None]:
if "ChR2" in assembled_traces_fpath:
    used_mouse_ids = ["OPI-2239", "WEZ-8917", "WEZ-8924"]
    print("Recognized ChR2 dataset")

In [None]:
# Both dicts are keyed by event uuid (the top-level group names of the h5 file).
# traces_dict[event_uuid] maps dataset name -> numpy array, e.g.
# 'lfp_mov_t', 'lfp_mov_y', 'lfp_t', 'lfp_y', 'lv_dist', 'lv_rounds',
# 'lv_running', 'lv_speed', 'lv_t_s', 'lv_totdist', 'mean_fluo'
# traces_meta_dict[event_uuid] maps attribute name -> attribute value of the same group.
traces_dict = dict()
traces_meta_dict = dict()

# For ChR2 files, only sessions of the mice listed in used_mouse_ids are loaded.
is_chr2_dataset = "ChR2" in assembled_traces_fpath

with h5py.File(assembled_traces_fpath, "r") as hf:
    # The loop variable is deliberately not called "uuid": at notebook level that would
    # overwrite the imported uuid module.
    for event_uuid in hf.keys():
        session_group = hf[event_uuid]
        if is_chr2_dataset and session_group.attrs["mouse_id"] not in used_mouse_ids:
            continue
        # np.array() copies the data into memory so it stays usable after the file is closed
        traces_dict[event_uuid] = {
            dataset_name: np.array(session_group[dataset_name])
            for dataset_name in session_group.keys()
        }
        traces_meta_dict[event_uuid] = {
            attr_name: session_group.attrs[attr_name]
            for attr_name in session_group.attrs
        }

# Calculate locomotion statistics

In [None]:
def get_episodes(segment, merge_episodes=False, merge_threshold_frames=8):
    """Return the lengths, in frames, of the running episodes in a per-frame trace.

    An episode is a maximal run of consecutive frames whose value equals 1;
    any other value ends the episode.

    Parameters
    ----------
    segment : sequence
        One-dimensional per-frame flags (1 = running). May be empty.
    merge_episodes : bool
        If True, an episode is fused with the previous one when
        ``begin_of_current - end_of_previous <= merge_threshold_frames``.
        A merged episode spans from the first begin to the last end, so the
        gap frames count towards its length.
    merge_threshold_frames : int
        Merge tolerance in frames. The default of 8 was described in the
        original code as roughly 0.5 s at 15 Hz.

    Returns
    -------
    list of int
        One length per (possibly merged) episode, in order of occurrence.
        ``len()`` of the result is the number of episodes.
    """
    # Detect episodes as (first_frame, last_frame) pairs, both inclusive.
    episodes = []
    episode_begin = None
    for i_frame, value in enumerate(segment):
        if value == 1:
            if episode_begin is None:
                episode_begin = i_frame
        elif episode_begin is not None:
            episodes.append((episode_begin, i_frame - 1))
            episode_begin = None
    if episode_begin is not None:
        # the trace ends while an episode is still running
        episodes.append((episode_begin, len(segment) - 1))

    if merge_episodes and len(episodes) > 1:
        merged = [episodes[0]]
        for begin, end in episodes[1:]:
            previous_begin, previous_end = merged[-1]
            if begin - previous_end <= merge_threshold_frames:
                # close enough: extend the previous episode to the current end
                merged[-1] = (previous_begin, end)
            else:
                merged.append((begin, end))
        episodes = merged

    return [end - begin + 1 for begin, end in episodes]

In [None]:
# If True, baseline and aftermath are cut to fixed lengths adjacent to the seizure segment,
# and events with too few baseline/aftermath frames are skipped.
use_manual_bl_am_length = True
bl_manual_length = 4500
am_manual_length = 4500


def _cumulative_change(cumulative_trace):
    """Return last minus first sample of a cumulative trace (0 for an empty slice)."""
    if len(cumulative_trace) == 0:
        return 0
    return cumulative_trace[-1] - cumulative_trace[0]


# each entry (row) has the columns:
# uuid of event, mouse id, window type, segment type (bl/sz/am), segment length in frames,
# totdist, totdist_abs, running, speed, number of running episodes,
# mean episode length, std of episode length, experiment type
list_statistics = []
# dict_episodes[mouse_id][event_uuid][segment type] -> numpy array of episode lengths
dict_episodes = {}

for event_uuid in traces_dict.keys():
    session_meta = traces_meta_dict[event_uuid]
    mouse_id = session_meta["mouse_id"]
    win_type = session_meta["window_type"]
    # get segment lengths as recorded in the dataset
    n_bl_frames = session_meta["n_bl_frames"]
    n_am_frames = session_meta["n_am_frames"]
    n_frames = session_meta["n_frames"]
    n_sz_frames = n_frames - n_am_frames - n_bl_frames

    # All boundaries below are [begin, end): left inclusive, right exclusive, for numpy slicing.
    # The seizure window is fixed here, from the recorded baseline length, so that shortening
    # the baseline below cannot shift it.
    first_frame_sz = n_bl_frames
    last_frame_sz = n_bl_frames + n_sz_frames

    if use_manual_bl_am_length:
        if (bl_manual_length > n_bl_frames) or (am_manual_length > n_am_frames):
            print(f"{mouse_id} {event_uuid}:\n\tNot enough bl ({n_bl_frames}, {bl_manual_length} required) or am ({n_am_frames}, {am_manual_length} required) frames available. Skipping...")
            continue
        # baseline: the bl_manual_length frames directly before the seizure segment
        first_frame_bl = first_frame_sz - bl_manual_length
        last_frame_bl = first_frame_sz
        # aftermath: the am_manual_length frames directly after the seizure segment
        first_frame_am = last_frame_sz
        last_frame_am = last_frame_sz + am_manual_length
    else:
        first_frame_bl = 0
        last_frame_bl = n_bl_frames
        first_frame_am = last_frame_sz
        last_frame_am = n_frames

    # segment lengths actually analysed
    n_bl_frames = last_frame_bl - first_frame_bl
    n_am_frames = last_frame_am - first_frame_am

    segment_bounds = {
        "bl": (first_frame_bl, last_frame_bl),
        "sz": (first_frame_sz, last_frame_sz),
        "am": (first_frame_am, last_frame_am),
    }

    # print(f"{ddoc.getNikonFileNameForUuid(event_uuid)}:\n\t{n_bl_frames} bl, {n_sz_frames} mid, {n_am_frames} am")
    # get movement data
    lv_totdist = traces_dict[event_uuid]["lv_totdist"]
    lv_totdist_abs = traces_dict[event_uuid]["lv_totdist_abs"]
    lv_running = traces_dict[event_uuid]["lv_running"]
    lv_speed = traces_dict[event_uuid]["lv_speed"]

    # if not noted, assume TMEV recordings
    exp_type = session_meta.get("exp_type", "tmev")

    if mouse_id not in dict_episodes:
        dict_episodes[mouse_id] = dict()
    dict_episodes[mouse_id][event_uuid] = dict()

    # rows are appended in the order bl, sz, am
    for segment_type, (first_frame, last_frame) in segment_bounds.items():
        lv_running_segment = lv_running[first_frame:last_frame]

        # totdist traces are treated as cumulative: last sample minus first sample
        totdist = _cumulative_change(lv_totdist[first_frame:last_frame])
        totdist_abs = _cumulative_change(lv_totdist_abs[first_frame:last_frame])
        running = np.sum(lv_running_segment)
        speed = np.sum(lv_speed[first_frame:last_frame])

        # running episodes, merged when at most 8 frames apart
        episode_lengths = np.array(get_episodes(lv_running_segment, True, 8))
        n_episodes = len(episode_lengths)
        dict_episodes[mouse_id][event_uuid][segment_type] = episode_lengths

        # mean/std of an empty array is NaN plus a RuntimeWarning; set NaN directly instead
        episode_mean_len = episode_lengths.mean() if n_episodes else np.nan
        episode_std = episode_lengths.std() if n_episodes else np.nan

        list_statistics.append([event_uuid, mouse_id, win_type, segment_type, last_frame - first_frame, totdist, totdist_abs, running, speed, n_episodes, episode_mean_len, episode_std, exp_type])

In [None]:
df_stats = pd.DataFrame(data=list_statistics, columns=["event_uuid", "mouse_id", "window_type", "segment_type",  "segment_length", "totdist", "totdist_abs", "running", "speed", "running_episodes", "running_episodes_mean_length", "running_episodes_length_std", "exp_type"])

In [None]:
# set NaN to 0 (running_episodes_mean_length: if no episodes, then mean segment length is 0)
df_stats["running_episodes_mean_length"] = df_stats["running_episodes_mean_length"].fillna(value=0)

In [None]:
df_stats["totdist_norm"] = df_stats["totdist"]/df_stats["segment_length"]
df_stats["totdist_abs_norm"] = df_stats["totdist_abs"]/df_stats["segment_length"]
df_stats["running_norm"] = df_stats["running"]/df_stats["segment_length"]
df_stats["speed_norm"] = df_stats["speed"]/df_stats["segment_length"]

### Add % of time spent running

In [None]:
# % of time spent running
df_stats["running%"] = 100*df_stats["running"]/df_stats["segment_length"]  # get value as true % instead of [0, 1] float

### Add  color codes to entries

In [None]:
df_stats["color"] = df_stats.apply(lambda row: dict_colors_mouse[row["mouse_id"]], axis=1)

In [None]:
# Lookup table: event uuid -> color, built from the "event_uuid" and "color" columns of df_stats.
dict_colors_event = dict(zip(df_stats["event_uuid"].tolist(), df_stats["color"].tolist()))

## Create per-mouse means

In [None]:
df_stats_per_mouse_mean = df_stats.drop(columns=["event_uuid", "window_type", "color"], axis=0).groupby(["mouse_id", "exp_type", "segment_type"]).agg(func="mean").reset_index()

In [None]:
df_stats_per_mouse_mean["window_type"] = df_stats_per_mouse_mean.apply(lambda row: ddoc.getMouseWinInjInfo(row["mouse_id"]).iloc[0].window_type, axis=1)
df_stats_per_mouse_mean["color"] = df_stats_per_mouse_mean.apply(lambda row: df_colors[df_colors["mouse_id"] == row["mouse_id"]].iloc[0].color, axis=1)

# 1. TMEV

# Plot results

In [None]:
fig = plt.figure(figsize=(10,10))
sns.violinplot(x="segment_type", y="running_norm", data=df_stats)
#sns.stripplot(data=df_stats[df_stats["window_type"]=="CA1"], x="speed_norm", y="segment_type", hue="mouse_id", dodge=True, zorder=1, legend=False)
plt.show()

In [None]:
loco_statistic = "totdist_abs"

fig, axs = plt.subplots(2, 3, figsize=(22, 12), sharey=False)

# Top row: one point plot per metric, with segment type on the x axis.
for ax, metric in zip(axs[0], (loco_statistic, "running", "speed")):
    sns.pointplot(data=df_stats, x="segment_type", y=metric, ax=ax)

# Bottom row: loco_statistic per window type, one panel per segment type (bl, sz, am).
for ax, segment_name in zip(axs[1], ("bl", "sz", "am")):
    segment_rows = df_stats[df_stats["segment_type"] == segment_name]
    sns.pointplot(data=segment_rows, x="window_type", y=loco_statistic, join=False, capsize=.1, ax=ax)

if save_figs:
    figure_name = f'loco_mean_per_segment_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
loco_statistic = "totdist_abs"

# segment type -> title of its panel in the bottom row
segment_titles = {"bl": "baseline", "sz": "seizure", "am": "aftermath"}

fig, axs = plt.subplots(2, 3, figsize=(22, 12))
top_row, bottom_row = axs

# Top row: one line per event across the segment types, one panel per metric.
for ax, metric in zip(top_row, (loco_statistic, "running", "speed")):
    sns.lineplot(data=df_stats, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, ax=ax, legend=False)

rows_by_segment = {
    segment_name: df_stats[df_stats["segment_type"] == segment_name]
    for segment_name in segment_titles
}

# Bottom row, first layer: individual events as points, grouped by window type.
for ax, segment_name in zip(bottom_row, segment_titles):
    sns.stripplot(data=rows_by_segment[segment_name], x="window_type", hue="event_uuid", palette=dict_colors_event, y=loco_statistic, size=8, ax=ax, legend=False)

# Bottom row, second layer: light grey split violins drawn over the same axes.
for ax, segment_name in zip(bottom_row, segment_titles):
    sns.violinplot(
        data=rows_by_segment[segment_name],
        x="window_type", y=loco_statistic,
        hue_order=[True, False], split=True,
        hue=True,
        palette=["lightgrey"],
        ax=ax
    )
    ax.legend_ = None

for ax, title in zip(bottom_row, segment_titles.values()):
    ax.set_title(title)

#axs[1][0].set_alpha(0.5)
# The violin surfaces were located by trial and error at collections[-2] and [-4];
# make them transparent.
for ax in bottom_row:
    for collection_index in (-2, -4):
        plt.setp(ax.collections[collection_index], alpha=.3)

plt.tight_layout()

if save_figs:
    figure_name = f'loco_per_segment_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")

plt.show()

## Plot individual recordings, color-coded by mouse ID

### Plot all possible metrics

In [None]:
fig, axs = plt.subplots(2, 3, figsize=(22, 16))

# Only baseline and aftermath rows are shown; the seizure segment is left out.
df_bl_am_rows = df_stats[df_stats["segment_type"].isin(["bl", "am"])]

# One panel per metric, filled row by row; each line is one event.
all_metrics = (
    "totdist_abs", "running", "speed",
    "running%", "running_episodes", "running_episodes_mean_length",
)
for ax, metric in zip(axs.flat, all_metrics):
    sns.lineplot(data=df_bl_am_rows, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, estimator=None, ax=ax, marker="o", markersize=20, legend=False)

plt.tight_layout()

if save_figs and False:  # outdated figure, do not save
    figure_name = f'loco_per_segment_all_sources_sz_excluded_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
fig, axs = plt.subplots(2, 3, figsize=(22, 16))

# Only baseline and aftermath rows are shown; the seizure segment is left out.
df_bl_am_rows = df_stats[df_stats["segment_type"].isin(["bl", "am"])]

# (metric column, y axis label) for each panel, filled row by row
labelled_metrics = (
    ("totdist_abs", 'Total (absolute) distance, a.u.'),
    ("running%", '% of time spent with locomotion'),
    ("running_episodes", 'Number of running episodes'),
    ("running", 'Frames spent with locomotion'),
    ("speed", 'Integrated velocity, a.u.'),
    ("running_episodes_mean_length", 'Mean length of running episodes (a.u.)'),
)

for ax, (metric, y_label) in zip(axs.flat, labelled_metrics):
    sns.lineplot(data=df_bl_am_rows, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, estimator=None, ax=ax, linestyle="-", marker="o", markersize=13, linewidth=1, legend=False)
    ax.set(xlabel='Segment', ylabel=y_label)
    ax.set_xticklabels(["baseline", "aftermath"])

# hide all four spines on every axes of the current figure
sns.despine(left=True, bottom=True, top=True, right=True)

plt.tight_layout()

if save_figs and False:  # outdated figure, do not save
    figure_name = f'loco_per_segment_all_sources_sz_excluded_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

### Plot 3 metrics along with individual points, violin plot

In [None]:
loco_statistic = "totdist_abs"

# segment type -> title of its panel in the bottom row
segment_titles = {"bl": "baseline", "sz": "seizure", "am": "aftermath"}

fig, axs = plt.subplots(2, 3, figsize=(22, 12))
top_row, bottom_row = axs

# Top row: baseline vs aftermath only, one line per event, one panel per metric.
df_bl_am_rows = df_stats[df_stats["segment_type"].isin(["bl", "am"])]
for ax, metric in zip(top_row, (loco_statistic, "running", "speed")):
    sns.lineplot(data=df_bl_am_rows, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, estimator=None, ax=ax, legend=False)

rows_by_segment = {
    segment_name: df_stats[df_stats["segment_type"] == segment_name]
    for segment_name in segment_titles
}

# Bottom row, first layer: individual events as points, grouped by window type.
for ax, segment_name in zip(bottom_row, segment_titles):
    sns.stripplot(data=rows_by_segment[segment_name], x="window_type", y=loco_statistic, hue="event_uuid", palette=dict_colors_event, size=8, ax=ax, legend=False)

# Bottom row, second layer: light grey split violins drawn over the same axes.
for ax, segment_name in zip(bottom_row, segment_titles):
    sns.violinplot(
        data=rows_by_segment[segment_name],
        x="window_type", y=loco_statistic,
        hue_order=[True, False], split=True,
        hue=True,
        palette=["lightgrey"],
        ax=ax
    )
    ax.legend_ = None

#axs[1][0].set_alpha(0.5)
# The violin surfaces were located by trial and error at collections[-2] and [-4];
# make them transparent.
for ax in bottom_row:
    for collection_index in (-2, -4):
        plt.setp(ax.collections[collection_index], alpha=.3)

for ax, title in zip(bottom_row, segment_titles.values()):
    ax.set_title(title)

plt.tight_layout()

if save_figs and False:  # outdated figure, do not save
    figure_name = f'loco_per_segment_sz-excluded_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
value_mapping = {"bl":"baseline", "sz": "Sz", "am":"post-Sz"}

In [None]:
# Replace the short segment codes with display labels. Values that are not keys of
# value_mapping (e.g. labels already mapped by an earlier run of this cell) are kept,
# so re-running the cell no longer raises a KeyError.
df_stats["segment_type"] = df_stats["segment_type"].map(lambda label: value_mapping.get(label, label))

In [None]:
# Build the subset before displaying it: in a top-to-bottom run the name is otherwise only
# assigned in the plotting cell further down, and this cell fails with a NameError.
# Rows kept: baseline and post-seizure segments of recordings whose window_type is "CA1".
df_stats_ca1_bl_am_only = df_stats[(df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])) & (df_stats["window_type"] == "CA1")]
df_stats_ca1_bl_am_only

In [None]:
def get_y(metric="totdist_abs"):
    """Collect the baseline and aftermath value of one metric for every event.

    Reads the notebook-level ``df_stats`` and ``value_mapping``; segment
    labels are looked up through ``value_mapping["bl"]`` / ``["am"]``.

    Parameters
    ----------
    metric : str
        Name of the df_stats column to extract.

    Returns
    -------
    list of [event_uuid, baseline_value, aftermath_value]
        One entry per event_uuid, in groupby order. For each segment the first
        matching row is used; an IndexError is raised if an event lacks one of
        the two segments.
    """
    bl_label = value_mapping["bl"]
    am_label = value_mapping["am"]
    df_bl_am = df_stats[df_stats["segment_type"].isin([bl_label, am_label])]
    return [
        [
            event_uuid,
            # select the requested column; previously totdist_abs was always used
            g.loc[g["segment_type"] == bl_label, metric].iloc[0],
            g.loc[g["segment_type"] == am_label, metric].iloc[0],
        ]
        for event_uuid, g in df_bl_am.groupby("event_uuid")
    ]

# [event_uuid, baseline value, aftermath value] triplets per metric
y_totdist_abs, y_running, y_speed = (get_y(metric) for metric in ("totdist_abs", "running", "speed"))
#x = df_stats[df_stats["segment_type"].isin(["bl", "am"])]["segment_type"]
x = [0, 1]


fig, axs = plt.subplots(2, 3, figsize=(22, 12))

# Baseline/aftermath rows only, split by window type.
is_bl_or_am = df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
df_stats_ca1_bl_am_only = df_stats[is_bl_or_am & (df_stats["window_type"] == "CA1")]
df_stats_nc_bl_am_only = df_stats[is_bl_or_am & (df_stats["window_type"] == "Cx")]

# Top row: CA1, bottom row: neocortex; the columns show the same three metrics.
panel_metrics = (loco_statistic, "running%", "running_episodes")
panel_rows = (
    (axs[0], df_stats_ca1_bl_am_only, "CA1"),
    (axs[1], df_stats_nc_bl_am_only, "Neocortex"),
)
for axes_row, window_rows, row_title in panel_rows:
    for ax, metric in zip(axes_row, panel_metrics):
        sns.lineplot(data=window_rows, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, ax=ax, marker="o", markersize=20, legend=False)
    for ax in axes_row:
        ax.set_title(row_title)

# Share the y axis within each column.
axs[1][0].sharey(axs[0][0])
axs[1][1].sharey(axs[0][1])
# In the third column the direction is swapped, to avoid clipping a marker in the lower panel.
axs[0][2].sharey(axs[1][2])

#sns.violinplot(
#    data=df_stats[df_stats["segment_type"].isin(["bl", "am"])], x="segment_type", y=loco_statistic,
#    hue_order=[True, False], split=True,
#    hue=True,
#    palette=["lightgrey"],
#    ax=axs[1][0]
#)
#axs[1][0].legend_=None

#sns.violinplot(
#    data=df_stats[df_stats["segment_type"].isin(["bl", "am"])], x="segment_type", y="running",
#    hue_order=[True, False], split=True,
#    hue=True,
#    palette=["lightgrey"],
#    ax=axs[1][1]
#)
#axs[1][1].legend_=None

#sns.violinplot(
#    data=df_stats[df_stats["segment_type"].isin(["bl", "am"])], x="segment_type", y="speed", 
#    hue_order=[True, False], split=True,
#    hue=True,
#    palette=["lightgrey"],
#    ax=axs[1][2]
#)
#axs[1][2].legend_=None

#axs[1][0].set_alpha(0.5)
# found violin plot surfaces by trial and error below, for making them transparent:
#plt.setp(axs[1][0].collections[-2], alpha=.3)
#plt.setp(axs[1][0].collections[-4], alpha=.3)
#plt.setp(axs[1][1].collections[-2], alpha=.3)
#plt.setp(axs[1][1].collections[-4], alpha=.3)
#plt.setp(axs[1][2].collections[-2], alpha=.3)
#plt.setp(axs[1][2].collections[-4], alpha=.3)



plt.tight_layout()

# Export the figure when saving is switched on, then display it.
if save_figs:
    figure_name = f'loco_per_segment_tmev_sz-excluded_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
df_stats_bl_am_only = pd.concat([df_stats_ca1_bl_am_only, df_stats_nc_bl_am_only])

## Aggregate by mouse
estimator='mean', errorbar=('ci', 95) are the default statistics

## CA1

In [None]:
# Per-mouse means of the mice whose window_type is "ca1". The explicit copy makes this an
# independent frame, so assigning columns to it later does not trigger pandas'
# SettingWithCopyWarning.
# NOTE(review): df_stats is filtered on "CA1"/"Cx" elsewhere in this notebook, while this
# frame is filtered on lowercase "ca1" - confirm the spelling returned by the data documentation.
df_stats_per_mouse_mean_ca1 = df_stats_per_mouse_mean[df_stats_per_mouse_mean["window_type"] == "ca1"].copy()

In [None]:
# Replace the short segment codes with display labels. assign() returns a new frame, which
# avoids assigning into a slice of df_stats_per_mouse_mean; values that are not keys of
# value_mapping (e.g. already mapped labels) are kept, so the cell can be re-run.
df_stats_per_mouse_mean_ca1 = df_stats_per_mouse_mean_ca1.assign(
    segment_type=df_stats_per_mouse_mean_ca1["segment_type"].map(lambda label: value_mapping.get(label, label))
)

In [None]:
# Keep only the baseline and aftermath rows, ordered by mouse, experiment type and segment.
ca1_is_bl_or_am = df_stats_per_mouse_mean_ca1["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
df_stats_per_mouse_mean_ca1_only_bl_am = (
    df_stats_per_mouse_mean_ca1[ca1_is_bl_or_am]
    .sort_values(by=["mouse_id", "exp_type", "segment_type"])
)

# NC

In [None]:
# Per-mouse means of the mice whose window_type is "cx". The explicit copy makes this an
# independent frame, so assigning columns to it later does not trigger pandas'
# SettingWithCopyWarning.
# NOTE(review): df_stats is filtered on "CA1"/"Cx" elsewhere in this notebook, while this
# frame is filtered on lowercase "cx" - confirm the spelling returned by the data documentation.
df_stats_per_mouse_mean_nc = df_stats_per_mouse_mean[df_stats_per_mouse_mean["window_type"] == "cx"].copy()

In [None]:
# Replace the short segment codes with display labels. assign() returns a new frame, which
# avoids assigning into a slice of df_stats_per_mouse_mean; values that are not keys of
# value_mapping (e.g. already mapped labels) are kept, so the cell can be re-run.
df_stats_per_mouse_mean_nc = df_stats_per_mouse_mean_nc.assign(
    segment_type=df_stats_per_mouse_mean_nc["segment_type"].map(lambda label: value_mapping.get(label, label))
)

In [None]:
# Keep only the baseline and aftermath rows, ordered by mouse, experiment type and segment.
nc_is_bl_or_am = df_stats_per_mouse_mean_nc["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
df_stats_per_mouse_mean_nc_only_bl_am = (
    df_stats_per_mouse_mean_nc[nc_is_bl_or_am]
    .sort_values(by=["mouse_id", "exp_type", "segment_type"])
)

## Plot CA1 & NC together

In [None]:
fig, axs = plt.subplots(2, 3, figsize=(22, 16), sharey=False)

#print(len(df_exp_type.mouse_id.unique()))

# Top row: CA1, bottom row: neocortex; one line per mouse, the columns show the same three metrics.
mouse_mean_metrics = ("totdist_abs_norm", "running%", "running_episodes")
mouse_mean_rows = (
    (axs[0], df_stats_per_mouse_mean_ca1_only_bl_am, "CA1"),
    (axs[1], df_stats_per_mouse_mean_nc_only_bl_am, "Neocortex"),
)
for axes_row, window_rows, row_title in mouse_mean_rows:
    for ax, metric in zip(axes_row, mouse_mean_metrics):
        sns.lineplot(data=window_rows, x="segment_type", y=metric, hue="mouse_id", palette=dict_colors_mouse, estimator=None, ax=ax, marker="o", markersize=20, legend=False)
    for ax in axes_row:
        ax.set_title(row_title)

# Share the y axis within each column.
axs[1][0].sharey(axs[0][0])
axs[1][1].sharey(axs[0][1])
# In the third column the direction is swapped, to avoid a clipped marker in the lower panel.
axs[0][2].sharey(axs[1][2])


plt.tight_layout()

if save_figs:
    figure_name = f'loco_per_segment_tmev_mouse_means_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, figure_name)
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
# Stack the CA1 and NC per-mouse bl/am tables (CA1 rows first) into one frame.
df_stats_per_mouse_mean_only_bl_am = pd.concat(
    objs=[
        df_stats_per_mouse_mean_ca1_only_bl_am,
        df_stats_per_mouse_mean_nc_only_bl_am,
    ],
)

# Statistical testing

## Given two pairwise matched populations (bl and am), test significance of difference between means.

### Paired t-test (Gopal K. Kanji - 100 statistical tests: Test 10, page 35, 44/257)
As the data are not necessarily normally distributed, the paired t-test is only a first approximation. An alternative is the Wilcoxon signed-rank test.

In [None]:
from scipy.stats import ttest_rel

In [None]:
# Rows of the "bl" and "am" segments only; these form the pairs for the t-test.
stat_data = df_stats.loc[df_stats["segment_type"].isin(["bl", "am"])]

In [None]:
def paired_t_test(df, column_name="totdist_abs", one_sided=False, greater_expected="am"):
    """Paired t-test between the "am" and "bl" value of every event.

    The rows of ``df`` are grouped by "event_uuid"; each group must contain exactly
    one "bl" row and one "am" row, which together form one pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "event_uuid", "segment_type" and ``column_name``.
    column_name : str
        Column holding the values to compare.
    one_sided : bool
        False (default): two-sided test. True: one-sided test in the direction
        given by ``greater_expected``.
    greater_expected : str
        Only used when ``one_sided`` is True. "am" tests mean(am - bl) > 0,
        "bl" tests mean(am - bl) < 0.

    Returns
    -------
    The result object of ``scipy.stats.ttest_rel(am_vals, bl_vals, ...)``; the sign
    of its statistic follows mean(am - bl).

    Raises
    ------
    ValueError
        If a group does not hold exactly one "bl" and one "am" row, or if
        ``greater_expected`` is neither "am" nor "bl" in a one-sided test.
    """
    if not one_sided:
        alternative = "two-sided"
    elif greater_expected == "am":
        alternative = "greater"  # first sample (am) expected to be greater
    elif greater_expected == "bl":
        alternative = "less"  # second sample (bl) expected to be greater
    else:
        raise ValueError(f"paired_t_test(): invalid greater_expected value {greater_expected}")

    am_vals = []
    bl_vals = []
    # Use the frame that was passed in, not a notebook-level global.
    for event_id, group in df.groupby("event_uuid"):
        bl_rows = group.loc[group["segment_type"] == "bl", column_name].values
        am_rows = group.loc[group["segment_type"] == "am", column_name].values
        if len(bl_rows) != 1 or len(am_rows) != 1:
            raise ValueError(
                f"paired_t_test(): event {event_id} has {len(bl_rows)} bl and {len(am_rows)} am rows, expected one of each"
            )
        am_vals.append(am_rows[0])
        bl_vals.append(bl_rows[0])

    return ttest_rel(np.array(am_vals), np.array(bl_vals), alternative=alternative)

In [None]:
# The statistic is computed on (am - bl): a negative value means the metric was,
# on average, lower in the "am" segment than in the "bl" segment.
print(
    paired_t_test(
        df=stat_data,
        column_name="running_episodes",
        one_sided=False,
    )
)

# Export datasets

In [None]:
if export_datasets:
    output_fpath = os.path.join(output_folder, f"locomotion_tmev_{get_datetime_for_fname()}.xlsx")
    # Columns written to the per-event export, in this order.
    export_columns = [
        "mouse_id", "window_type", "event_uuid", "segment_type", "exp_type",
        "segment_length", "totdist", "totdist_abs", "running", "running%", "running_episodes",
    ]
    df_export = df_stats_bl_am_only[export_columns]
    df_export = df_export.sort_values(by=["mouse_id", "event_uuid", "segment_type"])
    df_export.to_excel(output_fpath)
    print(f"Results exported to {output_fpath}")

In [None]:
if export_datasets:
    output_fpath = os.path.join(output_folder, f"locomotion_tmev_mouse_aggregate_{get_datetime_for_fname()}.xlsx")
    # Columns written to the per-mouse export, in this order.
    export_columns = [
        "mouse_id", "window_type", "segment_type", "exp_type",
        "segment_length", "totdist", "totdist_abs", "running", "running%", "running_episodes",
    ]
    df_export = df_stats_per_mouse_mean_only_bl_am[export_columns]
    df_export = df_export.sort_values(by=["mouse_id", "segment_type"])
    df_export.to_excel(output_fpath)
    print(f"Results exported to {output_fpath}")
    

# 2. ChR2 (bl - stim - (Sz) - am protocol)

### Rename bl -> baseline, am -> post-stimulation

In [None]:
# Segment code -> label used in the tables and figures of this section.
value_mapping = dict(
    bl="baseline",
    sz="stimulation",
    am="post-stimulation",
)

In [None]:
# Replace every segment code by its label; a value that is not a key of
# value_mapping raises KeyError (so this cell cannot be run twice in a row).
df_stats["segment_type"] = df_stats["segment_type"].map(lambda code: value_mapping[code])

In [None]:
# Rows of the renamed "bl" and "am" segments only.
df_stats_only_bl_am = df_stats.loc[
    df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
]

# Plot loco traces

## ctl

In [None]:
# Stacked overview of the speed traces of all "chr2_ctl" events: every trace is
# min-max scaled to `amplitude` and drawn in its own horizontal band.
amplitude = 100
offset = 0.0
fig = plt.figure(figsize=(18,18))
# The loop variable is not called `uuid`, which would overwrite the imported uuid module.
for event_uuid in traces_dict:
    event_meta = traces_meta_dict[event_uuid]
    if event_meta["exp_type"] != "chr2_ctl":
        continue
    speed_trace = traces_dict[event_uuid]["lv_speed"]
    n_bl_frames = event_meta["n_bl_frames"]
    n_am_frames = event_meta["n_am_frames"]

    min_speed = min(speed_trace)
    max_speed = max(speed_trace)
    speed_range = max_speed - min_speed
    if speed_range == 0:
        # Constant trace: draw it at the bottom of its band instead of dividing 0 by 0.
        scaled_trace = np.full(len(speed_trace), offset)
    else:
        scaled_trace = amplitude*(speed_trace - min_speed)/speed_range + offset
    color = ddoc.getColorForUuid(event_uuid)
    plt.plot(scaled_trace, color=color)
    # Vertical bars: end of the first n_bl_frames frames and start of the last n_am_frames frames.
    plt.vlines(x=[n_bl_frames, len(speed_trace)-n_am_frames], ymin=offset, ymax=offset+amplitude, linewidth=4, color="black")
    offset += 1.2*amplitude
plt.show()

## SD

In [None]:
# Stacked overview of the speed traces of all "chr2_sd" events: every trace is
# min-max scaled to `amplitude` and drawn in its own horizontal band.
amplitude = 100
offset = 0.0
fig = plt.figure(figsize=(18,18))
# The loop variable is not called `uuid`, which would overwrite the imported uuid module.
for event_uuid in traces_dict:
    event_meta = traces_meta_dict[event_uuid]
    if event_meta["exp_type"] != "chr2_sd":
        continue
    print(event_uuid)
    speed_trace = traces_dict[event_uuid]["lv_speed"]
    n_bl_frames = event_meta["n_bl_frames"]
    n_am_frames = event_meta["n_am_frames"]

    min_speed = min(speed_trace)
    max_speed = max(speed_trace)
    speed_range = max_speed - min_speed
    if speed_range == 0:
        # Constant trace: draw it at the bottom of its band instead of dividing 0 by 0.
        scaled_trace = np.full(len(speed_trace), offset)
    else:
        scaled_trace = amplitude*(speed_trace - min_speed)/speed_range + offset
    color = ddoc.getColorForUuid(event_uuid)
    plt.plot(scaled_trace, color=color)
    # Vertical bars: end of the first n_bl_frames frames and start of the last n_am_frames frames.
    plt.vlines(x=[n_bl_frames, len(speed_trace)-n_am_frames], ymin=offset, ymax=offset+amplitude, linewidth=4, color="black")
    offset += 1.2*amplitude
plt.show()

# Check correctness of quantities

In [None]:
def debugPlot(uuid):
    """Save a five-panel diagnostic figure for one event.

    Panels 0 and 1 show the stored "totdist_abs" and "totdist_abs_norm" values of the
    event's two segments; panels 2-4 show the speed, absolute total distance and
    rounds traces with vertical bars at the segment borders. Trace values at the
    borders, and the differences derived from them, are written into panels 3 and 4
    so they can be compared by eye with the stored statistics.

    Reads the notebook globals traces_dict, traces_meta_dict, ddoc,
    use_manual_bl_am_length, bl_manual_length, am_manual_length,
    df_stats_only_bl_am, value_mapping, dict_colors_event and output_folder.

    Parameters
    ----------
    uuid
        Key of the event in ``traces_dict`` / ``traces_meta_dict`` and value of the
        "event_uuid" column. Inside this function the name hides the ``uuid`` module.

    The figure is saved in ``output_folder`` as "<nd2 file name without extension>_debug.jpg"
    and the path is printed; nothing is returned.
    """
    event_meta = traces_meta_dict[uuid]
    event_traces = traces_dict[uuid]
    exp_type = event_meta["exp_type"]
    speed_trace = event_traces["lv_speed"]
    totdist_abs_trace = event_traces["lv_totdist_abs"]
    rounds_trace = event_traces["lv_rounds"]
    n_bl_frames = event_meta["n_bl_frames"]
    n_am_frames = event_meta["n_am_frames"]
    n_frames = event_meta["n_frames"]
    nd2_file = ddoc.getNikonFileNameForUuid(uuid)

    if use_manual_bl_am_length:
        # Crop the traces so that bl keeps its last bl_manual_length frames and am its
        # first am_manual_length frames; the part in between is left untouched.
        n_sz_frames = n_frames - n_am_frames - n_bl_frames
        i_bl_start_index = n_bl_frames - bl_manual_length
        n_am_frames_dropped = n_am_frames - am_manual_length

        def crop(trace):
            # Explicit stop index: with nothing to drop, a stop of "-0" would be 0
            # and return an empty trace.
            return trace[i_bl_start_index:len(trace) - n_am_frames_dropped]

        speed_trace = crop(speed_trace)
        totdist_abs_trace = crop(totdist_abs_trace)
        rounds_trace = crop(rounds_trace)

        n_bl_frames = bl_manual_length
        n_am_frames = am_manual_length
        n_frames = len(speed_trace)

        # Cropping must not change the length of the middle part.
        assert n_frames - n_am_frames - n_bl_frames == n_sz_frames

    i_am_start = n_frames - n_am_frames  # first frame of the am segment
    i_last = n_frames - 1

    min_speed = min(speed_trace)
    max_speed = max(speed_trace)
    min_totdist_abs = min(totdist_abs_trace)
    max_totdist_abs = max(totdist_abs_trace)
    mean_totdist_abs = (min_totdist_abs + max_totdist_abs)/2.  # mid-range, used as text height
    min_rounds = min(rounds_trace)
    max_rounds = max(rounds_trace)
    mean_rounds = (max_rounds+min_rounds)/2.

    # Stored statistics of this event, one row per segment.
    df_event = df_stats_only_bl_am[df_stats_only_bl_am["event_uuid"] == uuid]
    df_event_bl = df_event[df_event["segment_type"] == value_mapping["bl"]]
    df_event_am = df_event[df_event["segment_type"] == value_mapping["am"]]
    bl_totdist_abs = df_event_bl.totdist_abs.iloc[0]
    am_totdist_abs = df_event_am.totdist_abs.iloc[0]
    bl_totdist_abs_norm = df_event_bl.totdist_abs_norm.iloc[0]
    am_totdist_abs_norm = df_event_am.totdist_abs_norm.iloc[0]

    color = ddoc.getColorForUuid(uuid)

    fig, axs = plt.subplots(5,1, figsize=(18,36))
    fig.suptitle(f"{nd2_file} ({exp_type})")

    # Panels 0-1: stored statistics.
    sns.lineplot(data=df_event, x="segment_type", y="totdist_abs", hue="event_uuid", palette=dict_colors_event, estimator=None, ax=axs[0], marker="o", markersize=20, legend=False)
    sns.lineplot(data=df_event, x="segment_type", y="totdist_abs_norm", hue="event_uuid", palette=dict_colors_event, estimator=None, ax=axs[1], marker="o", markersize=20, legend=False)

    # Panels 2-4: traces with the segment borders.
    segment_borders = [n_bl_frames, i_am_start]
    axs[2].plot(speed_trace, color=color)
    axs[2].vlines(x=segment_borders, ymin=min_speed, ymax=max_speed, color="black")
    axs[3].plot(totdist_abs_trace, color=color)
    axs[3].vlines(x=segment_borders, ymin=min_totdist_abs, ymax=max_totdist_abs, color="black")
    axs[4].plot(rounds_trace, color=color)
    axs[4].vlines(x=segment_borders, ymin=min_rounds, ymax=max_rounds, color="black")

    # Stored values next to their markers.
    axs[0].text(0.05, bl_totdist_abs, f"{bl_totdist_abs:.2f}")
    axs[0].text(0.85, am_totdist_abs, f"{am_totdist_abs:.2f}")
    axs[1].text(0.05, bl_totdist_abs_norm, f"{bl_totdist_abs_norm:.5f}")
    axs[1].text(0.9, am_totdist_abs_norm, f"{am_totdist_abs_norm:.5f}")

    # totdist_abs values at beginning of bl, end of bl, beginning of am, end of am
    axs[3].text(0, 1.02*mean_totdist_abs, f"{totdist_abs_trace[0]:.2f}")
    axs[3].text(n_bl_frames-1000, 1.02*mean_totdist_abs, f"{totdist_abs_trace[n_bl_frames]:.2f}")
    axs[3].text(i_am_start+100, 1.02*mean_totdist_abs, f"{totdist_abs_trace[i_am_start]:.2f}")
    axs[3].text(n_frames-1000, 1.02*mean_totdist_abs, f"{totdist_abs_trace[i_last]:.2f}")

    # distance difference per frame over each segment, written out as a formula
    axs[3].text(0, 0.75*mean_totdist_abs, f"({totdist_abs_trace[n_bl_frames]:.2f} - {totdist_abs_trace[0]:.2f})/{n_bl_frames} = {(totdist_abs_trace[n_bl_frames]-totdist_abs_trace[0])/n_bl_frames:.5f}")
    axs[3].text(i_am_start+100, 0.75*mean_totdist_abs, f"({totdist_abs_trace[i_last]:.2f} - {totdist_abs_trace[i_am_start]:.2f})/{n_am_frames} = {(totdist_abs_trace[i_last]-totdist_abs_trace[i_am_start])/n_am_frames:.5f}")

    axs[3].text(n_bl_frames-1000, min_totdist_abs, f"delta: {(totdist_abs_trace[n_bl_frames]-totdist_abs_trace[0]):.2f}")
    axs[3].text(n_frames-1000, min_totdist_abs, f"delta: {(totdist_abs_trace[i_last]-totdist_abs_trace[i_am_start]):.2f}")

    # rounds values at the same borders
    # NOTE(review): the end of bl is read at n_bl_frames-1 here but at n_bl_frames in
    # the totdist_abs panel; confirm which index is intended.
    axs[4].text(0, mean_rounds, rounds_trace[0])
    axs[4].text(n_bl_frames-1000, mean_rounds, rounds_trace[n_bl_frames-1])
    axs[4].text(n_frames-1000, mean_rounds, rounds_trace[i_last])
    axs[4].text(i_am_start+200, mean_rounds, rounds_trace[i_am_start])

    axs[4].text(n_bl_frames-1000, 0.1*mean_rounds, f"delta: {rounds_trace[n_bl_frames-1]-rounds_trace[0]}")
    axs[4].text(n_frames-1000, 0.1*mean_rounds, f"delta: {rounds_trace[i_last]-rounds_trace[i_am_start]}")

    fig.tight_layout()
    fname = os.path.splitext(nd2_file)[0] + "_debug.jpg"
    fpath = os.path.join(output_folder, fname)
    fig.savefig(fpath)
    print(fpath)

In [None]:
#debugPlot(list(traces_dict.keys())[0])

In [None]:
# Set to True to write one debug figure per event.
save_debug_images = False
if save_debug_images:
    # The loop variable is not called `uuid`, which would overwrite the imported uuid module.
    for event_uuid in df_stats_only_bl_am.event_uuid.unique():
        debugPlot(event_uuid)

# Plot results

### Plot everything together (all types: ctl, sd, sz+sd...)

In [None]:
fig, axs = plt.subplots(2, 3, figsize=(22,16))

# One panel per metric, filled row by row; one line per event.
all_metrics = (
    "totdist_abs", "running", "speed",
    "running%", "running_episodes", "running_episodes_mean_length",
)
for ax, metric in zip(axs.flat, all_metrics):
    sns.lineplot(data=df_stats_only_bl_am, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, estimator=None, ax=ax, marker="o", markersize=20, legend=False)

plt.tight_layout()

# Original note: do not save this one; need to separate by experiment type (ctl, sz, szsd...).
# The figure is nevertheless written whenever save_figs is set.
if save_figs:
    fig_fpath = os.path.join(output_folder, f'loco_per_segment_all_recordings_{get_datetime_for_fname()}_{output_version}{file_format}')
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

### Plot each category

### Plot for CA1

In [None]:
# One row of panels per experiment type recorded through a CA1 window; one line per event.
df_stats_ca1 = df_stats[df_stats["window_type"] == "ca1"]
n_exp_types = len( df_stats_ca1.exp_type.unique())
# squeeze=False keeps axs two-dimensional, so axs[row][col] also works with a single experiment type.
fig, axs = plt.subplots(n_exp_types, 3, figsize=(22,8*n_exp_types), squeeze=False)
for i_exp_type, exp_type in enumerate(df_stats_ca1.exp_type.unique()):
    df_exp_type = df_stats_only_bl_am[(df_stats_only_bl_am["exp_type"] == exp_type) & (df_stats_only_bl_am["window_type"] == "ca1")]
    # NOTE(review): the first column shows "totdist_abs" here, while the NC figure
    # shows "totdist_abs_norm"; confirm that the difference is intended.
    for ax, metric in zip(axs[i_exp_type], ("totdist_abs", "running%", "running_episodes")):
        ax.set_title(f"{exp_type}")
        sns.lineplot(data=df_exp_type, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, estimator=None, ax=ax, marker="o", markersize=20, legend=False)


plt.tight_layout()

if save_figs:
    fig_fpath = os.path.join(output_folder, f'loco_per_segment_chr2_ca1_{get_datetime_for_fname()}_{output_version}{file_format}')
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

### Plot for NC

In [None]:
# One row of panels per experiment type recorded through a "cx" window; one line per event.
# Nothing is drawn when there are no such recordings.
df_stats_nc = df_stats[df_stats["window_type"] == "cx"]
if len(df_stats_nc) > 0:
    n_exp_types = len(df_stats_nc.exp_type.unique())
    # squeeze=False keeps axs two-dimensional, so axs[row][col] also works with a single experiment type.
    fig, axs = plt.subplots(n_exp_types, 3, figsize=(22,8*n_exp_types), squeeze=False)

    for i_exp_type, exp_type in enumerate(df_stats_nc.exp_type.unique()):
        df_exp_type = df_stats_only_bl_am[(df_stats_only_bl_am["exp_type"] == exp_type) & (df_stats_only_bl_am["window_type"] == "cx")]
        for ax, metric in zip(axs[i_exp_type], ("totdist_abs_norm", "running%", "running_episodes")):
            ax.set_title(f"{exp_type}")
            sns.lineplot(data=df_exp_type, x="segment_type", y=metric, hue="event_uuid", palette=dict_colors_event, estimator=None, ax=ax, marker="o", markersize=20, legend=False)


    plt.tight_layout()

    if save_figs:
        fig_fpath = os.path.join(output_folder, f'loco_per_segment_chr2_nc_{get_datetime_for_fname()}_{output_version}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

# Mouse-aggregate
i.e. mean per mouse

## CA1

In [None]:
# Per-mouse means for window_type "ca1", with the segment codes translated through
# value_mapping (unknown codes raise KeyError). `assign` returns a new frame, so the
# column is not written onto the filtered frame (avoids pandas' SettingWithCopyWarning).
df_stats_per_mouse_mean_ca1 = df_stats_per_mouse_mean[df_stats_per_mouse_mean["window_type"] == "ca1"]
df_stats_per_mouse_mean_ca1 = df_stats_per_mouse_mean_ca1.assign(
    segment_type=df_stats_per_mouse_mean_ca1["segment_type"].map(lambda code: value_mapping[code])
)

In [None]:
# Per-mouse CA1 rows of the renamed "bl" and "am" segments only.
df_stats_per_mouse_mean_ca1_only_bl_am = df_stats_per_mouse_mean_ca1.loc[
    df_stats_per_mouse_mean_ca1["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
]

In [None]:
# Order the rows by mouse, then experiment type, then segment.
df_stats_per_mouse_mean_ca1_only_bl_am = df_stats_per_mouse_mean_ca1_only_bl_am.sort_values(
    by=["mouse_id", "exp_type", "segment_type"],
)

In [None]:
# One row of panels per experiment type; one line per mouse (per-mouse means, CA1).
n_exp_types = len( df_stats_per_mouse_mean_ca1.exp_type.unique())
# squeeze=False keeps axs two-dimensional, so axs[row][col] also works with a single experiment type.
fig, axs = plt.subplots(n_exp_types, 3, figsize=(22,8*n_exp_types), squeeze=False)
for i_exp_type, exp_type in enumerate(df_stats_per_mouse_mean_ca1_only_bl_am.exp_type.unique()):
    df_exp_type = df_stats_per_mouse_mean_ca1_only_bl_am[(df_stats_per_mouse_mean_ca1_only_bl_am["exp_type"] == exp_type) & (df_stats_per_mouse_mean_ca1_only_bl_am["window_type"] == "ca1")]
    for ax, metric in zip(axs[i_exp_type], ("totdist_abs_norm", "running%", "running_episodes")):
        ax.set_title(f"{exp_type}")
        sns.lineplot(data=df_exp_type, x="segment_type", y=metric, hue="mouse_id", palette=dict_colors_mouse, estimator=None, ax=ax, marker="o", markersize=20, legend=False)


plt.tight_layout()

if save_figs:
    fig_fpath = os.path.join(output_folder, f'loco_per_segment_chr2_ca1_mouse_means_{get_datetime_for_fname()}_{output_version}{file_format}')
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

## NC

In [None]:
if len(df_stats_nc) > 0:
    # Per-mouse means for window_type "cx", with the segment codes translated through
    # value_mapping (unknown codes raise KeyError). `assign` returns a new frame, so the
    # column is not written onto the filtered frame (avoids pandas' SettingWithCopyWarning).
    df_stats_per_mouse_mean_nc = df_stats_per_mouse_mean[df_stats_per_mouse_mean["window_type"] == "cx"]
    df_stats_per_mouse_mean_nc = df_stats_per_mouse_mean_nc.assign(
        segment_type=df_stats_per_mouse_mean_nc["segment_type"].map(lambda code: value_mapping[code])
    )

In [None]:
if len(df_stats_nc) > 0:
    # Per-mouse NC rows of the renamed "bl" and "am" segments only.
    df_stats_per_mouse_mean_nc_only_bl_am = df_stats_per_mouse_mean_nc.loc[
        df_stats_per_mouse_mean_nc["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
    ]

In [None]:
if len(df_stats_nc) > 0:
    # Order the rows by mouse, then experiment type, then segment.
    df_stats_per_mouse_mean_nc_only_bl_am = df_stats_per_mouse_mean_nc_only_bl_am.sort_values(
        by=["mouse_id", "exp_type", "segment_type"],
    )

In [None]:
# One row of panels per experiment type; one line per mouse (per-mouse means, NC).
if len(df_stats_nc) > 0:
    n_exp_types = len( df_stats_per_mouse_mean_nc.exp_type.unique())
    # squeeze=False keeps axs two-dimensional, so axs[row][col] also works with a single experiment type.
    fig, axs = plt.subplots(n_exp_types, 3, figsize=(22,8*n_exp_types), squeeze=False)
    # Plot the sorted bl/am-only frame, as the CA1 figure does, so that other
    # segment types do not appear on the x axis.
    for i_exp_type, exp_type in enumerate(df_stats_per_mouse_mean_nc_only_bl_am.exp_type.unique()):
        df_exp_type = df_stats_per_mouse_mean_nc_only_bl_am[(df_stats_per_mouse_mean_nc_only_bl_am["exp_type"] == exp_type) & (df_stats_per_mouse_mean_nc_only_bl_am["window_type"] == "cx")]
        for ax, metric in zip(axs[i_exp_type], ("totdist_abs_norm", "running%", "running_episodes")):
            ax.set_title(f"{exp_type}")
            sns.lineplot(data=df_exp_type, x="segment_type", y=metric, hue="mouse_id", palette=dict_colors_mouse, estimator=None, ax=ax, marker="o", markersize=20, legend=False)


    plt.tight_layout()

    if save_figs:
        # "nc" in the file name: the previous "ca1" name collided with the CA1 figure.
        fig_fpath = os.path.join(output_folder, f'loco_per_segment_chr2_nc_mouse_means_{get_datetime_for_fname()}_{output_version}{file_format}')
        plt.savefig(fig_fpath, format=file_format.split(".")[-1])
        print(f"Saved to {fig_fpath}")
    plt.show()

# Statistical testing

In [None]:
from scipy.stats import ttest_rel

In [None]:
def paired_t_test(df, column_name="totdist_abs", one_sided=False, greater_expected=value_mapping["am"], pair_by="event_uuid"):
    """Paired t-test between the "am" and "bl" value of every group.

    The rows of ``df`` are grouped by ``pair_by``; each group must contain exactly one
    row labelled ``value_mapping["bl"]`` and one labelled ``value_mapping["am"]`` in
    "segment_type", which together form one pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``pair_by``, "segment_type" and ``column_name``.
    column_name : str
        Column holding the values to compare.
    one_sided : bool
        False (default): two-sided test. True: one-sided test in the direction
        given by ``greater_expected``.
    greater_expected : str
        Only used when ``one_sided`` is True. ``value_mapping["am"]`` tests
        mean(am - bl) > 0, ``value_mapping["bl"]`` tests mean(am - bl) < 0.
    pair_by : str
        Column whose values identify a pair.

    Returns
    -------
    The result object of ``scipy.stats.ttest_rel(am_vals, bl_vals, ...)``; the sign
    of its statistic follows mean(am - bl).

    Raises
    ------
    ValueError
        If a group does not hold exactly one "bl" and one "am" row, or if
        ``greater_expected`` matches neither label in a one-sided test.
    """
    bl_label = value_mapping["bl"]
    am_label = value_mapping["am"]

    if not one_sided:
        alternative = "two-sided"
    elif greater_expected == am_label:
        alternative = "greater"  # first sample (am) expected to be greater
    elif greater_expected == bl_label:
        alternative = "less"  # second sample (bl) expected to be greater
    else:
        raise ValueError(f"paired_t_test(): invalid greater_expected value {greater_expected}")

    am_vals = []
    bl_vals = []
    for pair_id, group in df.groupby(pair_by):
        bl_rows = group.loc[group["segment_type"] == bl_label, column_name].values
        am_rows = group.loc[group["segment_type"] == am_label, column_name].values
        if len(bl_rows) != 1 or len(am_rows) != 1:
            raise ValueError(
                f"paired_t_test(): {pair_by} {pair_id} has {len(bl_rows)} bl and {len(am_rows)} am rows, expected one of each"
            )
        am_vals.append(am_rows[0])
        bl_vals.append(bl_rows[0])

    return ttest_rel(np.array(am_vals), np.array(bl_vals), alternative=alternative)

## CA1

In [None]:
# Per-event bl/am rows recorded through a CA1 window.
stat_data_ca1 = df_stats.loc[
    df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
    & (df_stats["window_type"] == "ca1")
]

In [None]:
# Distinct mouse ids in the CA1 test data (shown as the cell output).
stat_data_ca1["mouse_id"].unique()

In [None]:
# Paired t-test per metric and experiment type; pairs are the events.
print("CA1")
for metric in ("totdist_abs_norm", "running%", "running_episodes"):
    print(metric)
    for exp_type, df_exp in stat_data_ca1.groupby("exp_type"):
        result = paired_t_test(exp_g := df_exp, metric) if False else paired_t_test(df_exp, metric)
        n_pairs = int((df_exp["segment_type"] == value_mapping["bl"]).sum())  # one "bl" row per pair
        print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={result.statistic}, p={result.pvalue}\n')


## NC

In [None]:
if len(df_stats_nc) > 0:
    # Per-event bl/am rows recorded through a "cx" window.
    stat_data_nc = df_stats.loc[
        df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
        & (df_stats["window_type"] == "cx")
    ]

In [None]:
# Paired t-test per metric and experiment type; pairs are the events.
if len(df_stats_nc) > 0:
    print("NC")
    for metric in ("totdist_abs_norm", "running%", "running_episodes"):
        print(metric)
        for exp_type, df_exp in stat_data_nc.groupby("exp_type"):
            result = paired_t_test(df_exp, metric)
            n_pairs = int((df_exp["segment_type"] == value_mapping["bl"]).sum())  # one "bl" row per pair
            print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={result.statistic}, p={result.pvalue}\n')


## Mouse-aggregate

### CA1

In [None]:
# Per-mouse bl/am rows for the CA1 window.
stat_data_mouse_means_ca1 = df_stats_per_mouse_mean_ca1.loc[
    df_stats_per_mouse_mean_ca1["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
    & (df_stats_per_mouse_mean_ca1["window_type"] == "ca1")
]

In [None]:
# Paired t-test per metric and experiment type; pairs are the mice.
print("CA1")
for metric in ("totdist_abs_norm", "running%", "running_episodes"):
    print(metric)
    for exp_type, df_exp in stat_data_mouse_means_ca1.groupby("exp_type"):
        result = paired_t_test(df_exp, metric, pair_by="mouse_id")
        n_pairs = int((df_exp["segment_type"] == value_mapping["bl"]).sum())  # one "bl" row per pair
        print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={result.statistic}, p={result.pvalue}\n')


### NC

In [None]:
if len(df_stats_nc) > 0:
    # Per-mouse bl/am rows for the "cx" window.
    stat_data_mouse_means_nc = df_stats_per_mouse_mean_nc.loc[
        df_stats_per_mouse_mean_nc["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
        & (df_stats_per_mouse_mean_nc["window_type"] == "cx")
    ]

In [None]:
# Paired t-test per metric and experiment type; pairs are the mice.
if len(df_stats_nc) > 0:
    print("NC")
    for metric in ("totdist_abs_norm", "running%", "running_episodes"):
        print(metric)
        for exp_type, df_exp in stat_data_mouse_means_nc.groupby("exp_type"):
            result = paired_t_test(df_exp, metric, pair_by="mouse_id")
            n_pairs = int((df_exp["segment_type"] == value_mapping["bl"]).sum())  # one "bl" row per pair
            print(f'\t{exp_type}: {n_pairs} pairs;\n\tstatistic={result.statistic}, p={result.pvalue}\n')


# Export results

In [None]:
#df_stats[(df_stats["segment_type"].isin(["bl", "am"]))].to_excel("C:\\Users\\Bence\\Downloads\\loco_dataset_raw.xlsx", index=False)

In [None]:
# Per-event rows of the renamed "bl" and "am" segments; this is the frame that gets exported.
df_to_save = df_stats.loc[
    df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])
]

In [None]:

if export_datasets:
    # Time-stamped Excel file in the output folder.
    output_fname = f"locomotion_ChR2_{get_datetime_for_fname()}.xlsx"
    output_fpath = os.path.join(output_folder, output_fname)
    df_to_save.to_excel(output_fpath)
    print(f"Results exported to {output_fpath}")
    

In [None]:
if export_datasets:
    output_fpath = os.path.join(output_folder, f"locomotion_ChR2_mouse_aggregate_{get_datetime_for_fname()}.xlsx")
    # Export the per-mouse means of every window, not only CA1: the NC table is
    # appended whenever NC recordings exist (it is only built in that case).
    mouse_aggregate_frames = [df_stats_per_mouse_mean_ca1_only_bl_am]
    if len(df_stats_nc) > 0:
        mouse_aggregate_frames.append(df_stats_per_mouse_mean_nc_only_bl_am)
    pd.concat(mouse_aggregate_frames).to_excel(output_fpath)
    print(f"Results exported to {output_fpath}")
    

In [None]:
# Writes one Excel file per window type, experiment type and metric, with one row per
# event and one column per segment. Currently disabled by the hard-coded `and False`.
if export_datasets and False:
    # First mouse_id listed for every event; looked up once instead of once per row.
    mouse_id_by_event = df_stats.drop_duplicates(subset="event_uuid").set_index("event_uuid")["mouse_id"]
    df_bl_am = df_stats[df_stats["segment_type"].isin([value_mapping["bl"], value_mapping["am"]])]
    for win_type in df_stats.window_type.unique():
        for exp_type in df_stats[df_stats["window_type"] == win_type].exp_type.unique():
            df_subset = df_bl_am[(df_bl_am["window_type"] == win_type) & (df_bl_am["exp_type"] == exp_type)]
            for loco_metric in ["totdist_abs_norm", "running%", "running_episodes"]:
                # Written to output_folder like every other export, not to a user-specific path.
                fname = os.path.join(output_folder, f"loco_{loco_metric}_{win_type}_{exp_type}_{get_datetime_for_fname()}.xlsx")
                a = df_subset[["event_uuid", "mouse_id", "segment_type", loco_metric]].pivot(index='event_uuid', columns='segment_type', values=loco_metric).reset_index()
                a["mouse_id"] = a["event_uuid"].map(mouse_id_by_event)
                a.to_excel(fname, index=False)
                print(f"Saved to {fname}")