# Locomotion during seizure vs during SD
Compare the locomotion (same quantities as in Locomotion analysis 3.0) during the optically visible seizure, which ends when the SD appears in the imaging window, with an interval of the same length that directly follows it.

In [None]:
from collections import OrderedDict

In [None]:
# Plot label for every locomotion metric; insertion order is the display order.
dict_metric_label = OrderedDict()
dict_metric_label["totdist_abs"] = "Total (absolute) distance, a.u."
dict_metric_label["running%"] = "% of time spent with locomotion"
dict_metric_label["running_episodes"] = "Number of running episodes"
dict_metric_label["avg_speed"] = "Average of locomotion velocity"
dict_metric_label["running_episodes_mean_length"] = "Mean length of running episodes, a.u."
dict_metric_label["max_speed"] = "Max velocity of locomotion, a.u."

In [None]:
# Metrics to test for (each one is a column name of the statistics dataframe).
STAT_METRICS = [
    "totdist_abs",
    "running%",
    "running_episodes",
    "avg_speed",
    "running_episodes_mean_length",
    "max_speed",
]

In [None]:
# Parameters for detecting/filtering running episodes; passed to get_episodes and apply_threshold further below.
AMPL_THRESHOLD = 0.2  # amplitude threshold: at least one element within the running episode candidate has to reach it for the episode to not be discarded.
TEMP_THRESHOLD = 15  # in number of frames. At 15 Hz, this amounts to a 1 s minimum length that a candidate episode has to reach to not be discarded. 
EPISODE_MERGE_THRESHOLD_FRAMES = 8  # merge running episodes if their temporal distance is smaller than or equal to this many frames (15 Hz!)

# Set version

### Set up export figure parameters

In [None]:
save_data = False  # export results of this script (the statistics and delta tables) as xlsx?
save_sanity_check = False  # make sure to set save_figs to True as well
save_waterfall = False  # NOTE(review): not read anywhere in the visible notebook; the waterfall plot is saved whenever save_figs is True

In [None]:
# Figure export settings. PDF has priority over EPS; JPG is used when neither is selected.
save_figs = True  # set to True to save the figures created
save_as_eps = False
save_as_pdf = True
file_format = ".pdf" if save_as_pdf else (".eps" if save_as_eps else ".jpg")
if save_figs:
    print(f"Going to save figures as {file_format} files.")

# Import libraries

In [None]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [None]:
import labrotation.file_handling as fh
import h5py
from time import time
import matplotlib.pyplot as plt
import numpy as np
import os
from labrotation import file_handling as fh
from copy import deepcopy
import pandas as pd
import labrotation.two_photon_session as tps
import seaborn as sns
import uuid  # for unique labeling of sessions and coupling arrays (mouse velocity, distance, ...) to sessions in dataframe 
from matplotlib import cm  # colormap
import datadoc_util
from labrotation import two_photon_session as tps
from datetime import datetime
import seaborn as sns
from math import floor
import matlab.engine  # for saving data to workspace
from scipy.stats import ttest_rel
import json
from loco_functions import apply_threshold, get_episodes, calculate_avg_speed, calculate_max_speed, get_trace_delta

# Set seaborn parameters

In [None]:
# Global seaborn appearance for all figures created below (font scaling and grid style).
sns.set(font_scale=3)
sns.set_style("whitegrid")

# If exists, load environmental variables from .env file

In [None]:
def parse_env_lines(lines):
    """Parse KEY=VALUE lines (as found in a .env file) into a dictionary.

    Blank lines, lines starting with "#" and lines without "=" are skipped.
    Only the first "=" separates key and value, so values may contain "=".
    Surrounding whitespace of keys and values is removed.

    Parameters
    ----------
    lines : iterable of str
        The raw lines, with or without trailing newline.

    Returns
    -------
    dict
        Mapping of variable name to (string) value.
    """
    parsed = dict()
    for raw_line in lines:
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, value = entry.split("=", 1)  # maxsplit=1: keep any further "=" in the value
        parsed[key.strip()] = value.strip()
    return parsed


# env_dict stays empty if there is no .env file in the working directory.
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        env_dict = parse_env_lines(f)
print(env_dict.keys())

# Set up data documentation directory

In [None]:
# Assumed layout of the documentation folder:
#   * one subfolder per mouse; the subfolder name identifies the mouse (not necessarily the exact id).
#   * inside the subfolder of mouse xy: xy_grouping.xlsx and xy_segmentation.xlsx.
#   * xy_grouping.xlsx lists the recordings that belong together. Columns:
#       folder, nd2, labview, lfp, face_cam_last, nikon_meta, experiment_type, day
#   * xy_segmentation.xlsx classifies the recording frame by frame, as a set of disjoint intervals
#     covering the whole recording ("normal", seizure ("sz"), sd wave ("sd_wave") etc.). Columns:
#       folder, interval_type, frame_begin, frame_end.

# TODO: write documentation on contents of xlsx files (what the columns are etc.)
# Prefer the folder configured in .env; only ask the user when it is not configured.
docu_folder = env_dict.get("DATA_DOCU_FOLDER")
if docu_folder is None:
    docu_folder = fh.open_dir("Choose folder containing folders for each mouse!")
print(f"Selected folder:\n\t{docu_folder}")

In [None]:
# The per-mouse folders are either directly in docu_folder or in its "documentation" subfolder.
if "documentation" in os.listdir(docu_folder):
    mouse_folder = os.path.join(docu_folder, "documentation")
else:
    mouse_folder = docu_folder
# Only directories can be mouse folders; plain files lying next to them
# (e.g. xlsx tables) must not be reported as mice.
mouse_names = sorted(
    entry for entry in os.listdir(mouse_folder)
    if os.path.isdir(os.path.join(mouse_folder, entry))
)
print("Mice detected:")
for mouse in mouse_names:
    print(f"\t{mouse}")

In [None]:
def get_datetime_for_fname():
    """Return the current local date and time as "YYYYMMDD-HHMMSS", for use in file names."""
    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
# Use the folder configured in .env; if it is not configured (or there is no .env file),
# ask the user instead of failing with a KeyError (same fallback as for the other folders).
if "DOWNLOADS_FOLDER" in env_dict.keys():
    output_folder = env_dict["DOWNLOADS_FOLDER"]
else:
    output_folder = fh.open_dir("Choose folder to save output files to!")
print(f"Output files will be saved to {output_folder}")

## Set a uniform datetime string for output files

In [None]:
output_dtime = get_datetime_for_fname()  # one timestamp, shared by the names of all files exported in this run

### Load matlab-2p

In [None]:
# Prefer the matlab-2p location configured in .env; only ask the user when it is not configured.
matlab_2p_folder = env_dict.get("MATLAB_2P_FOLDER")
if matlab_2p_folder is None:
    matlab_2p_folder = fh.open_dir("Choose matlab-2p folder")
print(f"matlab-2p folder set to:\n\t{matlab_2p_folder}")

### Load data documentation

In [None]:
# Data documentation object for the selected documentation folder; used below for the color table
# (and, in the ChR2 branch of the waterfall plot, for the segmentation of an event).
ddoc = datadoc_util.DataDocumentation(docu_folder)
ddoc.loadDataDoc()

### Set up color coding
For now, it is only possible to assign a color to each mouse. Later, when event uuids are available, each event uuid needs to be mapped to a color code.

In [None]:
df_colors = ddoc.getColorings()  # color table; the columns "mouse_id" and "color" are used below

In [None]:
# Lookup table: mouse id -> color (for repeated mouse ids, the last row wins).
dict_colors_mouse = dict(zip(df_colors["mouse_id"], df_colors["color"]))

In [None]:
#dict_colors_mouse["T413"] = "#000000"  # set one to black

### Load events_list dataset

In [None]:
# events_list.xlsx is expected directly inside the documentation folder.
events_list_fpath = os.path.join(docu_folder, "events_list.xlsx")
assert os.path.exists(events_list_fpath)  # stop early if the events list is missing

df_events_list = pd.read_excel(events_list_fpath)

## Load dataset

In [None]:
# Ask the user for the assembled traces file (h5); its name is also used below to detect the dataset type.
assembled_traces_fpath = fh.open_file("Open assembled_traces h5 file!")
print(assembled_traces_fpath)

In [None]:
# Detect the dataset type from the file path (case-insensitive). "chr2" has priority over "bilat";
# if neither is found, both flags stay False.
_traces_fpath_lower = assembled_traces_fpath.lower()
is_chr2 = "chr2" in _traces_fpath_lower
is_bilat = (not is_chr2) and ("bilat" in _traces_fpath_lower)
if is_chr2:
    print("ChR2 dataset detected")
elif is_bilat:
    print("Bilat stim dataset detected")

In [None]:
# Mice to include, per dataset type.
# NOTE(review): used_mouse_ids is not defined when neither flag is set (TMEV); in a re-run notebook it may
# still hold the value of a previous run.
# NOTE(review): the loading cell below only applies this filter when is_chr2 is True, so the bilat list
# currently has no effect there - confirm whether bilat events should be filtered as well.
if is_chr2:
    used_mouse_ids = ["OPI-2239", "WEZ-8917", "WEZ-8924", "WEZ-8922"]
elif is_bilat:
    used_mouse_ids = ["WEZ-8946", "WEZ-8960", "WEZ-8961"]

In [None]:
# Short label of the dataset type.
if is_chr2:
    dataset_type = "chr2"
elif is_bilat:
    dataset_type = "bilat"
else:
    dataset_type = "tmev"
# for TMEV, also save pooled CA1+NC statistics
# (the flag is True for every dataset that is not ChR2, i.e. also for bilat)
pool_tmev = not is_chr2

In [None]:
# Load all traces and their metadata from the assembled traces file.
# traces_dict:      event uuid -> {dataset name -> numpy array}
# traces_meta_dict: event uuid -> {attribute name -> attribute value}
# The first keys are event uuids; inside, the following dataset names:
# 'lfp_mov_t', 'lfp_mov_y', 'lfp_t', 'lfp_y', 'lv_dist', 'lv_rounds',
# 'lv_running', 'lv_speed', 'lv_t_s', 'lv_totdist', 'mean_fluo'
traces_dict = dict()
traces_meta_dict = dict()
with h5py.File(assembled_traces_fpath, "r") as hf:
    # the loop variable must not be called "uuid": that would shadow the imported uuid module
    for event_uuid, event_group in hf.items():
        # for ChR2 datasets, only keep events of the selected mice
        if is_chr2 and event_group.attrs["mouse_id"] not in used_mouse_ids:
            continue
        traces_dict[event_uuid] = {
            dataset_name: np.array(dataset) for dataset_name, dataset in event_group.items()
        }
        traces_meta_dict[event_uuid] = {
            attr_name: attr_value for attr_name, attr_value in event_group.attrs.items()
        }

### Get locomotion amplitude by finding min and max LabView speed entries

In [None]:
# Smallest and largest LabView speed value over all loaded events.
min_speed, max_speed = np.inf, -np.inf
for session_traces in traces_dict.values():
    session_speed = session_traces["lv_speed"]
    # min/max keep the current extreme unless the new candidate is strictly smaller/larger
    min_speed = min(min_speed, np.min(session_speed))
    max_speed = max(max_speed, np.max(session_speed))
print(f"Speed range: {min_speed} to {max_speed}")

In [None]:
LV_SPEED_AMPL = max_speed - min_speed  # speed range over all loaded events; used as amplitude unit in the waterfall plot

In [None]:
# Sanity check of the segment break points.
# For TMEV, there should be frames 0, start of SZ (5000, the length of baseline in each trace) and
# start of SD (i.e. start of aftermath) as segment type break points.
# For ChR2, there should be frames 0, stim start, stim end+1, and a variable number of further entries,
# depending on stim type and observed number of SD - so ChR2 needs adaptation...
# (the loop variable must not be called "uuid": that would shadow the imported uuid module)
for event_uuid, event_meta in traces_meta_dict.items():
    n_break_points = len(event_meta["segment_type_break_points"])
    assert n_break_points == 3, f"{event_uuid}: expected 3 segment type break points, found {n_break_points}"

## Get windows for comparison
For TMEV, the start of the seizure and the start of the SD (i.e. segment_type_break_points[1] and segment_type_break_points[2]) mark the beginning and the end of the seizure window; the SD window is a window of the same length, starting at segment_type_break_points[2].

In [None]:
# Compute the locomotion statistics of two equally long windows per event:
#   "sz": from seizure onset until SD onset,
#   "sd": starting at SD onset, same number of frames as the "sz" window.
# Each entry (row) of list_statistics has the columns:
# uuid of event, mouse id, window type, experiment type, segment type (sz/sd), segment length in frames,
# totdist, totdist_abs, running (frames), speed (sum), avg speed, number of running episodes,
# mean episode length, std of episode length, max speed
list_statistics = []
dict_episodes = {}  # not filled in this cell
loco_binary_traces = {}  # meant for the post-filtering "running" trace, of which the running% is calculated (divided by segment length); not filled in this cell
loco_episodes = {}  # meant for the first and last indices of the locomotion episodes; not filled in this cell
dict_begin_end_frames = {}  # event uuid -> (first seizure frame, first SD frame)

for e_uuid in traces_dict.keys():
    mouse_id = traces_meta_dict[e_uuid]["mouse_id"]
    win_type = traces_meta_dict[e_uuid]["window_type"]
    if "exp_type" in traces_meta_dict[e_uuid].keys():
        exp_type = traces_meta_dict[e_uuid]["exp_type"]
        if "sz" not in exp_type:
            print(f"{exp_type} does not contain seizure... Skipping...")
            continue  # without a seizure there is no sz window to compare with
    else:
        exp_type = "tmev"

    i_begin_sz_frame = traces_meta_dict[e_uuid]["segment_type_break_points"][1]
    i_begin_sd_frame = traces_meta_dict[e_uuid]["segment_type_break_points"][2]
    dict_begin_end_frames[e_uuid] = (i_begin_sz_frame, i_begin_sd_frame)
    n_segment_frames = i_begin_sd_frame - i_begin_sz_frame

    lv_totdist = traces_dict[e_uuid]["lv_totdist"]
    lv_totdist_abs = traces_dict[e_uuid]["lv_totdist_abs"]
    lv_running = traces_dict[e_uuid]["lv_running"]
    lv_speed = traces_dict[e_uuid]["lv_speed"]

    # Slicing silently truncates a window that reaches beyond the end of the recording, while
    # running% is still divided by the full window length - make this case visible.
    n_trace_frames = len(lv_speed)
    if i_begin_sd_frame + n_segment_frames > n_trace_frames:
        print(f"Warning: {e_uuid}: SD window of {n_segment_frames} frames exceeds the recording, only {n_trace_frames - i_begin_sd_frame} frames available.")

    lv_speed_sz = lv_speed[i_begin_sz_frame:i_begin_sz_frame+n_segment_frames]
    lv_speed_sd = lv_speed[i_begin_sd_frame:i_begin_sd_frame+n_segment_frames]

    lv_running_sz = lv_running[i_begin_sz_frame:i_begin_sz_frame+n_segment_frames]
    lv_running_sd = lv_running[i_begin_sd_frame:i_begin_sd_frame+n_segment_frames]

    # get metrics for sz and sd segments
    totdist_sz = get_trace_delta(lv_totdist, i_begin_sz_frame, i_begin_sz_frame+n_segment_frames)
    totdist_sd = get_trace_delta(lv_totdist, i_begin_sd_frame, i_begin_sd_frame+n_segment_frames)

    totdist_abs_sz = get_trace_delta(lv_totdist_abs, i_begin_sz_frame, i_begin_sz_frame+n_segment_frames)
    totdist_abs_sd = get_trace_delta(lv_totdist_abs, i_begin_sd_frame, i_begin_sd_frame+n_segment_frames)

    # running episodes: detect (merging episodes using EPISODE_MERGE_THRESHOLD_FRAMES), then filter
    # the candidates with the temporal and amplitude thresholds
    list_episodes_sz = get_episodes(lv_running_sz, True, EPISODE_MERGE_THRESHOLD_FRAMES, return_begin_end_frames=True)
    list_episodes_sd = get_episodes(lv_running_sd, True, EPISODE_MERGE_THRESHOLD_FRAMES, return_begin_end_frames=True)

    list_episodes_sz = apply_threshold(lv_speed_sz, list_episodes_sz, TEMP_THRESHOLD, AMPL_THRESHOLD, )
    list_episodes_sd = apply_threshold(lv_speed_sd, list_episodes_sd, TEMP_THRESHOLD, AMPL_THRESHOLD, )

    # episode length in frames; "+ 1" because the length is counted including both end points
    list_episode_lengths_sz = np.array([ep[1]-ep[0] + 1 for ep in list_episodes_sz])
    n_episodes_sz = len(list_episodes_sz)

    list_episode_lengths_sd = np.array([ep[1]-ep[0] + 1 for ep in list_episodes_sd])
    n_episodes_sd = len(list_episodes_sd)

    # number of frames spent running (converted to running % later)
    running_sz = np.sum(list_episode_lengths_sz)
    running_sd = np.sum(list_episode_lengths_sd)

    # speed (sum over the window)
    speed_sz = sum(lv_speed_sz)
    speed_sd = sum(lv_speed_sd)

    # avg speed
    avg_speed_sz = calculate_avg_speed(lv_speed_sz)
    avg_speed_sd = calculate_avg_speed(lv_speed_sd)

    # max speed
    max_speed_sz = calculate_max_speed(lv_speed_sz)
    max_speed_sd = calculate_max_speed(lv_speed_sd)

    # episode mean length, std. Without episodes, the mean is set to 0 and the std stays NaN
    # (set explicitly, to avoid the numpy RuntimeWarning of taking the std of an empty array)
    episode_mean_len_sz = list_episode_lengths_sz.mean() if n_episodes_sz > 0 else 0
    episode_std_sz = list_episode_lengths_sz.std() if n_episodes_sz > 0 else np.nan
    episode_mean_len_sd = list_episode_lengths_sd.mean() if n_episodes_sd > 0 else 0
    episode_std_sd = list_episode_lengths_sd.std() if n_episodes_sd > 0 else np.nan

    list_statistics.append([e_uuid, mouse_id, win_type, exp_type, "sz", n_segment_frames, totdist_sz, totdist_abs_sz, running_sz, speed_sz, avg_speed_sz, n_episodes_sz, episode_mean_len_sz, episode_std_sz, max_speed_sz, ])
    list_statistics.append([e_uuid, mouse_id, win_type, exp_type, "sd", n_segment_frames, totdist_sd, totdist_abs_sd, running_sd, speed_sd, avg_speed_sd, n_episodes_sd, episode_mean_len_sd, episode_std_sd, max_speed_sd, ])


In [None]:
# One row per (event, segment type); the column order matches the rows appended to list_statistics.
stats_colnames = [
    "event_uuid", "mouse_id", "window_type", "exp_type", "segment_type", "segment_length",
    "totdist", "totdist_abs", "running", "speed", "avg_speed",
    "running_episodes", "running_episodes_mean_length", "running_episodes_length_std",
    "max_speed",
]
df_stats = pd.DataFrame(list_statistics, columns=stats_colnames)

In [None]:
# Missing values of these metrics are replaced with 0.
for colname_to_fill in ("avg_speed", "running_episodes_mean_length"):
    df_stats[colname_to_fill] = df_stats[colname_to_fill].fillna(0)

In [None]:
# % of time spent running: number of running frames relative to the window length
df_stats["running%"] = 100.*df_stats["running"]/df_stats["segment_length"]  # get value as true % instead of [0, 1] float

In [None]:
#scale_factor = 10000

#df_stats["totdist_norm"] = scale_factor*df_stats["totdist"]/df_stats["segment_length"]
#df_stats["totdist_abs_norm"] = scale_factor*df_stats["totdist_abs"]/df_stats["segment_length"]
#df_stats["running_norm"] = scale_factor*df_stats["running"]/df_stats["segment_length"]
#df_stats["speed_norm"] = scale_factor*df_stats["speed"]/df_stats["segment_length"]

In [None]:
def lookup_mouse_color(row):
    """Return the color assigned to the mouse of this df_stats row (KeyError for an unknown mouse)."""
    return dict_colors_mouse[row["mouse_id"]]


df_stats["color"] = df_stats.apply(lookup_mouse_color, axis=1)

In [None]:
# Lookup table: event uuid -> color (each event inherits the color of its mouse).
dict_colors_event = dict(zip(df_stats["event_uuid"], df_stats["color"]))

In [None]:
# Harmonize the window type labels: "Cx" -> "NC", "ca1" -> "CA1"; other values are left unchanged
df_stats["window_type"] = df_stats["window_type"].replace({"Cx" : "NC", "ca1": "CA1"})

In [None]:
# Export the per-window statistics as xlsx (only if save_data is set).
# NOTE(review): the file name always contains "tmev", independent of dataset_type - confirm this is intended.
if save_data:
    output_fpath = os.path.join(output_folder, f"loco_tmev_sz-vs-sd_{output_dtime}.xlsx")
    df_stats.to_excel(output_fpath, index=False)
    print(f"Results exported to {output_fpath}")

## Create deltas
Group by event uuid,  get Sz values, get SD values, subtract

In [None]:
# For every metric, create a dataframe with one row per event that contains the change of the metric
# from the seizure window to the SD window (delta = sd - sz), plus the metadata of the event.
l_df_deltas = []

group_by_colname = "event_uuid"
# Metadata of each event, taken from its first row in df_stats. Built once and indexed by the event
# uuid, so the metadata is looked up instead of filtering df_stats again for every row.
df_event_meta = df_stats.drop_duplicates(subset=group_by_colname).set_index(group_by_colname)

for metric in STAT_METRICS:  # one delta dataframe per metric
    # rows: events; columns: "sz" and "sd" value of the metric
    df_metric_pivot = df_stats.pivot(columns="segment_type", index=group_by_colname, values=metric).reset_index()
    # 1 window per mouse
    df_metric_pivot["window_type"] = df_metric_pivot[group_by_colname].map(df_event_meta["window_type"])
    df_metric_pivot["mouse_id"] = df_metric_pivot[group_by_colname].map(df_event_meta["mouse_id"])
    if "exp_type" not in df_metric_pivot.columns:
        df_metric_pivot["exp_type"] = df_metric_pivot[group_by_colname].map(df_event_meta["exp_type"])
    metric_diff_name = f"delta_{metric}"
    df_metric_pivot[metric_diff_name] = df_metric_pivot["sd"] - df_metric_pivot["sz"]
    # only keep the change (delta), drop the quantities themselves
    df_metric_pivot = df_metric_pivot.drop(["sz", "sd"], axis=1)
    l_df_deltas.append(df_metric_pivot)

In [None]:
# Combine the per-metric delta dataframes into one table: one row per event, one delta_* column per metric.
df_deltas_combined = l_df_deltas[0]
for df_delta in l_df_deltas[1:]:
    # the metadata columns are shared by all delta dataframes and serve as merge keys
    df_deltas_combined = pd.merge(df_deltas_combined, df_delta, on=["event_uuid", "window_type", "mouse_id", "exp_type"], how="outer")
if save_data:
    output_fpath = os.path.join(output_folder, f"loco_tmev_sz-vs-sd_delta_{output_dtime}.xlsx")
    df_deltas_combined.to_excel(output_fpath, index=False)
    print(f"Results exported to {output_fpath}")

## Visualize results

In [None]:
# Box plots of the sz -> sd change of every metric, grouped by window type; one panel per metric.
# The grid is sized from the number of metrics (3 columns), so STAT_METRICS may change in length.
n_cols = 3
n_metrics = len(STAT_METRICS)
n_rows = max(1, (n_metrics + n_cols - 1) // n_cols)  # ceil(n_metrics / n_cols), at least one row
# squeeze=False: axs is always a 2D array, also for a single row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(12*n_cols, 10*n_rows), squeeze=False)
for i_metric, stat_metric in enumerate(STAT_METRICS):
    sns.boxplot(data=df_deltas_combined, x="window_type", y="delta_"+stat_metric, ax=axs[i_metric//n_cols][i_metric%n_cols])
# hide the panels of an incomplete last row
for ax_unused in axs.flat[n_metrics:]:
    ax_unused.set_visible(False)
plt.tight_layout()

if save_figs:
    fig_fpath = os.path.join(output_folder, f'loco_tmev_sz-vs-sd_deltas_{output_dtime}{file_format}')
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

## Create waterfall plot

In [None]:
# display the statistics table
df_stats

In [None]:
# Waterfall plot: for every event, the locomotion speed trace and (above it) the normalized mean
# fluorescence trace around the seizure and SD windows, stacked vertically. Vertical lines mark the
# begin of the seizure window, the begin of the SD window and the end of the SD window; the text shows
# the statistics of the two windows.
AMPLITUDE = LV_SPEED_AMPL
N_PAD_FRAMES = 30  # number of frames shown before the seizure window and after the SD window
N_SD_TEXT_OFFSET_FRAMES = 60  # the SD statistics text starts this many frames after SD onset
offset = 0
mouse_ids = df_stats.mouse_id.unique()

# NOTE(review): the figure height scales with the number of mice, although one trace pair is drawn
# per event - confirm whether it should scale with the number of events instead.
n_recordings = len(mouse_ids)
fig = plt.figure(figsize=(18,n_recordings*3))

for mouse_id in mouse_ids: 
    color = df_colors[df_colors["mouse_id"] == mouse_id].color.iloc[0]  # same color for all events of a mouse
    for e_uuid in df_stats[df_stats["mouse_id"]==mouse_id].event_uuid.unique():
        # this script does not work with chr2 out of the box, but in case of future update, leave this already in
        # (the stim frames are currently not used below)
        if is_chr2:  # chr2 experiments contain the whole session in one file
            df_segments = ddoc.getSegmentsForUUID(e_uuid)
            i_frame_stim_begin = df_segments[df_segments["interval_type"] == "stimulation"].frame_begin.iloc[0] - 1
            i_frame_stim_end = df_segments[df_segments["interval_type"] == "stimulation"].frame_end.iloc[0] - 1  # in 1 indexing, inclusive
        else:  # in tmev recordings, there is no stim, but it is the seizure segment (see value_mapping)
            metadata_dict = traces_meta_dict[e_uuid]
            i_frame_stim_begin = metadata_dict["n_bl_frames"]
            i_frame_stim_end = metadata_dict["n_frames"] - metadata_dict["n_am_frames"]

        # first frames of the two windows; both windows have the same length
        i_sz_begin, i_sd_begin = dict_begin_end_frames[e_uuid]
        i_sz_begin = int(i_sz_begin)
        i_sd_begin = int(i_sd_begin)
        n_segment_frames = i_sd_begin - i_sz_begin

        t = traces_dict[e_uuid]["lv_t_s"]
        t = t - t[0]  # time relative to the first frame

        # Clamp to the last frame: the SD window (or the text position) may reach beyond the end of
        # the recording, and indexing t there would raise an IndexError.
        i_last_frame = len(t) - 1
        i_sd_end = min(i_sd_begin + n_segment_frames, i_last_frame)
        i_sd_text = min(i_sd_begin + N_SD_TEXT_OFFSET_FRAMES, i_last_frame)

        sz_stats = df_stats[(df_stats["event_uuid"] == e_uuid) & (df_stats["segment_type"] == "sz")].iloc[0]
        sd_stats = df_stats[(df_stats["event_uuid"] == e_uuid) & (df_stats["segment_type"] == "sd")].iloc[0]

        labview_trace = traces_dict[e_uuid]["lv_speed"]
        min_lv = np.min(labview_trace)

        # scale the fluorescence to [0, 0.95*AMPLITUDE]; a flat trace is drawn as a line at 0
        mean_fluo = traces_dict[e_uuid]["mean_fluo"]
        min_fluo = np.min(mean_fluo)
        max_fluo = np.max(mean_fluo)
        if max_fluo > min_fluo:
            mean_fluo = 0.95*AMPLITUDE*(mean_fluo - min_fluo)/(max_fluo - min_fluo)
        else:
            mean_fluo = np.zeros(len(mean_fluo))

        # add vlines marking the two windows
        plt.vlines([t[i_sz_begin], t[i_sd_begin], t[i_sd_end]], offset, offset+2*AMPLITUDE, color="black", linestyle="-")
        plt.text(t[i_sz_begin], offset+0.2*AMPLITUDE, f'd={sz_stats["totdist_abs"]:.3f}, {sz_stats["running%"]:.2f}%, eps={sz_stats["running_episodes"]} mean {sz_stats["running_episodes_mean_length"]:.2f},\nv={sz_stats["avg_speed"]:.3f}', fontsize=10, color="red")
        plt.text(t[i_sd_text], offset+0.2*AMPLITUDE, f'd={sd_stats["totdist_abs"]:.3f}, {sd_stats["running%"]:.2f}%, eps={sd_stats["running_episodes"]} mean {sd_stats["running_episodes_mean_length"]:.2f},\nv={sd_stats["avg_speed"]:.3f}, uuid: {e_uuid}', fontsize=10, color="red")

        # Only show the two windows plus some padding. The start must not become negative: a negative
        # slice start would count from the end of the trace.
        i_plot_begin = max(i_sz_begin - N_PAD_FRAMES, 0)
        i_plot_end = i_sd_begin + n_segment_frames + N_PAD_FRAMES
        t = t[i_plot_begin:i_plot_end]
        mean_fluo = mean_fluo[i_plot_begin:i_plot_end]
        labview_trace = labview_trace[i_plot_begin:i_plot_end]

        # speed trace first, the fluorescence trace one AMPLITUDE above it
        plt.plot(t, labview_trace - min_lv+offset, color=color)
        offset +=AMPLITUDE
        plt.plot(t, mean_fluo+offset, color=color)
        offset +=1.3*AMPLITUDE

# Title/file name label: the experiment type(s) of the plotted events. Taken from df_stats instead of
# relying on a loop variable left over from another cell.
waterfall_exp_type = "_".join(str(plotted_exp_type) for plotted_exp_type in df_stats["exp_type"].unique())

plt.suptitle(waterfall_exp_type, fontsize=22)
#plt.axis("off")
plt.yticks([])
plt.xlabel("Time (s)", fontsize=14)
plt.tight_layout()
#plt.xlim((300, 460))  # 250, 500
if save_figs:
    out_fpath = os.path.join(output_folder, f"loco_tmev_sz-vs-sd_waterfall_{waterfall_exp_type}_{output_dtime}{file_format}")
    plt.savefig(out_fpath,bbox_inches='tight', dpi=300)
    print(f"Saved as {out_fpath}")
plt.show()