# Locomotion analysis v3.0
This script works with the pre-assembled dataset, which cuts out most of the data-acquisition steps and results in a cleaner and more reliable analysis (the dataset is checked manually, separately from this script).

# Set version

In [None]:
# Version tag that is appended to the file names of the figures saved by this notebook.
output_version = "v1.0"

# Import libraries

In [None]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [None]:
import labrotation.file_handling as fh
import h5py
from time import time
import matplotlib.pyplot as plt
import numpy as np
import os
from labrotation import file_handling as fh
from copy import deepcopy
import pandas as pd
import labrotation.two_photon_session as tps
import seaborn as sns
import uuid  # for unique labeling of sessions and coupling arrays (mouse velocity, distance, ...) to sessions in dataframe 
from matplotlib import cm  # colormap
import datadoc_util
from labrotation import two_photon_session as tps
from datetime import datetime
import seaborn as sns

# Set seaborn parameters

In [None]:
# Global seaborn appearance for every figure below: font scale 2 and the
# "whitegrid" style. NOTE(review): the style is set after sns.set() on purpose,
# presumably because sns.set() also (re)sets the style — confirm before reordering.
sns.set(font_scale=2)
sns.set_style("whitegrid")

# If it exists, load environment variables from the .env file

In [None]:
def parse_env_lines(lines):
    """Parse an iterable of KEY=VALUE lines into a dict.

    Blank lines, lines starting with "#" and lines without "=" are skipped
    instead of raising IndexError. Only the first "=" separates key from value,
    so values may themselves contain "=". Surrounding whitespace is stripped
    from both the key and the value.

    lines: iterable of strings (e.g. an open text file).
    Returns: dict mapping each key to its value (both str).
    """
    parsed = dict()
    for raw_line in lines:
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        key, separator, value = entry.partition("=")
        if not separator:
            # not a KEY=VALUE line; nothing to store
            continue
        parsed[key.strip()] = value.strip()
    return parsed


# Settings read from ./.env; stays empty when the file does not exist, in which
# case the cells below ask for the folders interactively.
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        env_dict = parse_env_lines(f)
print(env_dict.keys())

# Set up data documentation directory

In [None]:
# Assumed layout of the documentation folder:
#   * one subfolder per mouse; the subfolder name identifies the mouse (it does
#     not have to be the exact mouse id).
#   * inside the subfolder of mouse xy: xy_grouping.xlsx and xy_segmentation.xlsx.
#   * xy_grouping.xlsx tells which recordings belong together. Columns:
#       folder, nd2, labview, lfp, face_cam_last, nikon_meta, experiment_type, day
#   * xy_segmentation.xlsx classifies the recording frame by frame, as a set of
#     disjoint intervals covering the whole recording ("normal", seizure ("sz"),
#     sd wave ("sd_wave") etc.). Columns:
#       folder, interval_type, frame_begin, frame_end

# TODO: write documentation on contents of xlsx files (what the columns are etc.)
# Take the folder from the .env settings when available, otherwise ask for it.
docu_folder = (
    env_dict["DATA_DOCU_FOLDER"]
    if "DATA_DOCU_FOLDER" in env_dict
    else fh.open_dir("Choose folder containing folders for each mouse!")
)
print(f"Selected folder:\n\t{docu_folder}")

In [None]:
# The per-mouse entries are listed from the "documentation" subfolder when
# docu_folder has one, otherwise from docu_folder itself.
mouse_folder = (
    os.path.join(docu_folder, "documentation")
    if "documentation" in os.listdir(docu_folder)
    else docu_folder
)
mouse_names = os.listdir(mouse_folder)
print("Mice detected:")
for mouse in mouse_names:
    print(f"\t{mouse}")

### Set up export figure parameters

In [None]:
# Figure export switches.
save_figs = False  # set to True to save the figures created
save_as_eps = True  # True: save as .eps, False: save as .jpg
file_format = ".eps" if save_as_eps else ".jpg"
if save_figs:
    print(f"Going to save figures as {file_format} files.")

In [None]:
def get_datetime_for_fname():
    """Return the current local date and time as "YYYYMMDD-HHMMSS" for use in file names."""
    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
# Folder that exported figures are written to. Like the other folder settings
# in this notebook, fall back to a folder dialog when the .env settings do not
# provide it (a plain env_dict["DOWNLOADS_FOLDER"] lookup raises KeyError when
# the .env file or the entry is missing).
if "DOWNLOADS_FOLDER" in env_dict:
    output_folder = env_dict["DOWNLOADS_FOLDER"]
else:
    output_folder = fh.open_dir("Choose folder for output files!")
print(f"Output files will be saved to {output_folder}")

### Load matlab-2p

In [None]:
# Location of the matlab-2p folder: from the .env settings when available,
# otherwise chosen interactively.
matlab_2p_folder = (
    env_dict["MATLAB_2P_FOLDER"]
    if "MATLAB_2P_FOLDER" in env_dict
    else fh.open_dir("Choose matlab-2p folder")
)
print(f"matlab-2p folder set to:\n\t{matlab_2p_folder}")

### Load data documentation

In [None]:
# Create the data documentation helper for docu_folder and load it.
# NOTE(review): loadDataDoc() presumably reads the per-mouse xlsx files described
# above — confirm in datadoc_util.
ddoc = datadoc_util.DataDocumentation(docu_folder)
ddoc.loadDataDoc()

### Load events_list dataset

In [None]:
# events_list.xlsx is expected directly inside docu_folder.
events_list_fpath = os.path.join(docu_folder, "events_list.xlsx")
# Explicit check instead of assert: assert statements are removed when Python
# runs with -O, and a bare assert gives no hint about which file is missing.
if not os.path.exists(events_list_fpath):
    raise FileNotFoundError(f"events_list.xlsx not found: {events_list_fpath}")

df_events_list = pd.read_excel(events_list_fpath)

## Load dataset

In [None]:
# Path of the assembled traces HDF5 file; it is opened with h5py in the next cell.
# NOTE(review): fh.open_file presumably shows a file dialog with this title — confirm.
assembled_traces_fpath = fh.open_file("Open assembled_traces h5 file!")

In [None]:
# traces_dict:      event uuid -> {dataset name -> numpy array}
# traces_meta_dict: event uuid -> {attribute name -> attribute value}
# Dataset names stored per event:
# 'lfp_mov_t', 'lfp_mov_y', 'lfp_t', 'lfp_y', 'lv_dist', 'lv_rounds',
# 'lv_running', 'lv_speed', 'lv_t_s', 'lv_totdist', 'mean_fluo'
# ('lv_totdist_abs' is read by the statistics cell as well).
traces_dict = dict()
traces_meta_dict = dict()
with h5py.File(assembled_traces_fpath, "r") as hf:
    # The loop variable is deliberately not called "uuid": that name would
    # rebind the imported uuid module for the rest of the notebook.
    for event_uuid, event_group in hf.items():
        # np.array(...) reads each dataset into memory, so the data stays
        # available after the file is closed.
        traces_dict[event_uuid] = {
            dataset_name: np.array(dataset)
            for dataset_name, dataset in event_group.items()
        }
        traces_meta_dict[event_uuid] = dict(event_group.attrs.items())

# Calculate locomotion statistics

In [None]:
def _segment_statistics(event_traces, frames):
    """Return [totdist, totdist_abs, running, speed] for one segment of an event.

    event_traces: dict of the event's arrays; the entries "lv_totdist",
        "lv_totdist_abs", "lv_running" and "lv_speed" are used.
    frames: slice selecting the segment within these arrays.

    totdist and totdist_abs are the difference between the last and the first
    sample of the respective trace within the segment; running and speed are
    the sums of the respective trace over the segment. An empty segment yields
    NaN for all four values instead of failing on the [-1]/[0] lookups.
    """
    totdist = event_traces["lv_totdist"][frames]
    totdist_abs = event_traces["lv_totdist_abs"][frames]
    if len(totdist) == 0 or len(totdist_abs) == 0:
        return [np.nan, np.nan, np.nan, np.nan]
    return [
        totdist[-1] - totdist[0],
        totdist_abs[-1] - totdist_abs[0],
        # np.sum instead of the builtin sum: no Python-level loop over the array
        # elements, and narrow integer dtypes are accumulated in a wider integer type.
        np.sum(event_traces["lv_running"][frames]),
        np.sum(event_traces["lv_speed"][frames]),
    ]


# Each entry (row) has the columns:
# uuid of event, mouse id, window type, segment type (bl/sz/am),
# segment length in frames, totdist, totdist_abs, running, speed
list_statistics = []
for event_uuid, event_traces in traces_dict.items():
    event_meta = traces_meta_dict[event_uuid]
    mouse_id = event_meta["mouse_id"]
    win_type = event_meta["window_type"]
    # get segment lengths; the sz segment is what remains between bl and am
    n_bl_frames = event_meta["n_bl_frames"]
    n_am_frames = event_meta["n_am_frames"]
    n_frames = event_meta["n_frames"]
    n_sz_frames = n_frames - n_am_frames - n_bl_frames
    sz_end = n_bl_frames + n_sz_frames
    # segment type, segment length, frames of the segment (in recording order)
    segments = (
        ("bl", n_bl_frames, slice(None, n_bl_frames)),
        ("sz", n_sz_frames, slice(n_bl_frames, sz_end)),
        ("am", n_am_frames, slice(sz_end, None)),
    )
    # add one row per segment to the data list
    for segment_type, segment_length, frames in segments:
        list_statistics.append([
            event_uuid,
            mouse_id,
            win_type,
            segment_type,
            segment_length,
            *_segment_statistics(event_traces, frames),
        ])

In [None]:
# Column names, in the order in which each row of list_statistics was filled.
stats_columns = [
    "event_uuid",
    "mouse_id",
    "window_type",
    "segment_type",
    "segment_length",
    "totdist",
    "totdist_abs",
    "running",
    "speed",
]
df_stats = pd.DataFrame(list_statistics, columns=stats_columns)

In [None]:
# Add a "<statistic>_norm" column per statistic: the statistic divided by the
# segment length (in frames).
for stat_name in ("totdist", "totdist_abs", "running", "speed"):
    df_stats[f"{stat_name}_norm"] = df_stats[stat_name] / df_stats["segment_length"]

In [None]:
# Distribution of the length-normalized running statistic per segment type.
fig, ax = plt.subplots(figsize=(10, 10))
sns.violinplot(data=df_stats, x="segment_type", y="running_norm", ax=ax)
#sns.stripplot(data=df_stats[df_stats["window_type"]=="CA1"], x="speed_norm", y="segment_type", hue="mouse_id", dodge=True, zorder=1, legend=False)
plt.show()

In [None]:
loco_statistic = "totdist_abs"

fig, axs = plt.subplots(2, 3, figsize=(22, 12), sharey=False)

# Top row: one point plot per statistic, over the segment types.
for ax, stat_name in zip(axs[0], (loco_statistic, "running", "speed")):
    sns.pointplot(data=df_stats, x="segment_type", y=stat_name, ax=ax)

# Bottom row: loco_statistic per window type, one panel per segment type.
for ax, segment_type in zip(axs[1], ("bl", "sz", "am")):
    segment_rows = df_stats[df_stats["segment_type"] == segment_type]
    sns.pointplot(data=segment_rows, x="window_type", y=loco_statistic, join=False, capsize=.1, ax=ax)

if save_figs:
    fig_fname = f'loco_mean_per_segment_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, fig_fname)
    # the savefig format is the file extension without the leading dot
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
loco_statistic = "totdist_abs"

fig, axs = plt.subplots(2, 3, figsize=(22, 12))

# Top row: one line per event across the segment types, one panel per statistic.
for ax, stat_name in zip(axs[0], (loco_statistic, "running", "speed")):
    sns.lineplot(data=df_stats, x="segment_type", y=stat_name, hue="event_uuid", ax=ax, legend=False)

# Bottom row: loco_statistic per window type, one panel per segment type.
for ax, segment_type in zip(axs[1], ("bl", "sz", "am")):
    segment_rows = df_stats[df_stats["segment_type"] == segment_type]
    sns.stripplot(data=segment_rows, x="window_type", y=loco_statistic, size=8, ax=ax, legend=False)

if save_figs:
    fig_fname = f'loco_per_segment_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, fig_fname)
    # the savefig format is the file extension without the leading dot
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()

In [None]:
loco_statistic = "totdist_abs"

fig, axs = plt.subplots(2, 3, figsize=(22, 12))

# Top row: one line per event, drawn from the "bl" and "am" rows only
# (the "sz" segment is left out), one panel per statistic.
bl_am_rows = df_stats[df_stats["segment_type"].isin(["bl", "am"])]
for ax, stat_name in zip(axs[0], (loco_statistic, "running", "speed")):
    sns.lineplot(data=bl_am_rows, x="segment_type", y=stat_name, hue="event_uuid", ax=ax, legend=False)

# Bottom row: loco_statistic per window type, one panel per segment type
# (the "sz" panel is still drawn here).
for ax, segment_type in zip(axs[1], ("bl", "sz", "am")):
    segment_rows = df_stats[df_stats["segment_type"] == segment_type]
    sns.stripplot(data=segment_rows, x="window_type", y=loco_statistic, size=8, ax=ax, legend=False)

if save_figs:
    fig_fname = f'loco_per_segment_sz-excluded_{get_datetime_for_fname()}_{output_version}{file_format}'
    fig_fpath = os.path.join(output_folder, fig_fname)
    # the savefig format is the file extension without the leading dot
    plt.savefig(fig_fpath, format=file_format.split(".")[-1])
    print(f"Saved to {fig_fpath}")
plt.show()