# Baseline recovery 2.0
This notebook handles the new-format data, in which the numbers of baseline and aftermath frames vary between recordings. Only use it with TMEV data.

In [1]:
# Auto-reload imported modules, so that edits to helper modules developed
# outside this notebook are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import labrotation.file_handling as fh
import h5py
from time import time
import matplotlib.pyplot as plt
import numpy as np
import os
from labrotation import file_handling as fh
from copy import deepcopy
import pandas as pd
import labrotation.two_photon_session as tps
import seaborn as sns
import uuid  # for unique labeling of sessions and coupling arrays (mouse velocity, distance, ...) to sessions in dataframe 
from matplotlib import cm  # colormap
import datadoc_util
from labrotation import two_photon_session as tps
from datetime import datetime
import seaborn as sns
from math import floor, ceil
from scipy.stats import ttest_ind, ttest_rel, mannwhitneyu, ranksums
from numpy.polynomial.polynomial import Polynomial

In [3]:
# Global seaborn styling for the figures of this notebook:
# doubled font scale, then the "whitegrid" axes style.
sns.set(font_scale=2)
sns.set_style("whitegrid")

In [4]:
# Read KEY=VALUE pairs from a local .env file into env_dict.
# The notebook looks up its folder locations in this dictionary.
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        for line in f:
            line = line.strip()
            # blank lines and comment lines carry no setting; skip them instead of failing
            if not line or line.startswith("#"):
                continue
            # split at the first "=" only, so that values may themselves contain "="
            key, separator, value = line.partition("=")
            if not separator:
                print(f"Skipping malformed .env line: {line}")
                continue
            env_dict[key.strip()] = value.strip()
print(env_dict.keys())

dict_keys(['DATA_DOCU_FOLDER', 'DOWNLOADS_FOLDER', 'LOG_FOLDER', 'MATLAB_2P_FOLDER', 'FLUO_LV_MATCHED_FOLDER'])


In [5]:
# Flag that presumably toggles writing of derived datasets to disk;
# it is not read in the visible part of the notebook - TODO confirm.
save_dsets = False

In [6]:
# Figure export settings. Format priority: EPS first, then PDF, JPG as fallback.
save_figs = False
save_as_eps = False
save_as_pdf = True

output_format = ".eps" if save_as_eps else (".pdf" if save_as_pdf else ".jpg")

# the chosen format is only reported when figures are going to be saved
if save_figs:
    print(output_format)

In [7]:
# Data documentation folder: use the .env entry when there is one,
# otherwise ask for the folder via fh.open_dir.
try:
    docu_folder = env_dict["DATA_DOCU_FOLDER"]
except KeyError:
    docu_folder = fh.open_dir("Choose folder containing folders for each mouse!")
print(f"Selected folder:\n\t{docu_folder}")

Selected folder:
	D:\PhD\Data documentation


In [8]:
# The per-mouse entries are listed from the "documentation" subfolder if
# docu_folder contains one, and from docu_folder itself otherwise.
mouse_folder = docu_folder
if "documentation" in os.listdir(docu_folder):
    mouse_folder = os.path.join(docu_folder, "documentation")

mouse_names = os.listdir(mouse_folder)

print(f"Mice detected:")
for mouse in mouse_names:
    print(f"\t{mouse}")

Mice detected:
	OPI-2239
	T301
	T303
	T324
	T329
	T333
	T337
	T352
	T370
	T386
	T391
	T396
	T413
	T430
	T452
	T534
	WEZ-8917
	WEZ-8922
	WEZ-8924


In [9]:
def get_datetime_for_fname():
    """Return the current local date and time as a "YYYYMMDD-HHMMSS" string.

    The result contains only digits and a single dash, which makes it suitable
    as a timestamp inside file names.
    """
    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [10]:
# Output folder: use the .env entry when there is one, otherwise ask for the
# folder via fh.open_dir (same fallback as for DATA_DOCU_FOLDER). Without the
# fallback, a missing .env file or entry aborts the notebook with a KeyError.
if "DOWNLOADS_FOLDER" in env_dict:
    output_folder = env_dict["DOWNLOADS_FOLDER"]
else:
    output_folder = fh.open_dir("Choose folder to save output files to!")
print(f"Output files will be saved to {output_folder}")

Output files will be saved to D:\Downloads


In [11]:
# Create the data documentation object for docu_folder and load it;
# ddoc is queried below for the events table and the colorings.
ddoc = datadoc_util.DataDocumentation(docu_folder)
ddoc.loadDataDoc()

## Load all seizures dataset

In [12]:
# Keep only the seizure events (event_type "sz") of the events table.
df_events_all = ddoc.getEventsDf()
is_seizure_event = df_events_all["event_type"] == "sz"
df_events = df_events_all[is_seizure_event]

In [13]:
# Path of the .h5 file with the assembled traces of all seizures.
# fh.open_file presumably lets the user pick the file interactively - TODO confirm.
event_traces_fpath = fh.open_file("Open .h5 file containing assembled traces for all seizures!")
print(event_traces_fpath)

D:\PhD\Data\traces_for_recovery_20231215-182608.h5


In [14]:
# Colorings table from the data documentation; used below to look up
# the "color" entry belonging to each "mouse_id".
df_colors = ddoc.getColorings()

In [15]:
# Per-event data read from the .h5 file, kept as parallel lists per window type:
# entry i of every *_ca1 list belongs to the same event (likewise for *_nc).
traces_ca1 = []
traces_nc = []

mouse_ids_ca1 = []
mouse_ids_nc = []

uuids_ca1 = []
uuids_nc = []

session_uuids_ca1 = []
session_uuids_nc = []

recording_break_points_ca1 = []
recording_break_points_nc = []

n_bl_frames_ca1 = []
n_am_frames_ca1 = []
n_sz_frames_ca1 = []

n_bl_frames_nc = []
n_am_frames_nc = []
n_sz_frames_nc = []

# events with fewer baseline (bl) or aftermath (am) frames than this are skipped
N_BL_AM_FRAMES_MINIMUM = 5000

# Target lists per window type. The order within each tuple must match the
# order of the values assembled for every event in the loop below.
target_lists_nc = (traces_nc, uuids_nc, session_uuids_nc, recording_break_points_nc,
                   mouse_ids_nc, n_bl_frames_nc, n_am_frames_nc, n_sz_frames_nc)
target_lists_ca1 = (traces_ca1, uuids_ca1, session_uuids_ca1, recording_break_points_ca1,
                    mouse_ids_ca1, n_bl_frames_ca1, n_am_frames_ca1, n_sz_frames_ca1)

# first keys are event uuids, inside the following dataset names:
# 'lfp_mov_t', 'lfp_mov_y', 'lfp_t', 'lfp_y', 'lv_dist', 'lv_rounds',
# 'lv_running', 'lv_speed', 'lv_t_s', 'lv_totdist', 'mean_fluo'
with h5py.File(event_traces_fpath, "r") as hf:
    # The loop variable must not be called `uuid`: that name would overwrite
    # the imported `uuid` module for the rest of the notebook.
    for event_uuid in hf.keys():
        event_group = hf[event_uuid]
        event_attrs = event_group.attrs

        win_type = event_attrs["window_type"]
        n_bl_frames = event_attrs["n_bl_frames"]
        n_am_frames = event_attrs["n_am_frames"]
        n_sz_frames = event_attrs["n_sz_frames"]

        if min(n_bl_frames, n_am_frames) < N_BL_AM_FRAMES_MINIMUM:
            print(f"Skipping {event_uuid}: not enough bl or am frames ({n_bl_frames}, {n_am_frames})")
            continue

        if win_type == "Cx":
            target_lists = target_lists_nc
        elif win_type == "CA1":
            target_lists = target_lists_ca1
        else:
            print(f"{win_type} not recognized window type")
            continue

        # the trace itself is only read for events that are actually kept
        mean_fluo = np.array(event_group["mean_fluo"])
        mouse_id = event_attrs["mouse_id"]
        event_values = (mean_fluo, event_uuid, event_attrs["session_uuids"],
                        event_attrs["recording_break_points"], mouse_id,
                        n_bl_frames, n_am_frames, n_sz_frames)
        for target_list, value in zip(target_lists, event_values):
            target_list.append(value)

Skipping 74473c5d22e04525acf53f5a5cb799f4: not enough bl or am frames (262, 8455)
Skipping d158cd12ad77489a827dab1173a933f9: not enough bl or am frames (8600, 4899)


### Get color data

In [16]:
# One color per trace: the "color" entry of the first df_colors row whose
# "mouse_id" matches the mouse of the trace.
color_column = df_colors["color"]
mouse_id_column = df_colors["mouse_id"]

colors_ca1 = [color_column[mouse_id_column == mouse_id].iloc[0] for mouse_id in mouse_ids_ca1]
colors_nc = [color_column[mouse_id_column == mouse_id].iloc[0] for mouse_id in mouse_ids_nc]

# Analysis

## 1. Recovery time points

In [26]:
# Pair every seizure event of df_events with the position of its trace in the
# CA1 or NC trace lists. Events are visited in df_events order; an event for
# which no trace was loaded is reported and left out.
event_uuid_mouse_id_i_trace_ca1 = []  # list of (event_uuid, mouse_id, i_trace) tuples
event_uuid_mouse_id_i_trace_nc = []  # list of (event_uuid, mouse_id, i_trace) tuples

for event_uuid in df_events["event_uuid"].unique():
    mouse_id = df_events.loc[df_events["event_uuid"] == event_uuid, "mouse_id"].iloc[0]

    has_ca1_trace = event_uuid in uuids_ca1
    if not has_ca1_trace and event_uuid not in uuids_nc:
        print(f"Unknown event_uuid: {event_uuid}")
        continue

    # CA1 takes precedence if a uuid should appear in both lists
    if has_ca1_trace:
        i_trace = uuids_ca1.index(event_uuid)
        event_uuid_mouse_id_i_trace_ca1.append((event_uuid, mouse_id, i_trace))
    else:
        i_trace = uuids_nc.index(event_uuid)
        event_uuid_mouse_id_i_trace_nc.append((event_uuid, mouse_id, i_trace))

Unknown event_uuid: d158cd12ad77489a827dab1173a933f9
Unknown event_uuid: 74473c5d22e04525acf53f5a5cb799f4


In [None]:
# --- sliding-window parameters ---
imaging_frequency = 15. # in Hz
window_width_s = 10
window_step_s = 5
n_windows_post_darkest = 40 # dataset consists of bl, darkest point, and this many windows post darkest point

n_frames_before_nc = 200  # include 200 frames just before aftermath for NC recordings
n_frames_before_ca1 = 0

# window sizes converted from seconds to frames
window_width_frames = int(window_width_s*imaging_frequency)
window_step_frames = int(window_step_s*imaging_frequency)
half_window_width_frames = window_width_frames//2

# --- baseline windows: one (begin, end) frame pair per trace ---
default_bl_center_ca1 = 4925
default_bl_center_nc = 4025

# for neocortex, allow for ~1 min before Sz (LFP sz comes earlier)
default_bl_window_nc = (default_bl_center_nc - half_window_width_frames, default_bl_center_nc + half_window_width_frames)
# for CA1, immediately before Sz onset
default_bl_window_ca1 = (default_bl_center_ca1 - half_window_width_frames, default_bl_center_ca1 + half_window_width_frames)

# every trace starts out with the default window of its window type
bl_windows_nc = [default_bl_window_nc] * len(traces_nc)
bl_windows_ca1 = [default_bl_window_ca1] * len(traces_ca1)

i_frame_begin_bl = 3850  # in 0-indexing, the first frame to be included in baseline
i_frame_end_bl = 4000  # in 0-indexing, the first frame after baseline (i.e. not included)

# labels and numeric values (in seconds) of the time points: baseline, darkest
# point, then one entry per window after the darkest point
time_points = ["bl", "darkest"] + [f"{i_window*window_step_s}s" for i_window in range(1, n_windows_post_darkest + 1)]
time_points_numeric = np.arange(-1, n_windows_post_darkest + 1) * window_step_s


def get_metric_for_window(trace_window):
    """Return the median of the lowest 5% of the values in a window.

    Parameters
    ----------
    trace_window : array-like
        Values of the window; expected to be one-dimensional. Lists and other
        sequences are accepted as well as numpy arrays.

    Returns
    -------
    float
        Median of the int(0.05 * len(trace_window)) smallest values, or nan if
        the window has fewer than 20 values, so that no value would be selected.
    """
    values = np.asarray(trace_window)
    n_lowest = int(0.05*len(values))
    if n_lowest == 0:
        # nothing to take the median of; return nan explicitly rather than
        # via a RuntimeWarning raised inside np.median
        return np.nan
    return np.median(np.sort(values)[:n_lowest])

def get_recovery_data(complete_trace, i_frame_begin_bl, i_frame_end_bl, n_frames_before_am=0, n_am_frames=5000):
    """Compute the recovery metric for the baseline and for windows after the darkest point.

    The trace is expected to consist of baseline, seizure and aftermath frames,
    in this order, with the aftermath making up the last n_am_frames frames.

    Uses the module-level settings n_windows_post_darkest, window_step_frames
    and window_width_frames, and get_metric_for_window as the metric.

    Parameters
    ----------
    complete_trace : array-like
        One-dimensional trace of the whole event.
    i_frame_begin_bl : int
        First frame of the baseline window (0-indexed, included).
    i_frame_end_bl : int
        First frame after the baseline window (0-indexed, not included).
    n_frames_before_am : int, optional
        Number of frames before the aftermath that are also searched for the
        darkest point. For NC, a few frames before the segment "aftermath" are
        needed due to mistakes in manual classification; for CA1 they are not.
    n_am_frames : int, optional
        Number of aftermath frames at the end of the trace. Defaults to 5000;
        pass the actual number for recordings with a different aftermath length.

    Returns
    -------
    (x_list, metrics_list) : tuple of lists
        Frame index of each window centre and the metric of each window. Entry 0
        is the baseline, entry 1 the window around the darkest point, followed
        by n_windows_post_darkest windows in steps of window_step_frames.

    Raises
    ------
    ValueError
        If the search region does not contain any frames to pick the darkest
        point from.
    """
    complete_trace = np.asarray(complete_trace)
    window_half_width = window_width_frames//2

    # baseline: metric of the given window, x is the centre of the window
    x_bl = (i_frame_begin_bl + i_frame_end_bl)//2  # TODO: assign proper x
    y_bl = get_metric_for_window(complete_trace[i_frame_begin_bl:i_frame_end_bl])
    x_list = [x_bl]
    metrics_list = [y_bl]

    # frame indices of the search region (aftermath plus the frames before it),
    # ordered from darkest to brightest; ">=" keeps the first frame of the region
    n_search_frames = n_am_frames + n_frames_before_am
    i_search_begin = max(0, len(complete_trace) - n_search_frames)
    sorted_indices = np.argsort(complete_trace)
    sorted_am_indices = sorted_indices[sorted_indices >= i_search_begin]
    am_x_5p_lowest = sorted_am_indices[:int(0.05*n_search_frames)]
    if len(am_x_5p_lowest) == 0:
        raise ValueError("no aftermath frames available to locate the darkest point")

    # get single coordinate for darkest part:
    # of the darkest 5p, take earliest 50, get median frame index of these, round down to integer frame
    x_am_darkest = int(np.floor(np.median(np.sort(am_x_5p_lowest)[:50])))

    # sliding windows: window around darkest point + n_windows_post_darkest windows
    for i_window in range(n_windows_post_darkest + 1):
        x_val = x_am_darkest + i_window*window_step_frames
        # clamp at 0: a negative start would wrap around to the end of the trace
        i_window_begin = max(0, x_val - window_half_width)
        window_trace = complete_trace[i_window_begin : x_val + window_half_width]

        x_list.append(x_val)
        metrics_list.append(get_metric_for_window(window_trace))

    return (x_list, metrics_list)
    

### Manually modify baseline values

In [None]:
# Manually chosen baseline centres (frame index) for events whose baseline
# window should not sit at the default position. Keys are event uuids.
dict_uuid_manual_bl_center = {
    # CA1:
    "aa66ae0470a14eb08e9bcadedc34ef64": 4250,  # approximate (~4250)
    "c7b29d28248e493eab02288b85e3adee": 4000,
    "7b9c17d8a1b0416daf65621680848b6a": 4050,
    "9e75d7135137444492d104c461ddcaac": 4700,
    "d158cd12ad77489a827dab1173a933f9": 4500,
    "a39ed3a880c54f798eff250911f1c92f": 4500,
    "4e2310d2dde845b0908519b7196080e8": 4500,
    "f0442bebcd1a4291a8d0559eb47df08e": 4500,
    # NC:
    "2251bba132cf45fa839d3214d1651392": 3700,
    "cd3c1e0e3c284a89891d2e4d9a7461f4": 3500,
}

# Map the uuid : bl centre dictionary to i_trace : bl centre dictionaries, one
# per window type. The comprehension variables stay local, so the imported
# `uuid` module is not overwritten by an event uuid string.
dict_itrace_manual_bl_center_ca1 = {
    i_trace: dict_uuid_manual_bl_center[event_uuid]
    for event_uuid, _mouse_id, i_trace in event_uuid_mouse_id_i_trace_ca1
    if event_uuid in dict_uuid_manual_bl_center
}
dict_itrace_manual_bl_center_nc = {
    i_trace: dict_uuid_manual_bl_center[event_uuid]
    for event_uuid, _mouse_id, i_trace in event_uuid_mouse_id_i_trace_nc
    if event_uuid in dict_uuid_manual_bl_center
}

# re-calculate bl_windows_ca1, bl_windows_nc for the manually adjusted traces
for i_trace, bl_center in dict_itrace_manual_bl_center_ca1.items():
    bl_windows_ca1[i_trace] = (bl_center - half_window_width_frames, bl_center + half_window_width_frames)
for i_trace, bl_center in dict_itrace_manual_bl_center_nc.items():
    bl_windows_nc[i_trace] = (bl_center - half_window_width_frames, bl_center + half_window_width_frames)

## Calculate metrics for all data

In [None]:
# One row per trace, in the order of traces_ca1 / traces_nc. Each row holds the
# window centres (x, frame indices) and the window metrics (y) returned by
# get_recovery_data. A trace without an entry in the event lists keeps an empty
# row, which makes the array conversion below fail.
x_recovery_ca1 = [[] for i in range(len(traces_ca1))]
y_recovery_ca1 = [[] for i in range(len(traces_ca1))]

x_recovery_nc = [[] for i in range(len(traces_nc))]
y_recovery_nc = [[] for i in range(len(traces_nc))]


for event_uuid, mouse_id, i_trace in event_uuid_mouse_id_i_trace_nc:
    bl_begin, bl_end = bl_windows_nc[i_trace]
    x_data, y_data = get_recovery_data(traces_nc[i_trace], bl_begin, bl_end, n_frames_before_nc)
    x_recovery_nc[i_trace] = x_data
    y_recovery_nc[i_trace] = y_data


for event_uuid, mouse_id, i_trace in event_uuid_mouse_id_i_trace_ca1:
    bl_begin, bl_end = bl_windows_ca1[i_trace]
    x_data, y_data = get_recovery_data(traces_ca1[i_trace], bl_begin, bl_end, n_frames_before_ca1)
    x_recovery_ca1[i_trace] = x_data
    y_recovery_ca1[i_trace] = y_data

# Frame indices are stored as 64-bit integers: a 16-bit integer only reaches
# 32767, which recordings with long baseline/aftermath segments can exceed.
x_recovery_ca1 = np.array(x_recovery_ca1, dtype=np.int64)
x_recovery_nc = np.array(x_recovery_nc, dtype=np.int64)

y_recovery_ca1 = np.array(y_recovery_ca1)
y_recovery_nc = np.array(y_recovery_nc)
