# Important
In raw_traces, cumulative quantities (total distance, time, ...) are not matched between segments, i.e. expect jumps in the data. The assembled traces are stitched together so that these jumps do not appear there.

In [None]:
# Auto-reload imported modules before running code, so that edits to functions developed
# outside this notebook are picked up
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import pandas as pd
import labrotation.file_handling as fh
import datadoc_util as ddutil
from labrotation import two_photon_session as tps
import h5py
from datetime import datetime as dt
from datetime import timedelta

In [None]:
# Required total number of frames of the baseline ("bl") and of the aftermath ("am") interval of each event;
# checked against the events table below and stored as n_bl_frames / n_am_frames in the assembled file
LEN_BL_AM_FRAMES = 5000  # take 5000 frames before and after event

In [None]:
# Read KEY=VALUE pairs from ./.env into env_dict (keys used below: DATA_DOCU_FOLDER, MATLAB_2P_FOLDER).
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        for line in f:
            line = line.rstrip()
            # blank lines and comment lines carry no setting; skip them instead of failing
            if not line.strip() or line.lstrip().startswith("#"):
                continue
            # split on the first "=" only, so that values may themselves contain "="
            key, separator, value = line.partition("=")
            if not separator:
                print(f"Skipping malformed .env line: {line}")
                continue
            env_dict[key] = value
print(env_dict.keys())

In [None]:
# Data documentation helper, pointed at the folder given by DATA_DOCU_FOLDER in .env
ddoc = ddutil.DataDocumentation(env_dict["DATA_DOCU_FOLDER"])

In [None]:
# Load the data documentation; ddoc is used below to look up session files, mouse IDs and
# Nikon file names by recording uuid
ddoc.loadDataDoc()

### Define function for saving file with date time

In [None]:
def get_datetime_for_fname():
    """Return the current local date and time as a ``YYYYMMDD-HHMMSS`` string for use in file names."""
    return dt.now().strftime("%Y%m%d-%H%M%S")

### Open excel file as dataframe

In [None]:
# Folder read from .env; passed to TwoPhotonSession.init_and_process when the sessions are opened below
matlab_2p_folder = env_dict["MATLAB_2P_FOLDER"]

In [None]:
# The events table is expected as events_list.xlsx inside the data documentation folder; stop early if it is missing
events_list_fpath = os.path.join(env_dict["DATA_DOCU_FOLDER"], "events_list.xlsx")
assert os.path.exists(events_list_fpath)

In [None]:
# One row per segment. Columns used in this notebook: event_uuid, event_type, interval_type, event_index,
# recording_uuid, begin_frame, end_frame, window_type, mouse_id
df_events = pd.read_excel(events_list_fpath)

### Filter dataframe
Only take seizure events (as of July 2023, only seizures/sz, and seizures with not enough baseline/sz_invalid_bl exist)

In [None]:
# Keep only seizure events. Take an explicit copy so that columns added later (e.g. interval_length) are
# written to an independent dataframe rather than to a slice of the original (avoids SettingWithCopyWarning).
df_events = df_events[df_events["event_type"] == "sz"].copy()

### Add length of interval

In [None]:
# Number of frames covered by each segment (row) of the events table
df_events["interval_length"] = df_events["end_frame"] - df_events["begin_frame"] + 1  # begin and end frames both inclusive

# Get corresponding traces

## Open directory where the files are located
Only necessary when there is no access to the server (the locations set in the Data Documentation grouping files). The files should be grouped by folder, each folder named after the session uuid. If the session has no LFP, append \_nolfp to the folder name.

In [None]:
# Folder holding one sub-folder per recording session, named <uuid> or <uuid>_nolfp
# (fh.open_dir presumably opens a folder-selection dialog with the given title - confirm in labrotation.file_handling)
traces_folder = fh.open_dir("Open folder containing uuid folders, each containing session files")

### Check that all necessary folders exist
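Sessions whose folder is missing are only printed (uuid, mouse ID and Nikon file name); a missing file inside an existing session folder raises an AssertionError. If nothing is printed before "Consistency check done." and no error is thrown, everything should be fine.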

In [None]:
# Check that every recording referenced in the events table has a session folder with all expected files.
# A missing session folder is only reported by printing; a missing file raises an AssertionError that
# names the session and the file.
for uuid in df_events["recording_uuid"].unique():
    # a session is stored in <uuid>, or in <uuid>_nolfp when there is no LFP recording
    session_folder = os.path.join(traces_folder, uuid)
    has_lfp = True
    if not os.path.exists(session_folder):
        session_folder = os.path.join(traces_folder, f"{uuid}_nolfp")
        has_lfp = False

    if not os.path.exists(session_folder):
        # neither folder exists: report the session so that it can be located
        print(uuid)
        mouse_id = ddoc.getMouseIdForUuid(uuid)
        nik_name = ddoc.getNikonFileNameForUuid(uuid)
        print(f"\t{mouse_id} {nik_name}")
        continue

    files = ddoc.getSessionFilesForUuuid(uuid).iloc[0].to_dict()

    # files that every session must contain
    for file_key in ("nd2", "nikon_meta", "labview"):
        fpath = os.path.join(session_folder, files[file_key])
        assert os.path.exists(fpath), f"{uuid}: {file_key} file not found: {fpath}"

    # the labview time stamp file is named after the labview file, with "time.txt" replacing the extension
    fname_lvtime = os.path.splitext(files["labview"])[0] + "time.txt"
    fpath_lvtime = os.path.join(session_folder, fname_lvtime)
    assert os.path.exists(fpath_lvtime), f"{uuid}: labview time stamp file not found: {fpath_lvtime}"

    # the LFP file is only expected when the folder name does not carry the _nolfp suffix
    if has_lfp:
        fpath_lfp = os.path.join(session_folder, files["lfp"])
        assert os.path.exists(fpath_lfp), f"{uuid}: lfp file not found: {fpath_lfp}"
print("Consistency check done.")

## Assemble traces from multiple files

### Get manual LFP shift values

In [None]:
# The manual LFP shifts are expected as lfp_corrections.xlsx inside the data documentation folder
lfp_corrections_fpath = os.path.join(env_dict["DATA_DOCU_FOLDER"], "lfp_corrections.xlsx")
assert os.path.exists(lfp_corrections_fpath)

In [None]:
# Indexed by recording uuid; the column lfp_manual_delay is read below as the shift applied to the LFP (printed there in s)
df_lfp_corrections = pd.read_excel(lfp_corrections_fpath, index_col="uuid")

### Open all sessions that appear in the dataset. Do the matching of traces
WARNING: this takes a long time!

In [None]:
# Open and process every recording session that appears in the events table.
# sessions_dict: recording uuid -> TwoPhotonSession; has_lfp_dict: recording uuid -> whether the session has LFP.
sessions_dict = dict()
has_lfp_dict = dict()
for uuid in df_events["recording_uuid"].unique():
    print(uuid)
    # sessions without LFP are stored in a folder that carries the _nolfp suffix
    session_folder = os.path.join(traces_folder, uuid)
    has_lfp = os.path.exists(session_folder)
    if not has_lfp:
        session_folder = os.path.join(traces_folder, f"{uuid}_nolfp")
    files = ddoc.getSessionFilesForUuuid(uuid).iloc[0].to_dict()

    fpath_nd2 = os.path.join(session_folder, files["nd2"])
    fpath_nikmeta = os.path.join(session_folder, files["nikon_meta"])
    fpath_lv = os.path.join(session_folder, files["labview"])
    # the labview time stamp file shares the labview file name, with "time.txt" replacing the extension
    fpath_lvtime = os.path.join(session_folder, os.path.splitext(files["labview"])[0] + "time.txt")
    fpath_lfp = os.path.join(session_folder, files["lfp"]) if has_lfp else None

    session = tps.TwoPhotonSession.init_and_process(fpath_nd2, fpath_nikmeta, fpath_lv, fpath_lvtime, fpath_lfp, matlab_2p_folder)
    if not session.has_lfp():
        print(f"\t\t{uuid} has no LFP")
    else:
        # apply the manually determined LFP shift of this recording
        t_lfp_shift = df_lfp_corrections.loc[uuid].lfp_manual_delay
        session.shift_lfp(t_lfp_shift)
        print(f"\t\t{uuid} LFP shifted by {t_lfp_shift} s")
    sessions_dict[uuid] = session
    has_lfp_dict[uuid] = session.has_lfp()

### Group by event. Loop over entries, extract corresponding frames

### Test that baseline and aftermath add up to 5000 frames each in all cases

In [None]:
# For every event, the segments of the baseline ("bl") and of the aftermath ("am") interval must each
# add up to exactly LEN_BL_AM_FRAMES frames; other interval types are not checked.
for (event_uuid, ival_type), g in df_events.groupby(["event_uuid", "interval_type"]):
    if ival_type not in ("bl", "am"):
        continue
    assert g["interval_length"].sum() == LEN_BL_AM_FRAMES

### Extract traces
Define data structure (dict):

* event_uuid as outermost key
* interval_type (bl, sz, am) as next key
* event_index as next key
* inside event_index, the various traces are stored as key: array pairs


In [None]:
# Cut the traces of every segment listed in df_events out of its recording session.
# sessions_traces_dict[event_uuid][interval_type][event_index]      -> dict of named arrays
# sessions_nik_tstamps_dict[event_uuid][interval_type][event_index] -> (Nikon time stamp of first frame, of last frame)
sessions_traces_dict = dict()
sessions_nik_tstamps_dict = dict()
for event_uuid, g_uuid in df_events.groupby("event_uuid"):
    event_traces = dict()
    event_tstamps = dict()
    sessions_traces_dict[event_uuid] = event_traces
    sessions_nik_tstamps_dict[event_uuid] = event_tstamps

    for ival_type, g in g_uuid.groupby("interval_type"):
        ival_traces = dict()
        ival_tstamps = dict()
        event_traces[ival_type] = ival_traces
        event_tstamps[ival_type] = ival_tstamps

        # every row is one segment, i.e. a piece of one recording that belongs to this interval (bl, sz or am)
        for i_segment, segment in g.sort_values(by=["event_index"]).iterrows():
            # the TwoPhotonSession object of the recording this segment was taken from
            segment_session = sessions_dict[segment["recording_uuid"]]

            # frame window: the table is 1-indexed with inclusive end, arrays are 0-indexed with exclusive end
            begin_frame = segment["begin_frame"] - 1
            end_frame = segment["end_frame"]
            frame_window = slice(begin_frame, end_frame)

            # labview time stamps of the whole session, one per frame
            session_lv_t = segment_session.belt_scn_dict['tsscn']/1000.  # switch from ms to s
            session_mean_fluo = segment_session.mean_fluo

            # lfp is already matched to labview. lfp and movement channel t values should be the same,
            # but both are kept to be sure. Sessions without lfp get zero traces on the labview time axis.
            if segment_session.has_lfp():
                lfp_t, lfp_y = segment_session.lfp_lfp()
                lfp_mov_t, lfp_mov_y = segment_session.lfp_movement()
            else:
                lfp_t = session_lv_t.copy()
                lfp_mov_t = session_lv_t.copy()
                lfp_y = np.zeros(len(lfp_t))
                lfp_mov_y = np.zeros(len(lfp_t))

            # time window of the segment, used to cut the lfp traces. Where possible the window ends at the
            # first frame after the segment, to avoid skipping lfp samples between adjacent frames.
            t_begin = session_lv_t[begin_frame]
            if end_frame < len(session_mean_fluo):
                t_end = session_lv_t[end_frame]
            else:
                t_end = session_lv_t[end_frame-1]
            lfp_t_flags = np.where((lfp_t > t_begin) & (lfp_t < t_end))[0]
            lfp_mov_t_flags = np.where((lfp_mov_t > t_begin) & (lfp_mov_t < t_end))[0]

            # name-array pairs of this segment; frame-matched traces are cut by frame index, lfp traces by time
            ival_traces[segment["event_index"]] = {
                "lv_dist": segment_session.belt_scn_dict['distance'][frame_window],
                "lv_speed": segment_session.belt_scn_dict['speed'][frame_window],
                "lv_running": np.array(segment_session.belt_scn_dict['running'][frame_window], dtype=np.uint8),
                "lv_totdist": segment_session.belt_scn_dict['totdist'][frame_window],
                "lv_rounds": np.array(segment_session.belt_scn_dict['rounds'][frame_window], dtype=np.uint8),
                "lv_t_s": session_lv_t[frame_window],  # mark units to emphasize switching from ms to s
                "mean_fluo": session_mean_fluo[frame_window],
                "lfp_t": lfp_t[lfp_t_flags],
                "lfp_y": lfp_y[lfp_t_flags],
                "lfp_mov_t": lfp_mov_t[lfp_mov_t_flags],
                "lfp_mov_y": lfp_mov_y[lfp_mov_t_flags],
            }

            # Nikon time stamps of the first and of the last frame in the segment
            # (end_frame marks an open interval, so the last frame is end_frame-1)
            ival_tstamps[segment["event_index"]] = (
                segment_session.nikon_time_stamp(begin_frame),
                segment_session.nikon_time_stamp(end_frame-1),
            )

# Save raw traces

In [None]:
# Directory that receives both the raw and the assembled HDF5 file
# (fh.open_dir presumably opens a folder-selection dialog with the given title - confirm in labrotation.file_handling)
output_dir = fh.open_dir("Choose export directory for results!")

In [None]:
# File name carries the current date and time, so repeated exports do not overwrite each other
output_fpath = os.path.join(output_dir, f"raw_traces_{get_datetime_for_fname()}.h5")
print(f"Saving to {output_fpath}")

In [None]:
# Write the raw (per-segment) traces to HDF5. Group hierarchy: event_uuid / interval_type / event_index,
# with one dataset per named trace inside the innermost group.
with h5py.File(output_fpath, "w") as hf:
    for event_uuid, event_traces in sessions_traces_dict.items():
        uuid_grp = hf.create_group(event_uuid)
        for interval_type, ival_traces in event_traces.items():
            ival_type_grp = uuid_grp.create_group(interval_type)
            for event_index, segment_traces in ival_traces.items():
                # HDF5 group names are strings, event_index is a number
                event_ind_grp = ival_type_grp.create_group(str(event_index))
                for trace_name, trace in segment_traces.items():
                    event_ind_grp.create_dataset(trace_name, data=trace)

# Create joint traces
Create dictionary with event_uuid as outermost key. Inside it, there should be the different named arrays

In [None]:
# Scratch example: two equally long arrays for trying out element-wise multiplication
# (not referenced by the other cells visible in this notebook)
arr1 = np.array([1, 2, 2.5, 5])
arr2 = np.array([0.1, 0.2, 0.1, 0.1])

In [None]:
# Scratch check: element-wise product of the two example arrays, as used for speed*dt in create_totdist_abs
arr1*arr2

In [None]:
def create_dt(t):
    """Return the time step belonging to each entry of the time sequence ``t``.

    For ``i >= 1`` entry ``i`` is ``t[i] - t[i-1]``. The first entry has no predecessor and is set to the
    second one, i.e. the first step is assumed to be as long as the second, to avoid a step of 0.

    Parameters
    ----------
    t : array-like
        1D sequence of time points.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``t``. Empty for empty input; ``[0.]`` for a single time
        point, because no step size can be derived from it.
    """
    t = np.asarray(t)
    steps = np.zeros(len(t))
    # with fewer than two time points there is no difference to take
    if len(t) > 1:
        steps[1:] = np.diff(t)
        steps[0] = steps[1]  # assume same step size to avoid 0
    return steps
def create_totdist_abs(speed, dt):
    """Return the cumulative absolute distance obtained from speed samples and their time steps.

    Entry ``i`` is the sum of ``abs(speed[k] * dt[k])`` over ``k = 0..i``, so movement in either
    direction increases the total. This includes the first sample.

    Parameters
    ----------
    speed : array-like
        1D sequence of speed samples.
    dt : array-like
        Time step of each speed sample; must provide at least ``len(speed)`` entries
        (surplus entries are ignored).

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``speed``; empty for empty input.

    Raises
    ------
    IndexError
        If ``dt`` has fewer entries than ``speed``.
    """
    speed = np.asarray(speed, dtype=float)
    dt = np.asarray(dt, dtype=float)
    if len(dt) < len(speed):
        raise IndexError("dt must provide a time step for every speed sample")
    # distance covered in each step, irrespective of direction, accumulated over the trace
    return np.cumsum(np.abs(speed * dt[:len(speed)]))

In [None]:
# Stitch the raw segments of every event into continuous traces, in the order baseline ("bl"),
# seizure ("sz"), aftermath ("am"):
# * time axes start at 0 and every segment is attached to the end of the previous one,
# * cumulative quantities (lv_totdist, lv_rounds, lv_totdist_abs) continue from the last value reached,
# * all other traces, including the per-frame time step lv_dt, are concatenated unchanged.
# joint_session_traces_dict[event_uuid]   -> dict of named arrays
# joint_session_metadata_dict[event_uuid] -> dict of break point lists (see below)
joint_session_traces_dict = dict()
joint_session_metadata_dict = dict()

# Seconds added to the Nikon end time stamp of the last baseline segment before the time difference to
# the first seizure segment is taken. Only the events listed here (seizure started during the break
# between two recordings) get a time-stamp based gap between baseline and seizure.
bl_to_sz_gap_seconds = {
    "f0442bebcd1a4291a8d0559eb47df08e": 60,
    "f481149fa8694621be6116cb84ae2d3c": 28,
    "7753b03a2a554cccaab42f1c0458d742": 150,
    "54c31c3151944cfd86043932d3a19b9a": 115,
}

for event_uuid, event_traces in sessions_traces_dict.items():
    print(event_uuid)
    event_tstamps = sessions_nik_tstamps_dict[event_uuid]
    event_rows = df_events[df_events["event_uuid"] == event_uuid]

    # accumulated traces of this event, in the order in which they are written to the output file
    joint = {
        "lv_dist": np.array([]),
        "lv_speed": np.array([]),
        "lv_running": np.array([], dtype=np.uint8),
        "lv_totdist": np.array([]),
        "lv_totdist_abs": np.array([]),
        "lv_dt": np.array([]),
        "lv_rounds": np.array([], dtype=np.uint8),
        "lv_t_s": np.array([]),
        "lfp_t": np.array([]),
        "lfp_y": np.array([]),
        "lfp_mov_t": np.array([]),
        "lfp_mov_y": np.array([]),
        "mean_fluo": np.array([]),
    }

    # break_points: the first frame index of each patch (i.e. piece of video that makes up baseline, ...)
    # segment_type_break_points: the first frame index of each segment type (bl, sz, am)
    # recording_break_points: the first frame index of each patch taken from a different recording than the previous one
    # *_lfp: the corresponding first indices in the lfp traces
    # Frame/index 0 is the first index of the first patch, hence every list starts with 0.
    event_metadata_dict = {
        "break_points": [0],
        "break_points_lfp": [0],
        "segment_type_break_points": [0],
        "segment_type_break_points_lfp": [0],
        "recording_break_points": [0],
        "recording_break_points_lfp": [0],
    }

    previous_recording_uuid = None
    t_last_segment_nik_end = None  # for creating gaps where seizure recordings were interrupted
    i_segment = 0  # keep count for checking consistency

    for ival_type in ("bl", "sz", "am"):
        if ival_type != "bl":
            # first frame index of the sz / am part
            event_metadata_dict["segment_type_break_points"].append(len(joint["lv_t_s"]))
            event_metadata_dict["segment_type_break_points_lfp"].append(len(joint["lfp_t"]))
        if ival_type == "sz":
            # only include a gap between baseline and seizure for the listed events
            if event_uuid in bl_to_sz_gap_seconds:
                t_last_segment_nik_end += timedelta(seconds=bl_to_sz_gap_seconds[event_uuid])
            else:
                t_last_segment_nik_end = None

        for event_index in sorted(event_traces[ival_type].keys()):
            # event_index must count the segments of the event consecutively across bl, sz and am
            assert event_index == i_segment
            segment = event_traces[ival_type][event_index]
            current_recording_uuid = event_rows[event_rows["event_index"] == event_index].recording_uuid.values[0]

            lv_speed_curr = np.array(segment["lv_speed"])
            lv_t_s_curr = np.array(segment["lv_t_s"])
            lfp_t_curr = np.array(segment["lfp_t"])
            lfp_mov_t_curr = np.array(segment["lfp_mov_t"])
            lv_totdist_curr = np.array(segment["lv_totdist"])
            lv_rounds_curr = np.array(segment["lv_rounds"])
            lv_dt_curr = np.array(create_dt(segment["lv_t_s"]))
            lv_totdist_abs_curr = np.array(create_totdist_abs(lv_speed_curr, lv_dt_curr))

            if len(joint["lv_t_s"]) == 0:
                # very first segment of the event: shift the time axes to start at 0,
                # cumulative data need not be matched to anything
                lv_t_s_new = lv_t_s_curr - lv_t_s_curr[0]
                lfp_t_new = lfp_t_curr - lfp_t_curr[0]
                lfp_mov_t_new = lfp_mov_t_curr - lfp_mov_t_curr[0]
                lv_totdist_new = lv_totdist_curr
                lv_rounds_new = lv_rounds_curr
                lv_totdist_abs_new = lv_totdist_abs_curr
            else:
                # break points are the lengths reached before this segment is appended
                n_frames_so_far = len(joint["lv_t_s"])
                n_lfp_so_far = len(joint["lfp_t"])
                if (previous_recording_uuid is not None) and (current_recording_uuid != previous_recording_uuid):
                    event_metadata_dict["recording_break_points"].append(n_frames_so_far)
                    event_metadata_dict["recording_break_points_lfp"].append(n_lfp_so_far)
                event_metadata_dict["break_points"].append(n_frames_so_far)
                event_metadata_dict["break_points_lfp"].append(n_lfp_so_far)

                # time between the last entry of the previous segment and the first entry of this one
                if ival_type == "sz" and t_last_segment_nik_end is not None:
                    # for sz only (for now): use the time difference between the Nikon time stamps of the last
                    # frame of the previous segment and the first frame of the current segment. This introduces
                    # the possible gap coming from a session split up in two Nikon recordings.
                    t_current_segment_nik_start = event_tstamps[ival_type][event_index][0]
                    dt_nik_frames = abs(t_current_segment_nik_start - t_last_segment_nik_end).total_seconds()
                    gap_lv_t_s = gap_lfp_t = gap_lfp_mov_t = dt_nik_frames
                else:
                    # use the average time step of the current segment
                    gap_lv_t_s = np.diff(lv_t_s_curr).mean()
                    gap_lfp_t = np.diff(lfp_t_curr).mean()
                    gap_lfp_mov_t = np.diff(lfp_mov_t_curr).mean()

                # time axes: modify by (-current[0] + last[-1] + gap)
                lv_t_s_new = lv_t_s_curr - lv_t_s_curr[0] + joint["lv_t_s"][-1] + gap_lv_t_s
                lfp_t_new = lfp_t_curr - lfp_t_curr[0] + joint["lfp_t"][-1] + gap_lfp_t
                lfp_mov_t_new = lfp_mov_t_curr - lfp_mov_t_curr[0] + joint["lfp_mov_t"][-1] + gap_lfp_mov_t
                # cumulative data: match the first value of this segment to the last value reached so far
                lv_totdist_new = lv_totdist_curr - lv_totdist_curr[0] + joint["lv_totdist"][-1]
                lv_rounds_new = lv_rounds_curr - lv_rounds_curr[0] + joint["lv_rounds"][-1]
                lv_totdist_abs_new = lv_totdist_abs_curr - lv_totdist_abs_curr[0] + joint["lv_totdist_abs"][-1]

            # append the segment; lv_dt gets exactly one entry per frame for every interval type,
            # so it has the same length as lv_t_s
            for trace_name, new_values in (
                ("lv_dist", np.array(segment["lv_dist"])),
                ("lv_speed", lv_speed_curr),
                ("lv_running", np.array(segment["lv_running"])),
                ("lv_totdist", lv_totdist_new),
                ("lv_totdist_abs", lv_totdist_abs_new),
                ("lv_dt", lv_dt_curr),
                ("lv_rounds", lv_rounds_new),
                ("lv_t_s", lv_t_s_new),
                ("lfp_t", lfp_t_new),
                ("lfp_y", np.array(segment["lfp_y"])),
                ("lfp_mov_t", lfp_mov_t_new),
                ("lfp_mov_y", np.array(segment["lfp_mov_y"])),
                ("mean_fluo", np.array(segment["mean_fluo"])),
            ):
                joint[trace_name] = np.concatenate([joint[trace_name], new_values])

            previous_recording_uuid = current_recording_uuid
            # end time stamp from the tuple (nik_tstamp_begin, nik_tstamp_end); only used for sz gaps so far
            t_last_segment_nik_end = event_tstamps[ival_type][event_index][1]
            i_segment += 1

    print()
    joint_session_traces_dict[event_uuid] = joint
    joint_session_metadata_dict[event_uuid] = event_metadata_dict

### Save file

In [None]:
# The assembled traces go to the same export directory as the raw traces, again with a date-time stamp in the name
output_assembled_fpath = os.path.join(output_dir, f"assembled_traces_{get_datetime_for_fname()}.h5")
print(f"Saving stitched-together traces to {output_assembled_fpath}")

In [None]:
# Write the assembled traces to HDF5: one group per event, holding the traces as datasets and the
# event description plus break point lists as group attributes.
with h5py.File(output_assembled_fpath, "w") as hf:
    for event_uuid, event_traces in joint_session_traces_dict.items():
        event_uuid_grp = hf.create_group(event_uuid)
        df_attributes = df_events[df_events["event_uuid"] == event_uuid]
        recording_uuids = df_attributes["recording_uuid"].unique()

        grp_attrs = event_uuid_grp.attrs
        grp_attrs["session_uuids"] = recording_uuids
        grp_attrs["has_lfp"] = [has_lfp_dict[uuid] for uuid in recording_uuids]
        grp_attrs["window_type"] = df_attributes["window_type"].unique()[0]  # same event should happen for same window!
        grp_attrs["n_frames"] = len(event_traces["mean_fluo"])
        grp_attrs["mouse_id"] = df_attributes["mouse_id"].unique()[0]
        grp_attrs["n_lfp_steps"] = len(event_traces["lfp_t"])
        grp_attrs["n_lfp_mov_steps"] = len(event_traces["lfp_mov_t"])
        grp_attrs["n_bl_frames"] = LEN_BL_AM_FRAMES
        grp_attrs["n_am_frames"] = LEN_BL_AM_FRAMES

        # break point lists collected while stitching the segments together
        for attr_name, attr_value in joint_session_metadata_dict[event_uuid].items():
            grp_attrs[attr_name] = attr_value

        for array_name, array in event_traces.items():
            event_uuid_grp.create_dataset(array_name, data=array)

In [None]:
# TODO: TwoPhotonSession should have totdist_abs now. Can simply take it