In [1]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [2]:
import labrotation.file_handling as fh
import h5py
from time import time
import matplotlib.pyplot as plt
import numpy as np
import os
from labrotation import file_handling as fh
from copy import deepcopy
import pandas as pd
import labrotation.two_photon_session as tps
import seaborn as sns
import uuid  # for unique labeling of sessions and coupling arrays (mouse velocity, distance, ...) to sessions in dataframe 
from matplotlib import cm  # colormap
import datadoc_util
from labrotation import two_photon_session as tps

# If exists, load environmental variables from .env file

In [3]:
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        for line in f.readlines():
            l = line.rstrip().split("=")
            env_dict[l[0]] = l[1]
print(env_dict.keys())

dict_keys(['DATA_DOCU_FOLDER', 'DOWNLOADS_FOLDER', 'LOG_FOLDER', 'MATLAB_2P_FOLDER', 'FLUO_LV_MATCHED_FOLDER'])


# Set up data documentation directory

In [4]:
# assumption: inside the documentation folder, the subfolders carry the id of each mouse (not exact necessarily, but they 
# can be identified by the name of the subfolder). 
# Inside the subfolder xy (for mouse xy), xy_grouping.xlsx and xy_segmentation.xlsx can be found.
# xy_grouping.xlsx serves the purpose of finding the recordings belonging together, and has columns:
# folder, nd2, labview, lfp, face_cam_last, nikon_meta, experiment_type, day
# xy_segmentation.xlsx contains frame-by-frame (given by a set of disjoint intervals forming a cover for the whole recording) 
# classification of the events in the recording ("normal", seizure ("sz"), sd wave ("sd_wave") etc.). The columns:
# folder, interval_type, frame_begin, frame_end.

# TODO: write documentation on contents of xlsx files (what the columns are etc.)
if "DATA_DOCU_FOLDER" in env_dict.keys():
    docu_folder = env_dict["DATA_DOCU_FOLDER"]
else:
    docu_folder = fh.open_dir("Choose folder containing folders for each mouse!")
print(f"Selected folder:\n\t{docu_folder}")

Selected folder:
	D:\PhD\Data documentation


In [5]:
if "documentation" in os.listdir(docu_folder):
    mouse_folder = os.path.join(docu_folder, "documentation")
else:
    mouse_folder = docu_folder
mouse_names = os.listdir(mouse_folder)
print(f"Mice detected:")
for mouse in mouse_names:
    print(f"\t{mouse}")

Mice detected:
	T301
	T303
	T324
	T329
	T333
	T337
	T352
	T370
	T386
	T391
	T396
	T452
	T534


In [6]:
mouse_folder

'D:\\PhD\\Data documentation\\documentation'

### Load matlab-2p

In [7]:
if "MATLAB_2P_FOLDER" in env_dict.keys():
    matlab_2p_folder = env_dict["MATLAB_2P_FOLDER"]
else:
    matlab_2p_folder = fh.open_dir("Choose matlab-2p folder")
print(f"matlab-2p folder set to:\n\t{matlab_2p_folder}")

matlab-2p folder set to:
	D:\PhD\matlab-2p


In [8]:
df_seg_complete = pd.DataFrame(columns = ["nd2", "interval_type", "frame_begin", "frame_end"])
df_grouping_complete = pd.DataFrame(columns = ["folder", "nd2", "labview", "lfp", "face_cam_last", "nikon_meta", "experiment_type", "mouse_id", "day"])

for mouse_id in mouse_names:
    print(mouse_id)
    seg_fpath = os.path.join(mouse_folder, mouse_id, mouse_id + '_segmentation.xlsx')
    grouping_fpath = os.path.join(mouse_folder, mouse_id, mouse_id + '_grouping.xlsx')
    if os.path.exists(seg_fpath) and os.path.exists(grouping_fpath):
        df_seg = pd.read_excel(seg_fpath)
        df_grouping = pd.read_excel(grouping_fpath)
        df_grouping["mouse_id"] = mouse_id
        # select only tmev, chr2_szsd, chr2_sd, chr2_ctl experiment data first
        df_grouping = df_grouping[df_grouping["experiment_type"].isin(["tmev", "tmev_ctl", "chr2_sd", "chr2_szsd", "chr2_ctl"])]
        # merge into large dataframes
        # print(f"\tseg bef: {len(df_seg_complete['nd2'])}")
        df_seg_complete = pd.concat([df_seg_complete, df_seg])
        # print(f"\tseg aft: {len(df_seg_complete['nd2'])}")
        # print(f"\tgro bef: {len(df_grouping_complete['nd2'])}")
        df_grouping_complete = pd.concat([df_grouping_complete, df_grouping])
        # print(f"\tgro aft: {len(df_grouping_complete['nd2'])}")
    else:
        print(f"Check if you set the correct folder (folder containing all subfolders with mouse names):")
        if not os.path.exists(seg_fpath):
            print(f"\t{seg_fpath} not found")
        if not os.path.exists(grouping_fpath):
            print(f"\t{grouping_fpath} not found")
        

T301
T303
T324
T329
T333
T337
T352
T370
T386
T391
T396
T452
T534


In [9]:
ddoc = datadoc_util.DataDocumentation(docu_folder)
ddoc.loadDataDoc()

In [10]:
# take only recordings that were classified as "tmev" (experiment type)
df_seg_complete.where(df_seg_complete["nd2"].isin(df_grouping_complete["nd2"].unique()), inplace=True)
# wrong recording types changed to NaN; drop them
df_seg_complete.dropna(inplace=True)

# Pre-/post-ictal locomotion

In [11]:
# take only videos with seizure
df_sz_movies = df_seg_complete.groupby("nd2").filter(lambda group: "sz" in group["interval_type"].unique())

## Add uuid

In [12]:
df_sz_movies = pd.merge(df_sz_movies, ddoc.getNikonFileNameUuid().dropna(), on="nd2")

### Filter for only tmev-type recording

In [13]:
df_sz_movies["type"] = df_sz_movies.apply(lambda row: df_grouping_complete[df_grouping_complete["uuid"] == row["uuid"]].experiment_type.values[0], axis=1)

In [14]:
df_sz_movies = df_sz_movies[df_sz_movies["type"] == "tmev"]

### Add previous recording to cases where seizure starts on frame 1

In [15]:
df_sz_movies

Unnamed: 0,nd2,interval_type,frame_begin,frame_end,uuid,type
0,T301_tmev_d1.270820.1151.nd2,normal,1,8672,a6099849121f44ccbec237037971ab57,tmev
1,T301_tmev_d1.270820.1151.nd2,sz,8673,9147,a6099849121f44ccbec237037971ab57,tmev
2,T301_tmev_d1.270820.1151.nd2,sd_wave,9148,9231,a6099849121f44ccbec237037971ab57,tmev
3,T301_tmev_d1.270820.1151.nd2,sd_wave,9232,9301,a6099849121f44ccbec237037971ab57,tmev
4,T301_tmev_d1.270820.1151.nd2,sd_extinction,9302,10307,a6099849121f44ccbec237037971ab57,tmev
...,...,...,...,...,...,...
781,T534_tmev_d2_extra_20220816_002.nd2,sz,4611,5301,757c430daa2349e198ddefa7a0277769,tmev
782,T534_tmev_d2_extra_20220816_002.nd2,normal,5302,17978,757c430daa2349e198ddefa7a0277769,tmev
783,T534_tmev_d2_extra_20220816_003.nd2,normal,1,15333,92062a977958443e83011619b34eabb8,tmev
784,T534_tmev_d2_extra_20220816_003.nd2,sz,15334,15934,92062a977958443e83011619b34eabb8,tmev


In [16]:
# 3dd896d33a0f42c698228fbe254ebd60 contains seizure from frame 1; previous recording is 21c83d0b69ec4585a9a11f4ce6c24b99
#pd.merge(df_sz_movies, ddoc.getNikonFileNameUuid().dropna(), on="nd2")
uuids_to_add = [("21c83d0b69ec4585a9a11f4ce6c24b99", "3dd896d33a0f42c698228fbe254ebd60")]  # pairs of (uuid of previous recording, uuid of sz recording)
for uuid, uuid_sz in uuids_to_add:
    nd2_to_add = df_grouping_complete[df_grouping_complete["uuid"] == uuid].nd2.values[0]
    group_to_add = df_seg_complete[df_seg_complete["nd2"] == nd2_to_add].to_dict()
    group_to_add["uuid"] = uuid
    group_to_add["uuid_sz"] = uuid_sz
    group_to_add = pd.DataFrame(group_to_add)
    group_to_add["type"] = group_to_add.apply(lambda row: df_grouping_complete.loc[df_grouping_complete["uuid"] == row["uuid"]].experiment_type.values[0], axis=1)
    df_sz_movies = pd.concat([df_sz_movies, group_to_add])
    
#df_seg_complete.groupby("nd2").filter(lambda group: "sz" in group["interval_type"].unique())
#previous_recording = 

## Add seizure-uuid
See 4 Directionality analysis for original context. This code should be the same as there! (If update needed, need to extract into third file!)
In this analysis, the purpose is to make each seizure unique (to deal with seizures split-up in two videos, for example)

In [17]:
df_sz_movies["uuid_sz"] = df_sz_movies["uuid"]
# following two recordings contain 1 seizure-sd event
df_sz_movies["uuid_sz"] = df_sz_movies["uuid_sz"].replace("65bff16a4cf04930a5cb14f489a8f99b", "30dc55d1a5dc4b0286d132e72f208ca6")
# following recordings do not have sz
#qdf = qdf[qdf["uuid_matched"] != "171693d0988c458a96c8198c7b8cfc28"]

In [18]:
df_sz_intervals = df_sz_movies[df_sz_movies["interval_type"] == "sz"]

In [19]:
for i_g, g in df_sz_intervals.sort_values(["nd2"]).groupby("uuid_sz"):  # assume seizures cut in two have incremented names
    print(f'{g[g["interval_type"] == "sz"].frame_begin.values}: {g[g["interval_type"] == "sz"].uuid.values}')
    # check if one recording contained several seizures:
    n_seizures = len(g[g["interval_type"] == "sz"].uuid_sz.unique())
    n_recordings = len(g[g["interval_type"] == "sz"].uuid.unique())
    print(f"{n_seizures} sz: {n_recordings} recs")
    
    
    # group by uuid_sz?
    # TODO: need some way to sort by recording starting time too.

[5412]: ['06ebcf354f5c41519669f187e16de364']
1 sz: 1 recs
[7896]: ['2aa75aa234a749668eb896e7e00aa87a']
1 sz: 1 recs
[17733 1]: ['65bff16a4cf04930a5cb14f489a8f99b' '30dc55d1a5dc4b0286d132e72f208ca6']
1 sz: 2 recs
[1]: ['3dd896d33a0f42c698228fbe254ebd60']
1 sz: 1 recs
[131]: ['44ca941252064dcabb0fe3d24a8dab49']
1 sz: 1 recs
[263 14785]: ['4fe45b25dc854453880cd868fe77e9d4' '4fe45b25dc854453880cd868fe77e9d4']
1 sz: 1 recs
[10291]: ['5ea6fd9c4cb542dbbc1f65305725cede']
1 sz: 1 recs
[2684]: ['73a27053f4bf4ae1b4ad96064b6dabc0']
1 sz: 1 recs
[4611]: ['757c430daa2349e198ddefa7a0277769']
1 sz: 1 recs
[8070]: ['79fb974821f34e3abdcf5ca650e1c0f4']
1 sz: 1 recs
[1]: ['8dd54649e47046239ebafc56eeb8b5b2']
1 sz: 1 recs
[15334]: ['92062a977958443e83011619b34eabb8']
1 sz: 1 recs
[8673]: ['a6099849121f44ccbec237037971ab57']
1 sz: 1 recs
[1]: ['a9694ce2973349cb9cb6b51f77c46b49']
1 sz: 1 recs
[8601]: ['ae564f8c867f4f35aa971b6562c33a7c']
1 sz: 1 recs
[1]: ['b8f31023d2c042c2a7f95b54d9807cb7']
1 sz: 1 recs
[5476

In [20]:
# loop through each seizure. Find recording, find 5 minutes before beginning of sz, during sz + sd, and 5 minutes after end of sz
# 1. for each sz (uuid_sz), find ordered recordings
# 2. find beginning of sz = first recording with a "sz" category
# 3. find uuid of the recording, open loco data, extract locomotion of 5 min before sz begin
# 4. find end of sz = last recording with a "sz" or "sd_wave" category
# 5. find uuid of the recording, open loco data, extract locomotion of 5 min after sz end

In [21]:
df_sz_intervals["i_sz"] = df_sz_intervals.groupby("uuid").cumcount() + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sz_intervals["i_sz"] = df_sz_intervals.groupby("uuid").cumcount() + 1


### Sort by recordings (ASSUMPTION: recording indexing is incremental)
Once using groupby uuid_sz, the recordings where the same seizure is present should then be ordered from the first recording it appears in to the last.

In [22]:
df_sz_intervals = df_sz_intervals.sort_values("nd2")

### Make uuid_sz truly unique
Append the index of the seizure, i.e. uuid -> uuid_1, uuid_2 etc.

In [23]:
df_sz_intervals["uuid_sz"] = df_sz_intervals.apply(lambda row: row.uuid_sz + "_" +  str(row.i_sz), axis=1)

### Create data structure of intervals
For each seizure, the entry should contain the baseline (5 min), sz+sd, post-sz (5 min) periods. If one period spreads over multiple recordings, then this will be an array.
The data structure:

A dictionary of the uuid_sz values, each has as its value another dictionary, with keys "baseline", "sz", "aftermath". The corresponding values are lists of tuples. Each tuple contains the uuid of the session, the beginning frame (1-indexing) and end frame in that session (both inclusive! i.e. in case of 1 and 10, the segment is 1 to 10 inclusive, the segment having a length of 10 frames). If the list of tuples has more than one entry, it means that the given interval (baseline, sz, aftermath) spreads over multiple sessions (i.e. recordings).

Example:

{

\<uuid_sz1\>: 

{

  "baseline": \[ (\<uuid1\>, \<begin_frame\>, \<end_frame\>), (\<uuid2\>, \<begin_frame\>, \<end_frame\>)  \],

  "sz": \[ ( \<uuid\>, \<begin_frame\>, \<end_frame\> ) \],
  
  "aftermath": \[ ( \<uuid\>, \<begin_frame\>, \<end_frame\> ) \],
  
  }
  
  
  \<uuid_sz2\>: {...}

}

In [24]:
RECORDING_FRAMERATE = 15.0  # in Hz
BL_LEN_S = 5*60  # baseline desired (approximate) length, in seconds
AM_LEN_S = 5*60  # aftermath desired (approximate) in seconds

bl_len_frames = RECORDING_FRAMERATE * BL_LEN_S
am_len_frames = RECORDING_FRAMERATE * AM_LEN_S

In [25]:
df_grouping_complete[df_grouping_complete.uuid == "3dd896d33a0f42c698228fbe254ebd60"]

Unnamed: 0,folder,nd2,labview,lfp,face_cam_last,nikon_meta,experiment_type,mouse_id,day,uuid
3,Y:\AG-Wenzel\Group\tmev\T396-1\T396-1_tmev_d3,T396-1_tmev_d3_26062021_004.nd2,T396-1_tmev_d3.260621.1239.txt,21626007.abf,T396-1_tmev_d3.260621.1239_2.avi,T396-1_tmev_d3.260621.1239_nik.txt,tmev,T396,3,3dd896d33a0f42c698228fbe254ebd60


In [26]:
df_sz_movies[df_sz_movies.uuid == "3dd896d33a0f42c698228fbe254ebd60"]

Unnamed: 0,nd2,interval_type,frame_begin,frame_end,uuid,type,uuid_sz
773,T396-1_tmev_d3_26062021_004.nd2,sz,1,490,3dd896d33a0f42c698228fbe254ebd60,tmev,3dd896d33a0f42c698228fbe254ebd60
774,T396-1_tmev_d3_26062021_004.nd2,normal,491,17978,3dd896d33a0f42c698228fbe254ebd60,tmev,3dd896d33a0f42c698228fbe254ebd60


In [27]:
sz_intervals_dict = dict()
for uuid_sz, g in df_sz_intervals.sort_values("nd2").groupby(["uuid_sz"]):
    sz_entry = []
    bl_entry = []  # baseline
    am_entry = []  # aftermath
    
    # 1. get sz data.
    assert len(g) <= 2  # for now, only deal with cases where segment is in single recording or in two recordings. 
    # If more recordings (highly unlikely), need to switch to array-based approach
    sz_begin_uuid = g.iloc[0].uuid
    sz_end_uuid = g.iloc[-1].uuid  # assume len(g) == 1 or == 2!!!
    # Assume only seizure is split up, the baseline and aftermath never span over two recordings
    sz_begin_session = df_grouping_complete.loc[df_grouping_complete["uuid"] == sz_begin_uuid]
    sz_end_session = df_grouping_complete.loc[df_grouping_complete["uuid"] == sz_end_uuid]
    # define seizure begin and end frames (might be from different recordings!) to aid acquiring baseline and aftermath frames
    sz_begin_frame = g.iloc[0].frame_begin
    sz_end_frame = g.iloc[-1].frame_end
    
    if sz_begin_uuid == sz_end_uuid:  # seizure entirely in a single recording
        sz_entry.append((sz_begin_uuid, sz_begin_frame, sz_end_frame))
    else:
        assert len(g) == 2  # make sure only up to 2 recordings, as this is the only other case handled
        sz_entry.append((sz_begin_uuid, sz_begin_frame, g.iloc[0].frame_end))
        sz_entry.append((sz_end_uuid, g.iloc[1].frame_begin, sz_end_frame))
    # get baseline
    bl_uuid = sz_begin_uuid
    bl_end_frame = sz_begin_frame - 1
    bl_begin_frame = sz_begin_frame - bl_len_frames
    if sz_begin_frame == 1:  # seizure begins with recording; happens at least once
        # need to take previous recording end as baseline
    elif bl_begin_frame < 1:  # not enough baseline available in recording
        # TODO: modify exception to warning? No way to add baseline in front anyway... Recordings are never
        # directly after another; it might be possible to take previous recording somehow, and 
        raise Exception(f"Error: not enough baseline available for {sz_begin_uuid}. Seizure begins on frame {sz_begin_frame}. Need at least {bl_len_frames} frames before for {BL_LEN_S} seconds of baseline")
        # bl_begin_frame = 1
    bl_entry.append((bl_uuid, bl_begin_frame, bl_end_frame))
    # get aftermath
    am_uuid = sz_end_uuid
    am_begin_frame = sz_end_frame + 1
    am_end_frame = sz_end_frame + am_len_frames
    # test if aftermath would fall outside last "normal" interval in recording:
    if am_end_frame > df_sz_movies[(df_sz_movies["uuid"] == am_uuid) & (df_sz_movies["interval_type"] == "normal")].frame_end.max():
        raise Exception(f"Error: not enough aftermath available for {sz_end_uuid}. Seizure ends on frame {sz_end_frame}. Need {am_len_frames} frames after for {AM_LEN_S} seconds of aftermath")
    am_entry.append((am_uuid, am_begin_frame, am_end_frame))

IndentationError: expected an indented block (2655821045.py, line 31)