Intended use case: given a file with assembled traces, open it, copy its data, load new data, append it, and write the extended dataset to a new file. Specific use: adding an LFP+LabView-only dataset to the assembled_traces CHR2 file.

In [None]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import pandas as pd
import labrotation.file_handling as fh
import datadoc_util as ddutil
from labrotation import two_photon_session as tps
import h5py
from datetime import datetime as dt
from datetime import timedelta
import matplotlib.pyplot as plt
import pyabf
import matlab.engine

In [None]:
def get_datetime_for_fname():
    """Return the current local time as a filename-friendly string.

    The format is ``YYYYMMDD-HHMMSS`` (zero-padded), e.g. ``20240131-093005``.

    The datetime class is imported locally on purpose: a later cell of this
    notebook rebinds the global name ``dt`` to an array of time steps, which
    would make a lookup of the global ``dt`` fail here.
    """
    from datetime import datetime

    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
def create_dt(t):
    """Return the per-sample time step of the time axis ``t``.

    Parameters
    ----------
    t : array-like
        1D sequence of time stamps.

    Returns
    -------
    numpy.ndarray
        Float array of the same length as ``t``. Element ``i`` (``i >= 1``) is
        ``t[i] - t[i-1]``; element 0 copies element 1, i.e. the first step is
        assumed to equal the second so that it is not 0. Inputs with fewer
        than two samples have no measurable step and yield zeros (an empty
        input yields an empty array).
    """
    t = np.asarray(t, dtype=float)
    steps = np.zeros(len(t))
    if len(t) > 1:
        steps[1:] = np.diff(t)
        steps[0] = steps[1]  # assume same step size to avoid 0
    return steps
def create_totdist_abs(speed, dt):
    """Return the cumulative absolute distance travelled.

    Each sample contributes ``abs(speed[i] * dt[i])``; the result is the
    running sum of these contributions. The first sample is treated like all
    others (absolute value), so the trace never starts with a negative value.

    Parameters
    ----------
    speed : array-like
        1D sequence of speed samples.
    dt : array-like
        1D sequence of time steps; only the first ``len(speed)`` entries are
        used.

    Returns
    -------
    numpy.ndarray
        Float array with ``len(speed)`` elements (empty for empty input).

    Raises
    ------
    IndexError
        If ``dt`` has fewer entries than ``speed``.
    """
    speed = np.asarray(speed, dtype=float)
    dt = np.asarray(dt, dtype=float)
    if len(dt) < len(speed):
        raise IndexError("dt must contain at least as many samples as speed")
    return np.cumsum(np.abs(speed * dt[:len(speed)]))

In [None]:
def read_env_file(path="./.env"):
    """Parse a ``KEY=VALUE`` file into a dict of strings.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Only the first ``=`` separates key from value, so values may
    themselves contain ``=``. Surrounding whitespace of each line is removed.
    """
    entries = dict()
    with open(path, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            entries[key] = value
    return entries


# Settings read from the .env file in the working directory; stays empty when
# the file is missing.
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    env_dict = read_env_file("./.env")
print(env_dict.keys())

In [None]:
# Symbol of the data drive (handed to ddoc.setDataDriveSymbol further down):
# use the .env entry when present, otherwise fall back to "R" and say so.
try:
    SERVER_SYMBOL = env_dict["SERVER_SYMBOL"]
except KeyError:
    SERVER_SYMBOL = "R"
    print(f"Server symbol not found in .env file. Setting it to {SERVER_SYMBOL}")

In [None]:
# Folder of the MATLAB 2p code, taken from the .env settings; it is added
# (via genpath) to the MATLAB engine path before the belt correction is run.
# Raises KeyError when the .env file has no MATLAB_2P_FOLDER entry.
matlab_2p_folder = env_dict["MATLAB_2P_FOLDER"]

In [None]:
# Directory the exported HDF5 file is written to.
# NOTE(review): fh.open_dir presumably opens a folder-picker dialog with the
# given prompt — confirm against labrotation.file_handling.
output_dir = fh.open_dir("Choose export directory for results!")

In [None]:
# Set up the data documentation from the folder configured in .env, load it,
# and tell it which drive symbol the data lives on.
ddoc = ddutil.DataDocumentation(env_dict["DATA_DOCU_FOLDER"])
ddoc.loadDataDoc()
ddoc.setDataDriveSymbol(SERVER_SYMBOL)
# NOTE(review): presumably verifies that the documented files exist on disk —
# confirm against datadoc_util.
ddoc.checkFileConsistency()

## List recordings to be added 

In [None]:
# Experiment types whose recordings are candidates for being added.
new_experiment_types = [
    "jrgeco_bilat_ctl_bilat_lfpr",
    "jrgeco_bilat_sd_bilat_lfpr",
    "jrgeco_bilat_ctl_monor_lfpr",
    "jrgeco_bilat_sd_bilat_lfpl",
    "jrgeco_bilat_ctl_right_lfpr",
]
df_new_recordings = ddoc.getRecordingsWithExperimentType(new_experiment_types)

# Keep only the recordings of the mice listed for the "no_nikon" dataset.
dset_dict = {"no_nikon": ["WEZ-8946"]}
is_selected_mouse = df_new_recordings["mouse_id"].isin(dset_dict["no_nikon"])
df_new_recordings = df_new_recordings[is_selected_mouse]

In [None]:
# Print "<uuid>: <lfp file>" for every candidate recording.
for _, recording in df_new_recordings.iterrows():
    print(f"{recording['uuid']}: {recording['lfp']}")

## Manually save them

In [None]:
# UUID of the recording that the following cells load, align and export.
uuid = "202c1dde3e5d4c07b9033d8c6f82b7ef"

In [None]:
# First row of the session-file table for this UUID; its "folder", "lfp" and
# "labview" entries are used below to build the input file paths.
files = ddoc.getSessionFilesForUuid(uuid).iloc[0]

In [None]:
# Build the input paths inside the session folder.
fold = files.folder
labview_fname = files["labview"]
lfp_fpath = os.path.join(fold, files["lfp"])
lv_fpath = os.path.join(fold, labview_fname)
# The LabView time stamps live next to the LabView file, in a file named
# <labview file name without extension> + "time.txt".
labview_stem, _ = os.path.splitext(labview_fname)
lv_times_fpath = os.path.join(fold, labview_stem + "time.txt")

In [None]:

# --- LabView time stamps: column 0 of the ...time.txt file -----------------
# The values are kept exactly as read (no shift to a zero start and no unit
# conversion at this point).
with open(lv_times_fpath, "r") as f:
    lv_t = np.array([float(line.split("\t")[0]) for line in f])

# --- LabView belt data: tab-separated, 20 columns per complete row ---------
lv_rounds = []  # col 0
lv_speed = []  # col 1
lv_totdist = []  # col 2
lv_distancepr = []  # col 3
lv_stripes = []  # col 6
lv_stripespr = []  # col 7
with open(lv_fpath, "r") as f:
    for line in f:
        # split once per line instead of once per extracted column
        fields = line.strip().split("\t")
        if len(fields) != 20:
            # incomplete row: report its column count and skip it
            print(len(fields))
            continue
        lv_rounds.append(int(fields[0]))
        lv_speed.append(float(fields[1]))
        lv_totdist.append(float(fields[2]))
        lv_distancepr.append(float(fields[3]))
        lv_stripes.append(float(fields[6]))
        lv_stripespr.append(float(fields[7]))

# Trim everything to the common length: the time file may contain a last,
# incomplete entry, and the data file may be longer than the time file.
n_samples = min(len(lv_t), len(lv_speed))
lv_t = lv_t[:n_samples]
lv_speed = np.array(lv_speed)[:n_samples]
lv_rounds = np.array(lv_rounds)[:n_samples]
lv_totdist = np.array(lv_totdist)[:n_samples]
lv_distancepr = np.array(lv_distancepr)[:n_samples]
lv_stripes = np.array(lv_stripes)[:n_samples]
lv_stripespr = np.array(lv_stripespr)[:n_samples]

assert len(lv_t) == len(lv_speed)

In [None]:
# run part of matlab pipeline
# Hand the raw belt traces to the MATLAB pipeline for correction.
# The keys are the field names the MATLAB function receives.
belt_struct = {
    "speed": lv_speed,
    "distance": lv_totdist,
    "round": lv_rounds,
    "distancePR": lv_distancepr,
    "stripes": lv_stripes,
    "stripesPR": lv_stripespr,
    "time": lv_t,
}
# Start a MATLAB session and put the 2p code folder (including subfolders)
# on its search path before calling into it.
eng = matlab.engine.start_matlab()
m2p_path = eng.genpath(matlab_2p_folder)
eng.addpath(m2p_path, nargout=0)
belt_struct_proc = eng.beltCorrectWithoutNikon(belt_struct, nargout=1)

In [None]:
# convert to numpy 
# Turn every field returned by MATLAB into a squeezed numpy array.
belt_dict = {
    key: np.squeeze(np.array(belt_struct_proc[key]))
    for key in belt_struct_proc
}
# Derive a time axis divided by 1000 (stored as "time_s") that starts at 0.
belt_dict["time_s"] = belt_dict["time"]/1000.
belt_dict["time_s"] -= belt_dict["time_s"][0]

In [None]:
# Open the LFP recording (ABF file) with pyabf.
lfp_file = pyabf.ABF(lfp_fpath)

In [None]:
# Channel 0 of sweep 0: the LFP trace and its time axis.
lfp_file.setSweep(sweepNumber=0, channel=0)
lfp_y = lfp_file.sweepY
# NOTE(review): 1.0038 is an unexplained scaling factor for the LFP time
# axis, presumably a clock-drift correction — TODO confirm its origin.
lfp_t = lfp_file.sweepX*1.0038
lfp_t -= lfp_t[0]  # shift to 0

# Channel 1 of sweep 0: used below as the locomotion signal.
lfp_file.setSweep(sweepNumber=0, channel=1)
lfp_loco = lfp_file.sweepY

In [None]:
# Half of the difference between the last belt (LabView) time stamp and the
# last LFP time stamp, shown as the cell output for manual inspection.
(belt_dict["time_s"][-1]-lfp_t[-1])/2.

In [None]:
# Protocol timing: bl_length is stored as "stim_start" and
# bl_length + stim_length as "poststim_start" in the exported file.
bl_length = 300  # 5 min
stim_length = 4  # 4 sec

### Replicate some steps from the matlab processing step

In [None]:
# Metadata of this recording, looked up in the data documentation by UUID.
exp_type = ddoc.getExperimentTypeForUuid(uuid)
mouse_id = ddoc.getMouseIdForUuid(uuid)
# The window type is not looked up; the literal string "None" (not the None
# object) is what gets stored as the win_type attribute.
win_type = "None"

In [None]:
# Constant subtracted from the LabView time axis (time_s) so that it matches
# the LFP time axis; chosen by hand and checked with the plots below.
offset = 0.8

In [None]:
# Alignment check, zoomed to x in [0, 100]: belt speed on the offset-corrected
# LabView time axis against channel 1 of the LFP file.
fig = plt.figure(figsize=(18,18))
plt.plot(belt_dict["time_s"]-offset, belt_dict["speed"])
# NOTE(review): the -2.5 shift presumably removes the baseline level of the
# locomotion channel so both traces overlap — TODO confirm.
plt.plot(lfp_t, lfp_loco-2.5)
plt.xlim((0, 100))
plt.show()

In [None]:
# Same alignment check as above, but over the whole recording (no x limits).
fig = plt.figure(figsize=(18,18))
plt.plot(belt_dict["time_s"]-offset, belt_dict["speed"])
# NOTE(review): the -2.5 shift presumably removes the baseline level of the
# locomotion channel so both traces overlap — TODO confirm.
plt.plot(lfp_t, lfp_loco-2.5)
plt.show()

In [None]:
# Belt speed together with the LFP trace (channel 0); the red vertical lines
# mark bl_length and bl_length + stim_length, i.e. the values exported as
# stim_start and poststim_start.
fig = plt.figure(figsize=(18,18))
plt.plot(belt_dict["time_s"]-offset, belt_dict["speed"])
plt.plot(lfp_t, lfp_y)
plt.vlines(x=[bl_length, bl_length+stim_length], ymin=-1, ymax=1, color="red")
# Optional zoom windows (around the stimulation / at the recording start):
#plt.xlim((295, 390))
#plt.xlim((0, 20))
plt.show()
# TODO: come up with a constant offset, apply it to all recordings and check roughly if they match. Then decide whether to use LFP or LabView (make sure labview > 10 min, so we can use loco quantities)
# TODO: then assemble the dataset, include it in the loco analysis

In [None]:
# Name the export after the LFP file: <lfp file name without extension>_segmented.h5
lfp_stem, _ = os.path.splitext(os.path.basename(lfp_fpath))
output_fpath = os.path.join(output_dir, lfp_stem + "_segmented.h5")
print(output_fpath)

In [None]:
# Per-sample time step of the belt time axis, and the cumulative absolute
# distance computed from speed and time step.
# NOTE(review): this rebinds the notebook-level name `dt`, which the import
# cell binds to datetime.datetime; code that still expects `dt` to be the
# datetime class will fail once this cell has run.
dt = create_dt(belt_dict["time_s"])
totdist_abs = create_totdist_abs(belt_dict["speed"], dt)

In [None]:
# Assemble the complete payload BEFORE opening the file: mode "w" truncates an
# existing file, so a missing key or variable must fail before anything on
# disk is touched instead of leaving a partially written file behind.
h5_attrs = {
    "uuid": uuid,
    "stim_start": bl_length,
    "poststim_start": bl_length + stim_length,
    "exp_type": exp_type,
    "win_type": win_type,
    "mouse_id": mouse_id,
}
h5_datasets = {
    # LFP-file channels; the locomotion channel shares the LFP time axis
    "lfp_mov_t": lfp_t,
    "lfp_mov_y": lfp_loco,
    "lfp_t": lfp_t,
    "lfp_y": lfp_y,
    # LabView (belt) quantities
    "lv_dist": belt_dict["distancePR"],
    "lv_dt": dt,
    "lv_speed": belt_dict["speed"],
    "lv_running": belt_dict["running"],
    "lv_t_s": belt_dict["time_s"]-offset,  # match to lfp
    # NOTE(review): unclear whether the belt "distance" field is the right
    # source for totdist — confirm against the other assembled datasets.
    "lv_totdist": belt_dict["distance"],
    "lv_totdist_abs": totdist_abs,
}

with h5py.File(output_fpath, "w") as hf:
    for attr_name, attr_value in h5_attrs.items():
        hf.attrs[attr_name] = attr_value
    for dset_name, dset_values in h5_datasets.items():
        hf.create_dataset(dset_name, data=dset_values)
    
    

# Open all recordings, calculate metrics

In [None]:
# Root folder that is walked recursively for the per-recording HDF5 files.
dset_folder = "C:\\Data\\bilat"

In [None]:
# Per-recording contents of the dataset folder, both keyed by the "uuid"
# attribute of each file:
#   dict_attrs[uuid] -> {attribute name: value}
#   dict_data[uuid]  -> {dataset name: array read fully into memory}
dict_attrs = dict()
dict_data = dict()

# Loop names are chosen so that the notebook globals `files` and `uuid` set in
# earlier cells are not overwritten by this cell.
for root, _, fnames in os.walk(dset_folder):
    for fname in fnames:
        fpath = os.path.join(root, fname)
        # Anything that is not an HDF5 file (notes, spreadsheets, ...) is
        # skipped instead of aborting the whole walk.
        if not h5py.is_hdf5(fpath):
            print(f"Skipping non-HDF5 file: {fpath}")
            continue
        with h5py.File(fpath, "r") as hf:
            current_attrs = {key: hf.attrs[key] for key in hf.attrs.keys()}
            current_data = {key: hf[key][:] for key in hf.keys()}
        rec_uuid = current_attrs["uuid"]
        dict_attrs[rec_uuid] = current_attrs
        dict_data[rec_uuid] = current_data

In [None]:
# Show the collected per-recording attributes as the cell output.
dict_attrs

### Calculate pre-stim, post stim metrics

In [None]:
# Show the per-recording attributes again for reference while computing the
# metrics below.
dict_attrs

In [None]:
# Column accumulators for the metrics table. Every recording appends two
# entries to each list: first its baseline segment, then its post-stimulation
# segment.
event_uuids = []
mouse_ids = []
window_types = []
exp_types = []
segment_types = []
segment_lengths = []
totdists = []
totdists_abs = []
runnings = []
totdists_norm = []
totdists_abs_norm = []

# Reference segment length that the running count is normalised to and that is
# reported as segment_length; the *_norm distances are 10000*distance/4500.
# NOTE(review): 4500 presumably is the segment length used in the other
# datasets this table is merged with — TODO confirm.
ref_segment_length = 4500

for uuid in dict_attrs.keys():
    mouse_id = dict_attrs[uuid]["mouse_id"]
    win_type = dict_attrs[uuid]["win_type"]
    exp_type = dict_attrs[uuid]["exp_type"]

    t_stim = dict_attrs[uuid]["stim_start"]
    t_poststim = dict_attrs[uuid]["poststim_start"]
    t_data = dict_data[uuid]["lv_t_s"]

    # baseline: every sample before stimulation onset
    i_pre = t_data < t_stim
    pre_length = np.sum(i_pre)

    # matching post segment: as many samples as the baseline, starting at the
    # first sample at or after the end of the stimulation
    i_first_post = np.argmax(t_data >= t_poststim)
    i_post = np.zeros(t_data.shape, dtype=bool)
    i_post[i_first_post:i_first_post+pre_length] = True
    post_length = np.sum(i_post)
    if not pre_length == post_length:
        # recording too short for a full-length post segment: report both
        print(pre_length)
        print(post_length)
    # The post segment is normalised with the baseline length even when it
    # came out shorter (behaviour kept from the original analysis).
    post_length = pre_length

    # separate the LabView ("lv_") traces into pre-stim and post-stim
    dict_pre = dict()
    dict_post = dict()
    for key in dict_data[uuid].keys():
        if "lv_" in key:
            data_arr = dict_data[uuid][key]
            dict_pre[key] = data_arr[i_pre]
            dict_post[key] = data_arr[i_post]

    # Compute the metrics of both segments before touching the accumulators.
    segment_rows = []
    for segment_type, segment, segment_n in (
        ("baseline", dict_pre, pre_length),
        ("post-stimulation", dict_post, post_length),
    ):
        seg_totdist = segment["lv_totdist"][-1] - segment["lv_totdist"][0]
        seg_totdist_abs = segment["lv_totdist_abs"][-1] - segment["lv_totdist_abs"][0]
        # Each segment uses its OWN running trace (the post-stimulation value
        # used to be computed from the baseline trace by mistake).
        seg_running = np.sum(segment["lv_running"])*ref_segment_length/segment_n  # normalize to other data
        segment_rows.append((segment_type, seg_totdist, seg_totdist_abs, seg_running))

    for segment_type, seg_totdist, seg_totdist_abs, seg_running in segment_rows:
        event_uuids.append(uuid)
        mouse_ids.append(mouse_id)
        window_types.append(win_type)
        exp_types.append(exp_type)
        segment_types.append(segment_type)
        segment_lengths.append(ref_segment_length)
        totdists.append(seg_totdist)
        totdists_abs.append(seg_totdist_abs)
        runnings.append(seg_running)
        totdists_norm.append(10000*seg_totdist/ref_segment_length)
        totdists_abs_norm.append(10000*seg_totdist_abs/ref_segment_length)

In [None]:
# Assemble the metrics table: one row per (recording, segment) pair, one
# column per accumulator list.
metric_columns = {
    "event_uuid": event_uuids,
    "mouse_id": mouse_ids,
    "window_type": window_types,
    "exp_type": exp_types,
    "segment_type": segment_types,
    "segment_length": segment_lengths,
    "totdist": totdists,
    "totdist_abs": totdists_abs,
    "running": runnings,
    "totdist_norm": totdists_norm,
    "totdist_abs_norm": totdists_abs_norm,
}
df_metrics = pd.DataFrame(data=metric_columns)

In [None]:
# Save the metrics table as an Excel file, without the DataFrame index column.
df_metrics.to_excel("C:\\Data\\bilat_loco_metrics.xlsx", index=False)