In [4]:
# ------------------------------------------------------------------------------ #
# @Author:        F. Paul Spitzner
# @Email:         paul.spitzner@ds.mpg.de
# @Created:       2023-08-04 11:59:06
# @Last Modified: 2023-08-04 11:59:15
# ------------------------------------------------------------------------------ #
# This notebook shows how to combine three sources of data needed to get
# one dataframe that has everything. (@ps -> make a helper function?)
# - load spikes from multiple hdf5 files, recursively.
# - load the dataframe with the analysis results of hdestimator and mrestimator
# - load metrics from the Allen Institute csv files
#
# This was needed because dumping spike-times in our currently chosen format
# in an all-you-can-eat hdf5 dataframe was not possible.
# ------------------------------------------------------------------------------ #

# Load (or reload) the IPython extensions used by this notebook.
# `%autoreload 2` picks up edits to imported project modules without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2
%reload_ext ipy_dict_hierarchy
%reload_ext watermark

import logging
# Notebook-level logger, set to DEBUG so that our own messages are shown.
log = logging.getLogger("notebook")
log.setLevel("DEBUG")

# The root logger stays at WARNING; other loggers only report problems unless
# their level is raised explicitly.
logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s | %(levelname)-8s | %(name)-s > %(funcName)-s > %(message)s",
)

import re
import glob
import h5py
import sys
import os
import numpy as np
import xarray as xr
import pandas as pd
import dask

from tqdm import tqdm


# Make the parent of the current working directory importable, so that the
# project-local `ana` package can be found.
# The same entry also needs to be added for each dask-worker.
extra_path = os.path.abspath('../')
# Guard against duplicates: without it, every re-run of this cell would append
# the same entry to sys.path again.
if extra_path not in sys.path:
    sys.path.append(extra_path)
log.info(f"project directory: {extra_path}")

from ana import utility as utl
# utl.log.setLevel("DEBUG")


2023-08-14 10:55:21,326 | INFO     | notebook > <module> > project directory: /Users/paul/para/2_Projects/information_timescales/repo/_latest/experiment_analysis


In [5]:
# Collect the unit metadata from all hdf5 files below `data_dir`.
# The search is recursive through all subdirs, so specify the path as closely
# as possible.
# (for verbose loader output, enable: utl.log.setLevel("DEBUG"))
data_dir = "../../../gnode/experiment_analysis/dat/"

# Each helper receives the dataframe of the previous stage and returns the
# next one, so the preparation reads top-down as a single pipeline.
meta_df = (
    utl.all_unit_metadata(data_dir, reload=False)
    .pipe(utl.load_spikes)
    .pipe(utl.default_filter, trim=False)
    .pipe(utl.merge_blocks)
)
meta_df

2023-08-14 10:55:22,461 | DEBUG    | its_utility > all_unit_metadata > Found 61 hdf5 files in /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat
Fetching metadata from sessions:   0%|          | 0/61 [00:00<?, ?it/s]2023-08-14 10:55:22,477 | INFO     | its_utility > all_unit_metadata > Skipping /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/meta_df_final.h5. This might be a hdf5 file with no session data.
2023-08-14 10:55:22,479 | INFO     | its_utility > all_unit_metadata > Skipping /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/meta_df_merged_final.h5. This might be a hdf5 file with no session data.
2023-08-14 10:55:22,480 | INFO     | its_utility > all_unit_metadata > Skipping /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/meta_df_2_final.h5. This might be a hdf5 file with no session data.
Fetching metadata from sessions: 100%|██████

Unnamed: 0,session,stimulus,block,unit_id,ecephys_structure_acronym,invalid_spiketimes_check,recording_length,firing_rate,filepath,num_spikes,spiketimes
0,787025148,natural_movie_one_more_repeats,3.0,951013153,VISam,SUCCESS,900.668806,21.542880,/Users/paul/para/2_Projects/information_timesc...,19403,[[[<xarray.DataArray (spiketimes: 30719)>\narr...
1,787025148,natural_movie_one_more_repeats,3.0,951013143,VISam,SUCCESS,900.718039,11.652925,/Users/paul/para/2_Projects/information_timesc...,10496,[[[<xarray.DataArray (spiketimes: 30719)>\narr...
2,787025148,natural_movie_one_more_repeats,3.0,951013133,VISam,SUCCESS,894.867666,0.448111,/Users/paul/para/2_Projects/information_timesc...,401,[[[<xarray.DataArray (spiketimes: 30719)>\narr...
3,787025148,natural_movie_one_more_repeats,3.0,951013202,VISam,SUCCESS,900.265072,2.384853,/Users/paul/para/2_Projects/information_timesc...,2147,[[[<xarray.DataArray (spiketimes: 30719)>\narr...
4,787025148,natural_movie_one_more_repeats,3.0,951013187,VISam,SUCCESS,896.074534,0.706414,/Users/paul/para/2_Projects/information_timesc...,633,[[[<xarray.DataArray (spiketimes: 30719)>\narr...
...,...,...,...,...,...,...,...,...,...,...,...
43684,847657808,natural_movie_one_more_repeats,merged_3.0_and_8.0,951190716,LP,SUCCESS,1067.306152,3.486347,/Users/paul/para/2_Projects/information_timesc...,3721,"[<xarray.DataArray ()>\narray(3.1584473, dtype..."
43685,847657808,natural_movie_one_more_repeats,merged_3.0_and_8.0,951190722,LP,SUCCESS,1077.341553,2.770709,/Users/paul/para/2_Projects/information_timesc...,2985,"[<xarray.DataArray ()>\narray(0.40185547, dtyp..."
43686,847657808,natural_movie_one_more_repeats,merged_3.0_and_8.0,951190724,LP,SUCCESS,1076.488525,2.043682,/Users/paul/para/2_Projects/information_timesc...,2200,"[<xarray.DataArray ()>\narray(1.0895996, dtype..."
43687,847657808,natural_movie_one_more_repeats,merged_3.0_and_8.0,951190819,LP,SUCCESS,1075.182861,2.104758,/Users/paul/para/2_Projects/information_timesc...,2263,"[<xarray.DataArray ()>\narray(1.2353516, dtype..."


In [6]:
# Dataframe with the previously computed analysis results, stored in the hdf5
# file under the "/meta_df" key.
analysed_df = pd.read_hdf(
    path_or_buf=f"{data_dir}/meta_df_merged_final.h5",
    key="/meta_df",
)
analysed_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ecephys_structure_acronym,invalid_spiketimes_check,recording_length,firing_rate,filepath,num_spikes,R_tot,tau_R,tau_single,tau_double,tau_R_details,tau_single_details,tau_double_details
unit_id,stimulus,session,block,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
950987325,natural_movie_one_more_repeats,774875821,3.0,VISam,SUCCESS,898.087879,1.999804,/data.nst/share/data/allen_visual_coding_neuro...,1796,0.028897,0.292095,2.204573,2.204866,"{'firing_rate': 1.9997995746528745, 'firing_ra...","{'tau': 2.2045730546241966, 'mre': 0.997734557...","{'tau': 2.2048661904516664, 'mre': 0.997734858..."
950987344,natural_movie_one_more_repeats,774875821,3.0,VISam,SUCCESS,898.868180,0.837720,/data.nst/share/data/allen_visual_coding_neuro...,753,0.254426,0.016040,0.049767,0.045802,"{'firing_rate': 0.8332684370376139, 'firing_ra...","{'tau': 0.04976678697174655, 'mre': 0.90441349...","{'tau': 0.04580227717841129, 'mre': 0.89658257..."
950987340,natural_movie_one_more_repeats,774875821,3.0,VISam,SUCCESS,900.287381,1.568388,/data.nst/share/data/allen_visual_coding_neuro...,1412,0.039074,0.057306,0.263706,0.233516,"{'firing_rate': 1.5594974952515301, 'firing_ra...","{'tau': 0.26370610494040975, 'mre': 0.98121811...","{'tau': 0.23351615919178326, 'mre': 0.97881581..."
950987352,natural_movie_one_more_repeats,774875821,3.0,VISam,SUCCESS,894.181973,0.542395,/data.nst/share/data/allen_visual_coding_neuro...,485,0.074347,0.126436,0.657249,0.594182,"{'firing_rate': 0.5412750157965074, 'firing_ra...","{'tau': 0.6572492090313748, 'mre': 0.992421399...","{'tau': 0.5941821956040216, 'mre': 0.991620379..."
950987362,natural_movie_one_more_repeats,774875821,3.0,VISam,SUCCESS,900.613482,2.053045,/data.nst/share/data/allen_visual_coding_neuro...,1849,0.037868,0.110199,0.778819,0.749355,"{'firing_rate': 2.036386247175541, 'firing_rat...","{'tau': 0.7788194682871189, 'mre': 0.993600590...","{'tau': 0.7493554822530509, 'mre': 0.993349810..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
951127614,natural_movie_one_more_repeats,839557629,merged_3.0_and_8.0,VISp,SUCCESS,818.458008,0.058647,/data.nst/share/data/allen_visual_coding_neuro...,48,0.011261,0.000000,5.241771,0.001162,"{'firing_rate': 0.05864672677956161, 'firing_r...","{'tau': 5.2417714824133785, 'mre': 0.999046578...","{'tau': 0.0011617555281603106, 'mre': 0.013516..."
951128175,natural_movie_one_more_repeats,839557629,merged_3.0_and_8.0,VISl,SUCCESS,913.291992,0.174095,/data.nst/share/data/allen_visual_coding_neuro...,159,0.094832,0.075662,0.136336,0.115506,"{'firing_rate': 0.1740948981435352, 'firing_ra...","{'tau': 0.13633607252766394, 'mre': 0.96399026...","{'tau': 0.11550580320940909, 'mre': 0.95763567..."
951185417,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,VISam,ERR_REC_LEN,957.861328,0.220282,/data.nst/share/data/allen_visual_coding_neuro...,211,0.159574,0.230245,1.579377,1.579400,"{'firing_rate': 0.22028156368590562, 'firing_r...","{'tau': 1.5793768186950516, 'mre': 0.996839200...","{'tau': 1.579400109924387, 'mre': 0.9968392468..."
951185493,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,VISam,ERR_REC_LEN,863.650879,1.269031,/data.nst/share/data/allen_visual_coding_neuro...,1096,0.126121,0.178080,1.103750,0.676409,"{'firing_rate': 1.2690252473499257, 'firing_ra...","{'tau': 1.1037496155641024, 'mre': 0.995480232...","{'tau': 0.6764090276893551, 'mre': 0.992635276..."


In [7]:
# Merge the spike metadata with the analysis results:
#   1. index `meta_df` by the same levels that `analysed_df` is indexed by,
#   2. merge the two frames on that index,
#   3. turn the index levels back into ordinary columns.
#
# Written as one expression without `inplace=True`, so `meta_df` is only
# rebound after all three steps succeeded. With the in-place variant, a failing
# merge left `meta_df` with the key columns already moved into the index, and
# re-running the cell then failed in `set_index`. It also silently altered the
# frame object that the earlier cell displayed.
meta_df = utl.strict_merge_dfs_by_index(
    meta_df.set_index(analysed_df.index.names, drop=True),
    analysed_df,
).reset_index(drop=False)



In [9]:
# Raise the verbosity of the utility module's logger, so that `load_metrics`
# reports which columns it loaded from which csv file.
utl.log.setLevel("DEBUG")

# Add the metric columns found in the csv files in `data_dir` to the dataframe.
meta_df = meta_df.pipe(utl.load_metrics, data_dir)


2023-08-14 11:00:29,910 | DEBUG    | its_utility > load_metrics > Loaded columns ['unit_id', 'g_dsi_dg', 'mod_idx_dg', 'image_selectivity_ns'] from /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/brain_observatory_unit_metrics_filtered.csv
2023-08-14 11:00:30,101 | DEBUG    | its_utility > load_metrics > Loaded columns ['unit_id', 'g_dsi_dg', 'mod_idx_dg'] from /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/functional_connectivity_analysis_metrics.csv
2023-08-14 11:00:30,102 | INFO     | its_utility > load_metrics > Column g_dsi_dg found in multiple dataframes.
2023-08-14 11:00:30,102 | INFO     | its_utility > load_metrics > Column mod_idx_dg found in multiple dataframes.
2023-08-14 11:00:30,104 | DEBUG    | its_utility > load_metrics > Matched 19671 rows from meta_df in /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/brain_observatory_unit_metrics_filtered.csv
2023-08-14