In [1]:
# ------------------------------------------------------------------------------ #
# @Author:        F. Paul Spitzner
# @Email:         paul.spitzner@ds.mpg.de
# @Created:       2023-08-04 11:59:06
# @Last Modified: 2023-08-04 11:59:15
# ------------------------------------------------------------------------------ #
# Run on the cluster, using dask.
# Analyses all units and saves a large dataframe with everything
# that is needed.
# ------------------------------------------------------------------------------ #

# autoreload in mode 2 re-imports modules before each cell is executed, so
# edits to the project code (e.g. `ana.utility`) are picked up without a restart.
%reload_ext autoreload
%autoreload 2
# presumably provides the tree-style display of nested dicts used further down
%reload_ext ipy_dict_hierarchy
# needed for the `%watermark` version listing in the last cell
%reload_ext watermark

import logging
# Root logger: warnings and above only. Each record shows the timestamp, level,
# logger name and calling function in front of the message.
logging.basicConfig(
    format="%(asctime)s | %(levelname)-8s | %(name)-s > %(funcName)-s > %(message)s",
    level=logging.WARNING,
)
# The notebook's own logger is set to be more verbose than the root logger.
log = logging.getLogger("notebook")
log.setLevel("DEBUG")

import re
import glob
import h5py
import sys
import numpy as np
import xarray as xr
import pandas as pd
import dask

from tqdm import tqdm

sys.path.append('../')

from ana import utility as utl

# specify the path as closely as possible, we search recursively through all subdirs
data_dir = "../../../gnode/experiment_analysis/dat/"
# Every step below rebinds `meta_df`, so always re-run this cell as a whole.
# 1) per-unit metadata, collected from the hdf5 session files found in data_dir
meta_df = utl.all_unit_metadata(data_dir, reload=False)
# 2) load the spikes of every unit (the slow step, see the progress bar in the output)
meta_df = utl.load_spikes(meta_df)
# 3) default quality checks; according to the debug log these are a rate check,
#    a zero-length check and a minimum-duration check
meta_df = utl.default_filter(meta_df, trim=True)
# 4) merge blocks per unit (cf. `block` values like "merged_3.0_and_8.0" in the output)
meta_df = utl.merge_blocks(meta_df)
meta_df.tail()


2023-08-09 16:46:01,617 | DEBUG    | ana.utility > all_unit_metadata > Found 58 hdf5 files in /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat
Fetching metadata from sessions: 100%|██████████| 58/58 [00:00<00:00, 64.85it/s]
2023-08-09 16:46:02,540 | DEBUG    | ana.utility > load_spikes > Loading spikes for 12584 units, 31190 rows for pandas dataframe.
Loading spikes for sessions: 100%|██████████| 58/58 [00:36<00:00,  1.60it/s]
2023-08-09 16:46:38,830 | DEBUG    | ana.utility > default_filter > Default quality checks, valid rows before: 30912
2023-08-09 16:46:38,836 | DEBUG    | ana.utility > default_filter > After rate check: 30842
2023-08-09 16:46:38,841 | DEBUG    | ana.utility > default_filter > After zero-length check: 30842
2023-08-09 16:46:38,856 | DEBUG    | ana.utility > default_filter > After minmum-duration check: 17646
Merging blocks for units: 100%|██████████| 11999/11999 [00:16<00:00, 747.15it/s]
2023-08-09 16:46:55,233 | DEBUG    | ana

Unnamed: 0,unit_id,stimulus,session,block,ecephys_structure_acronym,invalid_spiketimes_check,recording_length,firing_rate,filepath,num_spikes,spiketimes
23288,951190716,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1067.306152,3.486347,/Users/paul/para/2_Projects/information_timesc...,3721,"[<xarray.DataArray ()>\narray(3.1584473, dtype..."
23289,951190722,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1077.341553,2.770709,/Users/paul/para/2_Projects/information_timesc...,2985,"[<xarray.DataArray ()>\narray(0.40185547, dtyp..."
23290,951190724,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1076.488525,2.043682,/Users/paul/para/2_Projects/information_timesc...,2200,"[<xarray.DataArray ()>\narray(1.0895996, dtype..."
23291,951190819,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1075.182861,2.104758,/Users/paul/para/2_Projects/information_timesc...,2263,"[<xarray.DataArray ()>\narray(1.2353516, dtype..."
23292,951190848,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,VISrl,SUCCESS,1071.963867,0.80693,/Users/paul/para/2_Projects/information_timesc...,865,"[<xarray.DataArray ()>\narray(1.5703125, dtype..."


False

In [2]:
# Settings that `full_analysis` passes on to `hde.api.wrapper`.
hde_settings = {
    "number_of_bootstraps_R_tot": 0,
    "number_of_bootstraps_R_max": 250,
    # 0.03, the same number as `tmin` of the mrestimator settings
    "timescale_minimum_past_range": 30 / 1000,
    "embedding_number_of_bins_set": [5],
    "estimation_method": "shuffling",
    "persistent": False,
    # Ascending past ranges, written as three consecutive segments: the first
    # and the last one use wide steps, the middle one is sampled more finely.
    "embedding_past_range_set": (
        [
            0.005, 0.00561, 0.00629, 0.00706, 0.00792,
            0.00889, 0.00998, 0.01119, 0.01256, 0.01409, 0.01581, 0.01774,
            0.01991, 0.02233, 0.02506, 0.02812, 0.03155, 0.0354, 0.03972, 0.04456,
            0.05, 0.0561, 0.06295,
        ]
        + [
            0.06441, 0.06591, 0.06745, 0.06902, 0.07063, 0.07227, 0.07396, 0.07568,
            0.07744, 0.07924, 0.08109, 0.08298, 0.08491, 0.08689, 0.08891, 0.09099,
            0.0931, 0.09527, 0.09749, 0.09976, 0.10209, 0.10446, 0.1069, 0.10939,
            0.11194, 0.11454, 0.11721, 0.11994, 0.12274, 0.12559, 0.12852, 0.13151,
            0.13458, 0.13771, 0.14092, 0.1442, 0.14756, 0.151, 0.15451, 0.15811,
            0.1618, 0.16557, 0.16942, 0.17337, 0.17741, 0.18154, 0.18577, 0.19009,
            0.19452, 0.19905, 0.20369, 0.20843, 0.21329, 0.21826, 0.22334,
        ]
        + [
            0.25059, 0.28117, 0.31548, 0.35397, 0.39716, 0.44563, 0.5, 0.56101, 0.62946,
            0.70627, 0.79245, 0.88914, 0.99763, 1.11936, 1.25594, 1.40919,
            1.58114, 1.77407, 1.99054, 2.23342, 2.50594, 2.81171, 3.15479,
            3.53973, 3.97164, 4.45625, 5.0,
        ]
    ),
}

# Settings for the mrestimator analysis in `mre_wrapper`; bin_size is 5 ms.
mre_settings = {"bin_size": 0.005, "tmin": 0.03, "tmax": 10.0}

In [3]:
def mre_wrapper(data, settings):
    """
    Run mrestimator on the spikes of a single unit.

    The data is binned into spike counts, correlation coefficients are
    computed for the lags between `tmin` and `tmax`, and two functions are
    fitted to them: an exponential with offset and a two-timescale function.

    # Parameters
    data : numpy array
        spikes of one unit. Has to be 1d after squeezing.
    settings : dict
        needs the keys "bin_size", "tmin" and "tmax". They are given in the
        same unit; `dtunit="s"` is passed on, so presumably seconds.

    # Returns
    res : dict
        "tau_single_ts" and "tau_double_ts" hold the `tau` of the two fits.
        "details_tau_single_ts" and "details_tau_double_ts" hold the
        respective fit result as a dict, where `fitfunc` is replaced by the
        function name and a copy of `settings` is added.
    """

    # imports live inside the function body so that it is self-contained
    import logging

    logging.getLogger("mrestimator").setLevel("INFO")

    import mrestimator as mre

    data = data.squeeze()
    assert data.ndim == 1, "data must be 1D, this is the simple one-unit wrapper"

    bin_size = settings["bin_size"]

    def _num_steps(duration):
        # A plain int() truncates, so a quotient that ends up marginally below
        # a whole number due to floating point error (0.3 / 0.1 gives
        # 2.9999999999999996) would lose one step. Rounding that noise away
        # first fixes this, while truly fractional quotients are still floored.
        return int(round(duration / bin_size, 9))

    binned_spikes = utl.binned_spike_count(data, bin_size=bin_size)

    rk = mre.coefficients(
        binned_spikes,
        method="ts",  # method does not matter for single unit
        steps=(_num_steps(settings["tmin"]), _num_steps(settings["tmax"])),
        dt=bin_size,
        dtunit="s",
    )

    fit_single_ts = mre.fit(rk, fitfunc=mre.f_exponential_offset)
    fit_double_ts = mre.fit(rk, fitfunc=mre.f_two_timescales)

    details_single_ts = fit_single_ts._asdict()
    details_double_ts = fit_double_ts._asdict()

    # keep only the name of the fit function, not the function object itself
    details_single_ts['fitfunc'] = details_single_ts['fitfunc'].__name__
    details_double_ts['fitfunc'] = details_double_ts['fitfunc'].__name__

    # each result carries its own copy of the settings it was created with
    details_single_ts['settings'] = settings.copy()
    details_double_ts['settings'] = settings.copy()

    res = {
        "tau_single_ts": fit_single_ts.tau,
        "tau_double_ts": fit_double_ts.tau,
        "details_tau_single_ts": details_single_ts,
        "details_tau_double_ts": details_double_ts,
    }

    return res


In [4]:
def full_analysis(spikes):
    """
    Analyse the spikes of one unit with both hdestimator and mrestimator.

    Intended to be shipped to a dask worker as one self-contained task.
    Uses the notebook-level `hde_settings` and `mre_settings`.

    # Parameters
    spikes : 1d numpy array
        flat list of spike times for a single unit. Non-finite entries
        (e.g. nans) are dropped before the estimators are called.

    # Returns
    hde_res, mre_res : tuple
        result of `hde.api.wrapper`, followed by the dict from `mre_wrapper`
    """

    import logging

    # set the log levels of both estimators before importing them
    for estimator_name in ("hdestimator", "mrestimator"):
        logging.getLogger(estimator_name).setLevel("INFO")

    import hdestimator as hde
    import mrestimator as mre

    finite_spikes = spikes[np.isfinite(spikes)]

    # evaluated left to right: hdestimator first, then mrestimator
    return (
        hde.api.wrapper(spike_times=finite_spikes, settings=hde_settings),
        mre_wrapper(finite_spikes, mre_settings),
    )


In [6]:
# NOTE(review): `hde_res` is not assigned in any cell shown in this notebook.
# It presumably stems from a manual `full_analysis(...)` call in a cell that
# was removed — TODO add that call here so the notebook runs top to bottom.
hde_res

<class 'dict'>
├── firing_rate ...................................................... float64  7.747921837778407
├── firing_rate_sd ................................................... float64  0.0
├── recording_length ................................................. float64  600.2890625
├── recording_length_sd .............................................. float64  0.0
├── H_spiking ........................................................ float64  0.16391766163472438
├── R_tot ............................................................ float64  0.06625277299873156
├── R_tot_sd ........................................................ NoneType
├── AIS_tot .......................................................... float64  0.010859999626768283
├── T_D ................................................................ float  0.15811
├── tau_R ............................................................ float64  0.0534779274937721
├── opt_first_bin_size .....................................

In [13]:
def dask_wrap(meta_df_row):
    """
    Stub for the per-row task: for now it only hands back the first entry
    (by position) of the given row.

    # Parameters
    meta_df_row : pandas Series
        a single row of `meta_df`
    """
    return meta_df_row.iloc[0]

# quick check of the stub on the first row of meta_df
dask_wrap(meta_df.iloc[0])

787025148

In [2]:
# Add per-unit metrics from the csv tables in data_dir; according to the debug
# log these are `image_selectivity_ns` and `pref_dir_dm`.
# NOTE(review): the log reports 0 matched rows for the brain_observatory_1.1
# table, and `image_selectivity_ns` is empty in all displayed rows — verify
# that this is expected for the sessions loaded here.
metric_df = utl.load_metrics(meta_df, data_dir)
metric_df

2023-08-09 16:47:56,145 | DEBUG    | ana.utility > load_metrics > Loaded columns ['unit_id', 'image_selectivity_ns'] from /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/brain_observatory_1.1_analysis_metrics.csv
2023-08-09 16:47:56,384 | DEBUG    | ana.utility > load_metrics > Loaded columns ['unit_id', 'pref_dir_dm'] from /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/functional_connectivity_analysis_metrics.csv
2023-08-09 16:47:56,385 | DEBUG    | ana.utility > load_metrics > Matched 0 rows from meta_df in /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/brain_observatory_1.1_analysis_metrics.csv
2023-08-09 16:47:56,470 | DEBUG    | ana.utility > load_metrics > Matched 23293 rows from meta_df in /Users/paul/para/2_Projects/information_timescales/repo/gnode/experiment_analysis/dat/functional_connectivity_analysis_metrics.csv


Unnamed: 0,unit_id,stimulus,session,block,ecephys_structure_acronym,invalid_spiketimes_check,recording_length,firing_rate,filepath,num_spikes,spiketimes,image_selectivity_ns,pref_dir_dm
0,951013153,natural_movie_one_more_repeats,787025148,3.0,VISam,SUCCESS,900.668806,21.542880,/Users/paul/para/2_Projects/information_timesc...,19403,[[[<xarray.DataArray (spiketimes: 30719)>\narr...,,45.0
1,951013143,natural_movie_one_more_repeats,787025148,3.0,VISam,SUCCESS,900.718039,11.652925,/Users/paul/para/2_Projects/information_timesc...,10496,[[[<xarray.DataArray (spiketimes: 30719)>\narr...,,90.0
2,951013133,natural_movie_one_more_repeats,787025148,3.0,VISam,SUCCESS,894.867666,0.448111,/Users/paul/para/2_Projects/information_timesc...,401,[[[<xarray.DataArray (spiketimes: 30719)>\narr...,,-45.0
3,951013202,natural_movie_one_more_repeats,787025148,3.0,VISam,SUCCESS,900.265072,2.384853,/Users/paul/para/2_Projects/information_timesc...,2147,[[[<xarray.DataArray (spiketimes: 30719)>\narr...,,45.0
4,951013187,natural_movie_one_more_repeats,787025148,3.0,VISam,SUCCESS,896.074534,0.706414,/Users/paul/para/2_Projects/information_timesc...,633,[[[<xarray.DataArray (spiketimes: 30719)>\narr...,,90.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23288,951190716,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1067.306152,3.486347,/Users/paul/para/2_Projects/information_timesc...,3721,"[<xarray.DataArray ()>\narray(3.1584473, dtype...",,-45.0
23289,951190722,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1077.341553,2.770709,/Users/paul/para/2_Projects/information_timesc...,2985,"[<xarray.DataArray ()>\narray(0.40185547, dtyp...",,0.0
23290,951190724,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1076.488525,2.043682,/Users/paul/para/2_Projects/information_timesc...,2200,"[<xarray.DataArray ()>\narray(1.0895996, dtype...",,0.0
23291,951190819,natural_movie_one_more_repeats,847657808,merged_3.0_and_8.0,LP,SUCCESS,1075.182861,2.104758,/Users/paul/para/2_Projects/information_timesc...,2263,"[<xarray.DataArray ()>\narray(1.2353516, dtype...",,-45.0


In [9]:
# List the stimuli present in meta_df.
# natural_movie_three -> brain observatory
# (presumably: this stimulus only occurs in the brain_observatory_1.1
# sessions — TODO confirm)
meta_df["stimulus"].unique()

array(['natural_movie_one_more_repeats', 'spontaneous',
       'natural_movie_three'], dtype=object)

In [3]:
# Print the Python / IPython versions and the versions of the imported packages.
%watermark -v --iversions

Python implementation: CPython
Python version       : 3.11.4
IPython version      : 8.14.0

IPython       : 8.14.0
h5py          : 3.9.0
numpy         : 1.24.4
pandas        : 2.0.3
dask          : 2023.7.1
xarray        : 2023.7.0
re            : 2.2.1
prompt_toolkit: 3.0.39
logging       : 0.5.1.2
sqlite3       : 2.6.0
sys           : 3.11.4 | packaged by conda-forge | (main, Jun 10 2023, 18:08:41) [Clang 15.0.7 ]
matplotlib    : 3.7.2

