# Sort the waveform plots of missed and detected sparse-firing cells

author: steeve.laquitaine@epfl.ch


* `534` near-contact cells
* `472` pyramidal cells


Setup: run this notebook in the virtual environment `env_kilosort_silico`.

In [3]:
# SETUP PACKAGES 
%load_ext autoreload
%autoreload 2
import os 
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import shutil
import glob

# SET PROJECT PATH
# NOTE(review): the working directory is switched to the project root before
# the `src.*` imports below — presumably so that the `src` package resolves
# from the current directory; confirm if the package is installed instead.
PROJ_PATH = "/gpfs/bbp.cscs.ch/project/proj68/home/laquitai/spike-sorting"
os.chdir(PROJ_PATH)
from src.nodes.utils import get_config, write_metadata
from src.nodes.prepro import preprocess
from src.nodes.truth.silico import ground_truth
from src.nodes.io.silico.loading import load_campaign_params
from src.nodes.dataeng.silico import probe_wiring
from src.pipes.sorting import match_sorted_to_true_neuropixels_2023_02_19 as match
from src.nodes.postpro import waveform, feateng

# PARAMETERS
EXPERIMENT = "silico_neuropixels"   # experiment name passed to get_config
SIMULATION_DATE = "2023_02_19"      # run (date) passed to get_config
MS_BEFORE = 3       # waveform window before each spike (ms)
MS_AFTER = 3        # waveform window after each spike (ms)
CELL_ID = 4229506   # the most active near-probe pyramidal cell
CHANNEL_IDS = list(range(64, 80))   # plot traces from channels nearest to filtered neuron group
LOAD_GROUND_TRUTH = True    # True: load the saved ground truth; False: recompute it
LOAD_WAVEFORMS = True       # True: load the saved waveform extractor; False: recompute it


# CONFIG
# get_config returns a mapping whose two values are unpacked, in order, as
# the dataset config and the parameter config
data_conf, param_conf = get_config(EXPERIMENT, SIMULATION_DATE).values()


# PATHS
# all campaign outputs live under the same config node
_campaign_output = data_conf["dataeng"]["campaign"]["output"]
SPIKE_FILE_PATH = _campaign_output["spike_file_path"]
RAW_LFP_TRACE_FILE_PATH = _campaign_output["trace_file_path"]
# NOTE(review): the trace file name hard-codes cell 3754013, which differs
# from CELL_ID above — confirm this is intended.
CELL_TRACE_FILE_PATH = _campaign_output["unit_trace_path"] + "cell_3754013_trace.pkl"
CELL_MATCHING_PATH = data_conf["postprocessing"]["cell_matching"]


# WAVEFORM STUDY FOLDER
STUDY_FOLDER = data_conf["postprocessing"]["waveform"]["study"]


# FIGURE OUTPUT LOCATION
FIG_PATH = data_conf["figures"]["silico"]["lfp_derived_waveforms_test"]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2023-05-26 14:45:26,536 - root - utils.py - get_config - INFO - Reading experiment config.
2023-05-26 14:45:26,560 - root - utils.py - get_config - INFO - Reading experiment config. - done


## Get spike waveforms

1. We preprocess the raw LFP traces
2. We extract the waveforms within a window of -3 to 3 ms centered on the spike timestamps of the most active pyramidal cell, so that the spike sample size is large enough.
3. We plot the spike-triggered average of each contact
4. We plot the neuron and contact locations associated with each spike triggered average

TODO:
- Spike location conversion takes a long time: speed it up or parallelize it.

In [5]:
# Obtain the WaveformExtractor: either rebuild it from the raw trace and spike
# files, or load the existing one from the study folder.
if not LOAD_WAVEFORMS:
    # WARNING ! Preprocess recording and extract spikes (stats: 2 hours / 10 min recording, 534 near-contact cells)
    extraction_kwargs = dict(
        experiment=EXPERIMENT,
        simulation_date=SIMULATION_DATE,
        lfp_trace_file=RAW_LFP_TRACE_FILE_PATH,
        spike_file=SPIKE_FILE_PATH,
        study_folder=STUDY_FOLDER,
        ms_before=MS_BEFORE,
        ms_after=MS_AFTER,
    )
    WaveformExtractor = waveform.run_from_files(**extraction_kwargs)
else:
    # load the preprocessed recording, then the existing waveform extractor
    lfp_recording = preprocess.load(data_conf)
    WaveformExtractor = waveform.load(
        lfp_recording,
        study_folder=STUDY_FOLDER,
        ms_before=MS_BEFORE,
        ms_after=MS_AFTER,
    )

## Get Ground truth sorting

In [6]:
# Load the campaign parameters; the circuit they hold is queried below for
# the morphological class of each cell
simulation = load_campaign_params(data_conf)

# Ground truth sorting: load the saved one, or compute it from the simulation
Truth = (
    ground_truth.load(data_conf)
    if LOAD_GROUND_TRUTH
    else ground_truth.run(simulation, data_conf, param_conf)["ground_truth_sorting_object"]
)

# Keep only the ground truth units whose morphological class is "PYR" (pyramidal)
cell_morph = simulation["circuit"].cells.get(Truth.unit_ids, properties=["morph_class"])
is_pyramidal = cell_morph["morph_class"] == "PYR"
CELL_IDS = cell_morph.loc[is_pyramidal].index.values
print("near-contact pyramidal cells (count): ", len(CELL_IDS))

2023-05-26 14:49:31,624 - root - ground_truth.py - load - INFO - loading already processed ground truth SortingExtractor ...
2023-05-26 14:49:31,662 - root - ground_truth.py - load - INFO - loading already processed true sorting - done in 0.0
near-contact pyramidal cells (count):  472


In [9]:
# Plot and save one waveform figure per pyramidal cell (15 min for 472 cells)
pyramidal_fig_dir = f"{FIG_PATH}pyramidal/"

# create the output folder once, up front; exist_ok makes re-runs safe
os.makedirs(pyramidal_fig_dir, exist_ok=True)

for cell_i in CELL_IDS:

    # plot waveform
    fig = waveform.plot(WaveformExtractor=WaveformExtractor, cell_id=cell_i)

    # write; the explicit .png extension keeps the file name independent of
    # the default savefig format and consistent with the "*.png" lookup used
    # when the figures are sorted
    fig.savefig(f"{pyramidal_fig_dir}cell_{cell_i}.png", dpi=300, bbox_inches='tight')

    # close this specific figure (not just whichever one is current) to
    # prevent display in the notebook and release its memory
    plt.close(fig)

## Sort missed, detected sparse firing cells

In [7]:
# stats: 40 secs, 534 cells, 1 sorter
# Run the matching pipeline, which creates the Matching object
out = match.run()

# Read the created Matching object back from disk
cell_matching_raw = pd.read_parquet(CELL_MATCHING_PATH)

# Label each cell as missed or detected
cell_matching = feateng.add_is_missed(cell_matching_raw, data_conf)

2023-05-26 14:50:44,033 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading kilosort3 Sorting Extractor
2023-05-26 14:50:44,060 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading kilosort3 Sorting Extractor - done: 0.0
2023-05-26 14:50:44,062 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading Ground truth Sorting Extractor
2023-05-26 14:50:44,071 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading Ground truth Sorting Extractor - done: 0.0
2023-05-26 14:52:56,592 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - Detecting true cell oversplit - done
2023-05-26 14:52:56,599 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - Detecting true cell misses - done


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cell_matching["oversplit_true_cell"].iloc[oversplit.index] = True


2023-05-26 14:53:09,820 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading kilosort3 Sorting Extractor
2023-05-26 14:53:09,832 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading kilosort3 Sorting Extractor - done: 0.0
2023-05-26 14:53:09,833 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading Ground truth Sorting Extractor
2023-05-26 14:53:09,844 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - loading Ground truth Sorting Extractor - done: 0.0
2023-05-26 14:53:19,760 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - Detecting true cell oversplit - done
2023-05-26 14:53:19,766 - root - match_sorted_to_true_neuropixels_2023_02_19.py - run - INFO - Detecting true cell misses - done


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cell_matching["oversplit_true_cell"].iloc[oversplit.index] = True


In [8]:
# Keep the first row for each distinct `true_cell_match`
df = cell_matching.drop_duplicates(subset=['true_cell_match'])

# Sparse cells: true firing rate at or below 0.2
is_sparse = df["true firing rate"] <= 0.2
is_detected = df["detection_label"] == "detected"
is_missed = df["detection_label"] == "missed"

# Cell ids of the sparse cells, split by detection outcome
detected_sparse_cells = df.loc[is_sparse & is_detected, "true_cell_match"]
missed_sparse_cells = df.loc[is_sparse & is_missed, "true_cell_match"]

## Write sorted cell waveform plots to disk

In [57]:
# Copy each pyramidal cell waveform figure into a "missed_sparse" or
# "detected_sparse" sub-folder according to the cell's detection outcome.
# Figures of cells in neither group are left where they are.
pyramidal_fig_dir = f"{FIG_PATH}pyramidal/"
waveform_figs = glob.glob(f"{pyramidal_fig_dir}*.png")

# create the destination folders; exist_ok so that re-running this cell
# does not raise FileExistsError
missed_fig_dir = f"{pyramidal_fig_dir}missed_sparse/"
detected_fig_dir = f"{pyramidal_fig_dir}detected_sparse/"
os.makedirs(missed_fig_dir, exist_ok=True)
os.makedirs(detected_fig_dir, exist_ok=True)

# build the id lookups once instead of scanning the arrays for every figure
missed_ids = set(missed_sparse_cells.tolist())
detected_ids = set(detected_sparse_cells.tolist())

for fig_name in waveform_figs:

    # get figure's cell id from its file name ("cell_<id>.png")
    fig_file = os.path.basename(fig_name)
    fig_cell_id = int(fig_file.removeprefix("cell_").removesuffix(".png"))

    # copy the globbed file itself, keeping its name, into the matching folder
    if fig_cell_id in missed_ids:
        shutil.copy2(fig_name, f"{missed_fig_dir}{fig_file}")
    elif fig_cell_id in detected_ids:
        shutil.copy2(fig_name, f"{detected_fig_dir}{fig_file}")