# Preprocess a 10-minute recording

author: laquitainesteeve@gmail.com

Tested on Ubuntu 24 with RTX5090 GPU

Execution time: about 1 min (last recorded run: 1 min 6 s wall time for the preprocessing and saving cell)

## Setup

1. Register the `spikebias` environment as a Jupyter kernel, then select the "spikebias" kernel for this notebook:

    ```bash
    python -m ipykernel install --user --name spikebias --display-name "spikebias"
    ```


In [1]:
%%time 
%load_ext autoreload
%autoreload 2

import os
import spikeinterface.preprocessing as spre
import spikeinterface as si

# move to PROJECT PATH
PROJ_PATH = "/home/steeve/steeve/epfl/code/spikebias/"
os.chdir(PROJ_PATH)

# import custom package
from src.nodes.postpro import waveform
from src.nodes.prepro.run import run as prep

# setup paths and parameters
REC_SECS = 600             # recording duration in seconds
FREQ_MIN = 300             # lower cutoff of bandpass filtering
RAW_PATH = os.path.join(PROJ_PATH, "dataset/00_raw/recording_npx_spont")
PREP_PATH = os.path.join(PROJ_PATH, "dataset/01_intermediate/preprocessing/recording_npx_spont")

# parallel processing parameters
N_JOBS = 20                # (default = 20) number of parallel processes for waveform extraction (20/32)
CHUNKS = 50000             # (default = 800000) use smaller chunks to use less RAM

CPU times: user 2.03 s, sys: 2.17 s, total: 4.2 s
Wall time: 2.12 s


In [2]:
# Open the raw recording and cast it to int16 in a single step
Recording = spre.astype(si.load_extractor(RAW_PATH), "int16")

# Sampling frequency of the loaded recording
sfreq = Recording.sampling_frequency

# Upper cutoff handed to the preprocessing step as freq_max:
# one unit below the Nyquist frequency (sfreq / 2)
FREQ_MAX = sfreq / 2 - 1



In [3]:
%%time 

# load recording
Recording = si.load_extractor(RAW_PATH)
Recording = spre.astype(Recording, "int16")

# sample first 10 min of recording
sfreq = Recording.sampling_frequency
Recording = Recording.frame_slice(start_frame=0, end_frame=Recording.sampling_frequency*REC_SECS)

# preprocess recording
Recording = prep(Recording, freq_min=FREQ_MIN, freq_max=FREQ_MAX)

# save recording
Recording.save(folder=PREP_PATH, n_jobs=N_JOBS, 
            verbose=True, progress_bar=True, overwrite=True, 
            dtype="float32", chunk_size=CHUNKS)

2025-07-23 14:07:24,295 - root - run.py - run - INFO - Band-pass filtered in 0.0 secs
2025-07-23 14:07:24,295 - root - run.py - run - INFO - Pipeline completed in 0.0 secs
write_binary_recording with n_jobs = 20 and chunk_size = 50000


write_binary_recording:   0%|          | 0/480 [00:00<?, ?it/s]

CPU times: user 283 ms, sys: 346 ms, total: 629 ms
Wall time: 1min 6s


BinaryFolderRecording: 384 channels - 40.0kHz - 1 segments - 24,000,000 samples 
                       600.00s (10.00 minutes) - float32 dtype - 34.33 GiB