In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# (e.g. the local `tools` package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pprint import pprint
from pathlib import Path
from tools.settings import settings

## Example from mtscomp for compression and decompression
```python
import numpy as np
from mtscomp.mtscomp import compress, decompress

# Compress a .bin file into a pair .cbin (compressed binary file) and .ch (JSON file).
compress('data.bin', 'data.cbin', 'data.ch', sample_rate=20000., n_channels=256, dtype=np.int16)
# Decompress a pair (.cbin, .ch) and return an object that can be sliced like a NumPy array.
arr = decompress('data.cbin', 'data.ch')
X = arr[start:end, :]  # decompress the data on the fly directly from the file on disk
arr.close()  # Close the file when done
```

## setup

#### set paths

In [3]:
# Pull the path and experiment configuration out of the shared settings object
paths = settings.paths
experiment = settings.experiment

raw_drive = paths.drive
expt_folder = experiment.dir

# Everything for this experiment lives under <drive>/<experiment dir>
experiment_root = raw_drive / expt_folder

# Recordings waiting to be compressed are looked up in the 'to_compress' subfolder
batch_folder = experiment_root / 'to_compress'
print(f'Looking for recordings in:\n\t{batch_folder}')

# Base path where the project folders for each subject are (or will be) located,
# i.e. the parent of subject folders like 'NP02_R1'.
# For our projects, `data_dir` is "1_Recordings" by default for each experiment.
project_base_path = experiment_root / paths.data_dir

# Folder name inside each session folder where compressed recordings will go
raw_dir = paths.raw_dir

# Disabled: alternative single output folder next to `batch_folder`
# target_folder = batch_folder.parent / 'compressed_recordings'
# target_folder.mkdir(exist_ok=True)
# print(f'\nCompressed recordings will be saved to:\n\t{target_folder}')

Looking for recordings in:
	/mnt/fasthd/4_Behavioral_Sensitization/to_compress


set subject / recording pairs to process

In [4]:
# Recordings to process, as configured on the experiment settings.
# Printed so the selection (recording name -> options) can be checked before running anything.
recording_pairs = experiment.recordings
print("Processing the following recordings:")
pprint(recording_pairs, indent=4)

Processing the following recordings:
{   'BS03_R3': {'concatenate': False, 'multiple_shanks': [True, False]},
    'BS05_R2': {'concatenate': False, 'multiple_shanks': [True, False]}}


### before compression...
If you haven't yet, set up experiment folder structure for each recording using the `set_up_folders` tool.  

Currently there are two options:   
* *Command-line tool*
    * Useful to set up structure for one session, or to have some more control over output.
    * If you installed with uv, you should be able to run the following to see basic usage; see the README for further instructions.  
    
        ```bash
        uv run set_up_folders --help
        ```
* *Set up default structures (histology & recording templates)*
    * For all recordings in `recording_pairs`, or for a single recording if `recording_name` is not `None`.
    * See further instructions below

In [None]:
from tools.set_up_folders import setup_folders_from_dict

# set to True to see what folders would be created without actually creating them
in_debug_mode = False

# Name of a single entry in `recording_pairs` (e.g. "NP02_R1") to set up only that
# recording, or None to set up folders for every recording in `recording_pairs`.
recording_name = None

if recording_name is not None:
    print(f'---setting up folders for recording {recording_name}...')
    # Pass only the selected entry so the message above matches what is set up.
    # Raises KeyError if `recording_name` is not a key of `recording_pairs`.
    recordings_to_set_up = {recording_name: recording_pairs[recording_name]}
else:
    print('---setting up folders for all recordings in recording_pairs...')
    recordings_to_set_up = recording_pairs

setup_folders_from_dict(
    recordings_to_set_up,
    base_path=project_base_path,
    debug=in_debug_mode
)

## compress recordings

**single recording**  
To compress a single recording, assign `recording_name` to one of the entries in `run_config.toml` under `[experiment.recordings]` header, for example "NP02_R1".  
Run the `compress_recordings` cell afterwards to compress the recording to `target_folder`.  

**batch compress**  
To batch compress, simply leave `recording_name` as `None`, then run the `compress_recordings` cell to compress all recordings in `recording_pairs` to `target_folder`.

set parallel processing parameters

In [5]:
# os.cpu_count() returns None when the core count cannot be determined;
# fall back to a single core instead of failing on `None * 0.8`.
num_cores = os.cpu_count() or 1
job_kwargs = dict(
    # use roughly 80% of the detected cores, but never fewer than one thread
    n_threads=max(1, round(num_cores * 0.8)),
    # presumably seconds per processing chunk — confirm against compress_recordings
    chunk_duration=5,
)
print("\nParallel Job parameters:")
pprint(job_kwargs, indent=4)


Parallel Job parameters:
{'chunk_duration': 5, 'n_threads': 10}


In [6]:
# Selects what the compression cell processes:
#   - a recording name (e.g. "NP02_R1") compresses only that raw recording,
#     including multiple segments
#   - None batch compresses all recordings in recording_pairs
recording_name: str | None = None

In [None]:
from tools.compression import compress_recordings
# TODO: fix pathing on multiprobe, multishank recs, + place in subfolder in compression dir

# Decide which recordings to hand over, then make a single call below.
if recording_name is None:
    # batch mode: every recording session in recording_pairs
    selected_recordings = recording_pairs
    banner = '---processing all recordings in recording_pairs...'
else:
    # single-session mode: wrap the chosen entry in a one-item mapping
    selected_recordings = {recording_name: recording_pairs[recording_name]}
    banner = '---processing single recording'

print(banner)
compress_recordings(
    selected_recordings,
    batch_folder,
    raw_dir,
    project_base_path=project_base_path,
    job_kwargs=job_kwargs,
)
