In [None]:
from studies.study_factory import StudyFactory
from dataloader.dataloader import DataLoader

# Build the shared DataLoader instance used by the cells below.
# NOTE(review): the meaning and units of each option are defined by DataLoader,
# which is not visible in this notebook — confirm there before tuning values.
dataloader = DataLoader(
    # Buffer / cache settings.
    buffer_size=10,
    max_cache_size=10,
    cache_dir="cache",
    # Presumably signal preprocessing options — confirm against DataLoader.
    notch_filter=True,
    frequency_bands=dict(all=(0.5, 40)),
    scaling="both",
    brain_clipping=20,
    baseline_window=0.5,
    new_freq=100,
    # Per-batch-type settings; both mappings are keyed by the batch type name.
    batch_types=dict(audio=100),
    batch_kwargs=dict(
        audio=dict(
            max_random_shift=2,
            window_size=4,
            window_stride=1,
            audio_sample_rate=16000,
            hop_length=160,
            audio_processor="openai/whisper-large-v3",
        ),
    ),
)

In [None]:
# Create the 'gwilliams2023' study with the 'audio' batch type, reading from
# the relative path data/gwilliams2023 with caching switched on.
study = StudyFactory.create_study(
    study_name="gwilliams2023",
    batch_type="audio",
    path="data/gwilliams2023",
    cache_enabled=True,
    max_cache_size=200,  # counted in items
)

Loading GWilliams2023 with batch type audio


In [None]:
# NOTE(review): `recordings` is not used by any visible cell; kept in case a
# later cell relies on the name — remove if confirmed dead.
recordings = []

# Flatten the three-level nested list in study.recordings into one flat list.
from itertools import chain
import random

flat_recordings = list(chain.from_iterable(chain.from_iterable(study.recordings)))

# Shuffle with a dedicated, seeded generator so Restart & Run All produces the
# same order every time, without touching the global `random` state.
SHUFFLE_SEED = 0
random.Random(SHUFFLE_SEED).shuffle(flat_recordings)

In [None]:
# Start background fetching
dataloader.start_fetching(flat_recordings, cache=True)

# Process batches as they become available
try:
    while True:
        batch = dataloader.get_recording()

        # A None result is treated as the end-of-data signal.
        if batch is None:
            break

        # 'all' is presumably the band name configured in frequency_bands —
        # confirm against DataLoader.
        brain = batch.brain_segments['all']
        print(
            f'{batch.recording.cache_path} loaded with {brain.shape[0]} windows.'
        )

except KeyboardInterrupt:
    print("Interrupted")
    dataloader.stop()
except Exception:
    # Any other failure inside the loop (e.g. a missing key) must also stop the
    # loader, otherwise the fetching started above keeps running after the
    # cell has failed. Re-raise so the error stays visible in the notebook.
    dataloader.stop()
    raise