In [5]:
"""
Applying transformations to large files in batches:

BatchProcessor.multi_channel_apply lets you apply transformations to
batches of data where every batch has observations from every channel.

This example shows how to extract information from a large file by
processing it in batches.
"""

import logging
import os

import numpy as np
import matplotlib.pyplot as plt

from yass.batch import BatchProcessor
from yass.batch import RecordingsReader

In [2]:
# configure the root logger at INFO level so the progress messages
# emitted by yass.batch while processing are displayed
logging.basicConfig(level=logging.INFO)

In [3]:
# location of the raw recordings, resolved relative to the user's home
path_to_neuropixel_data = os.path.expanduser(
    '~/data/ucl-neuropixel/rawDataSample.bin')

In [42]:
# on each batch, we find the maximum value in every channel
def max_in_channel(batch):
    """Return the maximum value of every channel in the batch

    Parameters
    ----------
    batch : array-like
        Observations for a single batch. Assumed to be laid out as
        (n_observations, n_channels) -- TODO confirm against the layout
        BatchProcessor passes to the function

    Returns
    -------
    numpy.ndarray
        Maximum of `batch` along axis 0 (one value per column for a
        two-dimensional batch)
    """
    # reduce over the first axis only, keeping one value per column
    return np.max(batch, axis=0)

In [43]:
# build the batch processor that will read the raw file in chunks
bp = BatchProcessor(
    path_to_neuropixel_data,
    dtype='int16',
    n_channels=385,
    data_format='wide',
    max_memory='10MB',
)

# apply a multi channel transformation: every batch is a temporal
# subset containing observations from all the selected channels, and
# its size is derived from max_memory. The per-batch results are
# returned in a list
res = bp.multi_channel_apply(max_in_channel, mode='memory', channels='all')

INFO:yass.batch.batch:Applying function __main__.max_in_channel...
INFO:yass.batch.batch:__main__.max_in_channel took 1.4983038902282715 seconds


In [44]:
# the list holds one result per batch; the number of batches depends on
# max_memory and the size of the file (133 in the run shown below)
len(res)

133

In [45]:
# output for the first batch: the values returned by max_in_channel
res[0]

array([ 55,  44,  50,  51,  63,  50,  46,  51,  43,  51,  59,  49,  51,
        32,  53,  46,  47,  62,  42,  40,  59,  49,  32,  88,  45,  67,
        39,  69,  45,  56,  32,  34,  94,  49,  48,  46,  60,  77,  54,
        48,  60,  40,  91,  51,  47,  68,  46, 100,  64,  63,  62,  55,
        40,  34,  58,  39,  42,  45,  52,  39,  59,  54,  34,  42,  55,
        26,  66,  73,  31,  37,  57,  62,  35,  41,  61,  67,  66,  71,
        71,  29,  71,  45,  45,  40,  79,  53,  62,  37,  84,  32,  37,
        51,  47,  71,  65,  79,  60,  36,  74,  68,  56,  57,  55,  71,
        67,  73,  37,  56,  53,  51,  61,  44,  43,  59, 109,  34,  32,
        45,  52,  54,  87,  56,  40,  44,  54,  56,  52,  26,  83,  48,
        20,  80,  42,  31,  53,  67,  53,  35,  70,  52,  49,  75,  89,
        40,  38,  80,  52,  50,  55,  88,  40,  65,  70,  33,  59,  48,
        61,  75,  52,  45,  43,  71,  62,  72,  84,  63,  54, 107,  66,
        48,  54,  57,  43,  62,  61,  34,  60,  60,  56,  39,  2

In [46]:
# combine the per-batch results into a single array with one row per
# batch (np.stack joins along a new first axis by default)
arr = np.stack(res)

In [47]:
# reduce over the batches to get the maximum value of every channel
# across the whole dataset
arr.max(axis=0)

array([137, 119,  99, 120, 107,  95,  85,  97, 124,  97,  96,  83, 126,
        96,  93, 114, 115, 122, 101, 109, 117, 120,  99,  95, 105,  99,
       101, 100,  79, 108,  86,  91, 121,  91, 104, 116, 117, 128, 100,
        96, 106, 110,  94,  98,  86,  80, 113, 110, 108, 110, 100, 112,
        95, 109,  80,  89,  98, 123, 112, 113, 149, 101,  92, 107, 120,
        94,  91, 103, 112,  91, 100, 109, 101, 113, 130, 104, 118,  92,
        87, 101,  94, 113,  94,  97, 109, 105, 112,  95, 112,  88, 115,
       105, 125, 138, 120, 128,  95, 113,  98, 154, 108, 100, 102, 112,
       101, 144, 125, 104, 107, 117, 104,  91,  97,  89, 109, 103,  97,
       105,  94, 112, 110,  98, 135, 111, 127,  91,  95, 116, 108, 113,
       128, 111, 109, 105,  88, 110,  97, 101, 125, 116, 124, 102, 109,
       147, 106,  97, 107, 120, 122, 104, 103, 113, 103, 106, 117, 106,
        97, 142, 110, 109, 110, 104,  94, 115, 119, 120, 125, 121, 109,
       119, 108,  99, 101, 111, 100, 121, 100, 107,  95,  68, 14