In [1]:
#from audio import load
from librosa import load

In [15]:
# Call the `load` name that was actually imported; `from librosa import load`
# does not bind the name `librosa`, so `librosa.load(...)` fails on a fresh kernel.
samples, sr = load('../tests/silence_10s.mp3')

In [162]:
import random
import numpy as np

# Chunk extraction

### Helper function: `wraparound_extract()`

In [260]:
def wraparound_extract(original, begin, length):
    '''
    Extracts elements from numpy.array in a "wraparound" fashion
    
    Extracts a certain number of elements from 
    a numpy.array starting at a certain position.
    If the chosen position and length go
    past the end of the array, the extraction
    "wraps around" to the beginning of the numpy.array
    as many times as necessary. For instance:
    
    wraparound_extract(
        original = [0, 5, 10],
        begin = 1, 
        length = 7) -> [5, 10, 0, 5, 10, 0, 5]
    
    Args:
        original (np.array): the original array; elements are
            taken along its first axis
        begin (int): beginning position to extract. Positions
            outside [0, len(original)) are reduced modulo
            the length of `original`
        length (int): number of elements to extract (>= 0)
    
    Returns:
        np.array with `length` elements along its first axis.
        The result is always a new array, never a view of 
        `original`, so modifying it leaves `original` untouched.
    
    Raises:
        AssertionError: if `original` is not a numpy.ndarray
        ValueError: if `length` is negative, or if `original`
            is empty while `length` is greater than 0
    '''

    assert isinstance(original, np.ndarray)

    # A negative length would otherwise be interpreted as a slice offset
    # and silently return elements instead of failing
    if length < 0:
        raise ValueError('length must be non-negative, got {}'.format(length))

    len_original = original.shape[0]

    # Nothing can be extracted from an empty array; only a request
    # for zero elements can be satisfied
    if len_original == 0:
        if length > 0:
            raise ValueError('cannot extract elements from an empty array')
        return original.copy()

    # Position of every requested element, folded back into the valid
    # index range. Indexing with an integer array always yields a copy.
    positions = (begin + np.arange(length)) % len_original
    return original[positions]

### Tests of `wraparound_extract()`

In [261]:
# Table of cases, all run against original = [0, 1]:
# (what the case exercises, begin, length, expected result)
wraparound_cases = [
    ('zero beginning, stopping before end of original array', 0, 1, [0]),
    ('zero beginning, stopping exactly at end of original array (no wrap)', 0, 2, [0, 1]),
    ('zero beginning, wrapping around', 0, 3, [0, 1, 0]),
    ('nonzero beginning, not wrapping', 1, 1, [1]),
    ('nonzero beginning, wrapping around', 1, 3, [1, 0, 1]),
    ('multiwrap', 1, 10, [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
    ('beginning past the end of the array is wrapped to a valid position', 5, 3, [1, 0, 1]),
    ('zero length gives an empty result', 1, 0, []),
]

for case_name, case_begin, case_length, case_expected in wraparound_cases:
    case_result = wraparound_extract(
        original = np.array([0, 1]),
        begin = case_begin,
        length = case_length)
    # The case name is the assertion message, so a failure identifies itself
    assert np.array_equal(case_result, np.array(case_expected, dtype=int)), case_name



### Main function: `get_chunk()`

In [276]:
def get_chunk(
    samples,
    sample_rate,
    start_position = None, # randomize start position
    duration = 5, # 5 seconds
    duration_jitter = 0.5, #jitter duration +- 0.5s
    chance_random_skip = 0.3 #randomly skip 30% of the time
):
    '''
    Extracts chunk of audio with some augmentation
    
    Extracts samples of audio from a master list
    of samples. 
    
    Available data augmentation options include:
        - selecting a position to start extracting from
          or allowing function to randomly choose start
        - selecting duration of chunk and allowing
          for random jitter of duration
        - randomly skipping some number of samples from
          0 to the length of the chunk
    
    If the chunk to be extracted reaches the end of the
    samples, the chunk will "wrap around" and start
    reading from the beginning of the samples.
    
    Args
        samples (numpy.array) - audio samples loaded
            by librosa.load or audio.load
        sample_rate (int) - sample rate of `samples`
        start_position (int) - position in the file to start
            extracting samples from. If None, the start position 
            is chosen randomly. An explicit 0 starts at the
            first sample
        duration (float) - desired duration, in seconds, 
            of chunk to extract
        duration_jitter (float) - if this value is not 0,
            the duration of the chunk extracted will 
            be randomly selected from the range 
            (duration - duration_jitter, duration + duration_jitter)
        chance_random_skip (float between 0 and 1) - 
            percent chance of random skipping. In a random skip,
            a position within the chunk will be randomly
            selected, and from that position in the 
            audio file, a random number of samples will 
            be skipped. The number of samples skipped is between
            0 and the number of samples in the entire chunk
    
    Returns
        numpy.array holding the extracted chunk. Its length is
        int(jittered duration * sample_rate), floored at 0, 
        whether or not a random skip happened
    
    Raises
        ValueError - if `samples` is empty
    '''

    num_samples = len(samples)
    if num_samples == 0:
        raise ValueError('samples must contain at least one element')

    # Get a random start position. Compare against None explicitly:
    # a truthiness test would discard a requested start position of 0
    if start_position is None:
        start_position = random.randrange(0, num_samples)

    # Convert seconds to samples. Jitter larger than the duration could
    # drive the value negative, so floor it at zero samples
    seconds_to_extract = duration + random.uniform(-duration_jitter, duration_jitter)
    samples_to_extract = max(0, int(seconds_to_extract * sample_rate))

    # Get chunks with skip in the middle with probability = chance_random_skip.
    # The random draw is made first so the RNG stream does not depend on the
    # chunk size; a zero-length chunk has no position to skip from.
    if random.random() < chance_random_skip and samples_to_extract > 0:
        position_to_skip = random.randrange(0, samples_to_extract)
        amount_to_skip = random.randrange(0, samples_to_extract)

        # Piece before the skip: `position_to_skip` samples from the start
        first_start = start_position
        first_length = position_to_skip

        # Piece after the skip: resumes `amount_to_skip` samples later and
        # supplies whatever is still missing from the requested chunk size
        second_start = first_start + first_length + amount_to_skip
        second_length = samples_to_extract - first_length

        # wraparound_extract takes (array, begin, length) -- lengths, not end positions
        first_piece = wraparound_extract(samples, first_start, first_length)
        second_piece = wraparound_extract(samples, second_start, second_length)
        chunk = np.concatenate((first_piece, second_piece))

    # Otherwise get contiguous chunk
    else:
        chunk = wraparound_extract(samples, start_position, samples_to_extract)

    return chunk
    

# Smoke run: pull one chunk from the loaded audio with every augmentation option left at its default
get_chunk(samples, sample_rate=sr)

### TODO: tests for `get_chunk()`

# Cyclic shift

In [332]:
def cyclic_shift(array, split_point = None):
    '''
    Shift array cyclically, by a random or a given amount
    
    Equivalent to splitting array into two parts at some 
    element, then switching the order of the parts. The split
    position is random unless `split_point` is given.
    
    Args: 
        array (np.array): 1D-array to be split
        split_point (float): float in (0, 1) describing
            where in array to split -- for testing purposes.
            The split index is the floor of split_point * length
            of array. For stochastic splitting, leave as None.
    
    Returns:
        shifted_array: new array with the elements from the 
            split index onward first, followed by the 
            elements before the split index
    '''

    assert isinstance(array, np.ndarray)
    length = array.shape[0]

    # Only None requests a stochastic split. A truthiness test would also
    # swallow split_point = 0.0 and silently randomize it.
    if split_point is None:
        # randint is inclusive on both ends; 0 and `length` both leave
        # the array unshifted
        split_index = random.randint(0, length)
    else:
        split_index = int(split_point * length)

    return np.concatenate((array[split_index:], array[:split_index]))

In [334]:
# Stochastic split: seeding the RNG makes the randomly chosen split point reproducible
random.seed(100)
seeded_shift = cyclic_shift(np.array((0, 1, 2, 3, 4, 5, 6, 7)))
assert np.array_equal(seeded_shift, np.array([2, 3, 4, 5, 6, 7, 0, 1]))

In [336]:
# Deterministic split, odd length: int(0.5 * 3) == 1, so the split falls after the first element
odd_shift = cyclic_shift(np.array([0, 1, 2]), split_point=0.5)
assert np.array_equal(odd_shift, np.array([1, 2, 0]))

# Deterministic split, even length: the two halves trade places
even_shift = cyclic_shift(np.array([0, 1, 2, 3]), split_point=0.5)
assert np.array_equal(even_shift, np.array([2, 3, 0, 1]))