# Jumble

A basic utility - takes in an audio file, chops it up into x equally sized segments, shuffles the segments into a random order, and then stitches them back together. For the indiscriminating mad scientist.

In [None]:
from scipy.io import wavfile as wav
import numpy as np
import pandas as pd
import math

In [None]:
# This will drop every channel after the first, so the frame always ends up mono
def drop_check(df):
    """Reduce a multi-channel sample frame to its first channel, in place.

    Args:
        df: DataFrame with one row per sample and one column per channel.
            It is modified in place; nothing is returned.
    """
    extra_channels = df.columns[1:]
    # Dropping only the column labelled 1 left any third (or later) channel
    # behind, which breaks a later rename to a single column name.
    if len(extra_channels) > 0:
        df.drop(columns=extra_channels, inplace=True)

# This will extract the raw data from the wav file
def extract_wav_data(filename):
    """Load a wav file and return its samples as a DataFrame with a 'raw' column.

    The sample rate reported by the reader is discarded here. drop_check
    trims the frame before the remaining column is renamed to 'raw'.
    """
    _rate, samples = wav.read(filename)
    frame = pd.DataFrame(samples)
    drop_check(frame)
    frame.columns = ['raw']
    return frame

In [None]:
def write_wav_file(filename, data, rate):
    """Normalize samples, pad both ends with ramps to zero, and write a wav file.

    Args:
        filename: Destination path handed to the wav writer.
        data: One-dimensional, non-empty sequence of samples.
        rate: Sample rate passed through to the wav writer.
    """
    samples = np.asarray(data, dtype=np.float64)

    # Some scaling to avoid insane clipping, with a touch of headroom.
    # Silent (all-zero) input is left untouched: dividing by a zero peak
    # would fill the output with NaN.
    peak = np.abs(samples).max()
    scaled = (samples / peak) * 0.95 if peak > 0 else samples

    # Adding some padding to reduce popping at the ends of the output for some algorithms.
    # At least one step is requested so slew is never asked to divide by zero
    # on inputs shorter than 20 samples.
    pad_steps = max(1, len(samples) // 20)
    from_zero = slew(0, scaled[0], pad_steps)
    to_zero = slew(scaled[-1], 0, pad_steps)

    # Joining the pieces as arrays avoids round-tripping every sample through
    # a Python list before writing the complete, concatenated data out.
    wav.write(filename, rate, np.concatenate((from_zero, scaled, to_zero)))
    
def slew(start, end, values):
    """Return a linear ramp from start to end as a list.

    Args:
        start: First value of the ramp.
        end: Last value of the ramp.
        values: Number of steps between start and end. Anything below 1 is
            treated as 1.

    Returns:
        A list of values + 1 evenly spaced entries whose first element is
        start and whose last element is end.
    """
    steps = max(int(values), 1)
    # The step is the signed distance divided by the step count. Using the
    # sum of the endpoints only produced a straight ramp when one endpoint
    # was zero. Converting to float first keeps narrow integer samples
    # (e.g. int16) from overflowing in the subtraction.
    step_size = (float(end) - float(start)) / steps
    ramp = [start]
    # Each point is derived from start directly so rounding error does not
    # accumulate along the ramp.
    ramp.extend(start + i * step_size for i in range(1, steps))
    # Adding the exact final value to the end for good measure
    ramp.append(end)
    return ramp

# Checks if a number is a decimal or not, e.g.:
#  54 will return False
#  54.0 will return False
#  54.0000001 will return True
#  54.72323 will return True
def check_decimal(number):
    """Return True when number is a float that is not a whole number.

    Ints and whole-valued floats return False. NaN and the infinities are
    not whole numbers either, so they return True instead of raising the
    way math.floor would.
    """
    return isinstance(number, float) and not number.is_integer()

In [None]:
def jumble(wavfile, splits, outputfile):
    """Shuffle near-equal segments of a wav file and write the result.

    Args:
        wavfile: Path of the input wav file.
        splits: Number of segments to cut the audio into. Must be a whole
            number, at least 2 and at most half the number of samples.
        outputfile: Path of the wav file to write.

    Returns:
        None. Nothing is written when splits fails the checks above.
    """
    bufferdata = extract_wav_data(wavfile)

    if (splits > len(bufferdata)/2) or (splits < 2) or check_decimal(splits):
        return None

    # Segment boundaries are computed on sample positions and applied with
    # iloc. Handing the DataFrame itself to np.array_split goes through
    # DataFrame.swapaxes, which pandas has deprecated.
    positions = np.array_split(np.arange(len(bufferdata)), int(splits))
    segments = [bufferdata.iloc[chunk[0]:chunk[-1] + 1] for chunk in positions]

    # Arrange the segments randomly, then stitch them back together
    np.random.shuffle(segments)
    output_data = concat_audio_df_array(segments)

    # The file is read a second time only to recover its sample rate
    rate, _ = wav.read(wavfile)
    write_wav_file(outputfile, output_data, rate)
    
def concat_audio_df_array(df_array):
    """Join audio segments into one array, bridging each seam with a short ramp.

    Args:
        df_array: Sequence of DataFrames, each with a 'raw' column of samples.

    Returns:
        A float64 numpy array holding every segment in order. Before each
        segment after the first, a slew ramp from the previous output sample
        to that segment's first sample is inserted. Empty segments are
        skipped; an empty input yields an empty array.
    """
    pieces = []
    last_sample = None
    # Every segment is visited; stopping at len - 1 silently dropped the
    # final segment from the output.
    for segment in df_array:
        samples = np.asarray(segment['raw'].values, dtype=np.float64)
        if len(samples) == 0:
            continue
        if last_sample is not None:
            ramp_steps = math.ceil(len(samples)/1000) + 1
            pieces.append(np.asarray(slew(last_sample, samples[0], ramp_steps), dtype=np.float64))
        pieces.append(samples)
        last_sample = samples[-1]

    if not pieces:
        return np.array([], dtype=np.float64)
    # One concatenation at the end instead of re-copying the growing output
    # on every iteration.
    return np.concatenate(pieces)

In [None]:
audioFile = './input_audio/bells-tibetan-daniel_simon.wav'

# Render the same source at three increasingly fine segment counts.
for segment_count, target_path in (
    (7, './output_audio/jumble_7_splits.wav'),
    (50, './output_audio/jumble_50_splits.wav'),
    (500, './output_audio/jumble_500_splits.wav'),
):
    jumble(audioFile, segment_count, target_path)

## Jumble 2: 2wice the Files, 2wice the Awesome

Moving forward, let's try a more interesting approach by interleaving two files in order. If we slice both files into x portions and then stitch the portions back together, alternating between the two files, the result may be compelling.

In [None]:
def jumble2(wavfileA, wavfileB, splits, outputfile):
    """Interleave segments of two wav files, in order, and write the result.

    Both files are cut into the same number of segments. The output is
    A[0], B[0], A[1], B[1], ... with a short slew ramp across every seam.

    Args:
        wavfileA: Path of the first input wav file.
        wavfileB: Path of the second input wav file.
        splits: Number of segments per file. Must be a whole number, at
            least 2 and at most half the sample count of the shorter file.
        outputfile: Path of the wav file to write.

    Returns:
        None. Nothing is written when splits fails the checks above.
    """
    # Working in float64 keeps integer samples (e.g. int16) from overflowing
    # inside the ramp arithmetic.
    rawA = np.asarray(extract_wav_data(wavfileA)['raw'].values, dtype=np.float64)
    rawB = np.asarray(extract_wav_data(wavfileB)['raw'].values, dtype=np.float64)

    if (splits > len(rawA)/2) or (splits > len(rawB)/2) or (splits < 2) or check_decimal(splits):
        return None

    # We'll use numpy to extract x different sets of samples from each file
    segmentsA = np.array_split(rawA, int(splits))
    segmentsB = np.array_split(rawB, int(splits))

    pieces = []
    previous_last = None
    for partA, partB in zip(segmentsA, segmentsB):
        if previous_last is not None:
            # Ramp from the end of the previous B segment into this A segment
            pieces.append(slew(previous_last, partA[0], math.ceil(len(partA)/1000) + 1))
        pieces.append(partA)
        # NOTE(review): this ramp length is derived from the number of
        # segments, while the ramp above uses the segment's sample count.
        # Kept as in the original - confirm which one is intended.
        pieces.append(slew(partA[-1], partB[0], math.ceil(len(segmentsA)/20) + 1))
        pieces.append(partB)
        previous_last = partB[-1]

    # One concatenation at the end instead of re-copying the growing output
    # with np.append on every iteration.
    combinedSegments = np.concatenate(pieces)

    # NOTE(review): the output uses file A's sample rate; file B's rate is
    # never checked against it.
    rate, _ = wav.read(wavfileA)
    write_wav_file(outputfile, combinedSegments, rate)

In [None]:
fileA = './input_audio/Indian Bell-SoundBible.com-1882377860.wav'
fileB = './input_audio/bells-tibetan-daniel_simon.wav'

# Interleave the two sources at three increasingly fine segment counts.
for segment_count, target_path in (
    (50, './output_audio/jumble2_50_splits.wav'),
    (300, './output_audio/jumble2_300_splits.wav'),
    (1200, './output_audio/jumble2_1200_splits.wav'),
):
    jumble2(fileA, fileB, segment_count, target_path)