# Predictive Time Stretching

This notebook attempts to time-stretch an audio file by using SciPy's interpolation functionality to reconstruct (resample) it over a different number of samples.

Audio files were obtained via http://soundbible.com/

In [None]:
from scipy.io import wavfile as wav
from scipy.interpolate import interp1d
from scipy.fftpack import fft, ifft
import numpy as np
import pandas as pd
import math

In [None]:
# This will drop every channel except the first one extracted from the wav file
def drop_check(df):
    """Reduce a multi-channel sample table to its first channel, in place.

    Args:
        df: DataFrame with one row per sample and one column per channel.
            It is modified in place; nothing is returned.
    """
    samples, channels = df.shape
    if channels > 1:
        # Drop by position rather than by the literal label 1 so that files
        # with more than two channels also end up with a single column.
        df.drop(columns=df.columns[1:], inplace=True)

# This will extract the raw data from the wav file
def extract_wav_data(filename):
    """Load a wav file and return its samples as a one-column DataFrame.

    Args:
        filename: Path handed to ``scipy.io.wavfile.read``.

    Returns:
        DataFrame whose single column is named ``'raw'``; extra channels are
        removed by ``drop_check`` before the column is renamed.
    """
    # The sample rate is not needed here, only the sample data.
    _, samples = wav.read(filename)
    frame = pd.DataFrame(samples)

    drop_check(frame)
    frame.columns = ['raw']

    return frame

In [None]:
def time_stretch(wavfile, stretchMag, outputfile):
    """Resample a wav file onto a longer or shorter time axis and save it.

    The samples returned by ``extract_wav_data`` are linearly interpolated at
    ``ceil(len * abs(stretchMag)) + 1`` evenly spaced positions spanning the
    original sample indices, and the result is written with
    ``write_wav_file`` at the input file's sample rate.

    Args:
        wavfile: Path of the input wav file.
        stretchMag: Stretch factor. Its magnitude is the ratio of output
            length to input length; a negative value walks the input
            backwards (reversed audio). Zero (or NaN) writes nothing.
        outputfile: Path of the wav file to write.

    Returns:
        None.

    Raises:
        ValueError: If the input holds fewer than two samples, which is too
            few to interpolate between.
    """
    bufferdata = extract_wav_data(wavfile)
    samples = bufferdata['raw'].values
    sample_count = len(samples)

    # The last valid sample index is sample_count - 1; running the ramp to
    # sample_count would make the interpolator invent a sample past the end.
    last_index = sample_count - 1
    if stretchMag > 0:
        first_pos, last_pos = 0, last_index
    elif stretchMag < 0:
        first_pos, last_pos = last_index, 0
    else:
        return None

    if sample_count < 2:
        raise ValueError(
            "need at least two samples to interpolate, got %d" % sample_count)

    # Now we use the time-stretch magnitude to create the list of indices.
    # "steps" intervals means steps + 1 positions, endpoints included.
    steps = math.ceil(sample_count * abs(stretchMag))
    predIndices = np.linspace(first_pos, last_pos, steps + 1)

    # 'extrapolate' is kept purely as a safety net against float rounding at
    # the edges; every requested position lies inside the sampled range.
    wavInterpo = interp1d(np.arange(sample_count), samples,
                          fill_value='extrapolate', assume_sorted=True)
    new_Y = wavInterpo(predIndices)

    # extract_wav_data does not expose the sample rate, so read it here.
    rate, _ = wav.read(wavfile)
    write_wav_file(outputfile, new_Y, rate)
    
def write_wav_file(filename, data, rate):
    """Normalise ``data``, pad both ends with ramps and write it as a wav file.

    The samples are scaled so the largest magnitude becomes 0.95, then a
    linear ramp from zero up to the first sample is placed in front and a
    ramp from the last sample back to zero is placed behind. Each ramp spans
    ``len(data) // 20`` steps; inputs shorter than 20 samples get no ramps.

    Args:
        filename: Destination passed to ``scipy.io.wavfile.write``.
        data: One-dimensional sequence of samples.
        rate: Sample rate passed to ``scipy.io.wavfile.write``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    samples = np.asarray(data, dtype=float)
    if samples.size == 0:
        raise ValueError("cannot write an empty signal to %r" % (filename,))

    # Some scaling to avoid insane clipping, with a touch of headroom.
    # A completely silent signal has a peak of zero; dividing by it would
    # turn every sample into NaN, so silence is passed through unscaled.
    peak = np.abs(samples).max()
    scaled = (samples / peak) * 0.95 if peak > 0 else samples

    # Adding some padding to reduce popping at the ends of the output.
    # Very short inputs yield a zero-length ramp, which is simply skipped.
    pad_steps = len(samples) // 20

    output = []
    if pad_steps > 0:
        output.extend(slew(0, scaled[0], pad_steps))
    output.extend(scaled)
    if pad_steps > 0:
        output.extend(slew(scaled[-1], 0, pad_steps))

    # Writing the complete, concatenated data array out
    wav.write(filename, rate, np.array(output))
    
def slew(start, end, values):
    """Build a linear ramp from ``start`` to ``end``.

    The distance between the endpoints is split into ``values`` equal steps,
    so the returned list holds ``values + 1`` points with ``start`` first and
    ``end`` last.

    Args:
        start: Value of the first point.
        end: Value of the last point.
        values: Number of steps between ``start`` and ``end``. Anything below
            1 collapses the ramp to just the two endpoints.

    Returns:
        list: The ramp, ``[start, ..., end]``.
    """
    if values < 1:
        return [start, end]

    # The step is the signed distance divided by the step count. Using the
    # difference (not the sum) keeps the ramp correct when neither endpoint
    # is zero and when the endpoints have opposite signs.
    step = (end - start) / values

    ramp = [start]
    # Each point is derived from start directly instead of from the previous
    # point, so rounding error does not accumulate along the ramp.
    ramp.extend(start + step * i for i in range(1, values))
    # Adding the exact final value to the end for good measure
    ramp.append(end)
    return ramp

# Checks if a number is a decimal or not, e.g.:
#  54 will return False
#  54.0 will return False
#  54.0000001 will return True
#  54.72323 will return True
def check_decimal(number):
    """Return True when ``number`` is a float with a non-zero fractional part.

    Args:
        number: Value to inspect.

    Returns:
        bool: False for anything that is not a ``float``, for floats holding
        a whole number, and for ``inf``/``nan``; True otherwise.
    """
    if not isinstance(number, float):
        return False
    # inf and nan have no fractional part to speak of, and math.floor /
    # math.ceil raise on them, so they are answered explicitly.
    return math.isfinite(number) and not number.is_integer()

In [None]:
audioFile = './input_audio/Warbling_Vireo-Mike_Koenig-89869915.wav'

# Each entry pairs a stretch magnitude with the file its result is written to;
# the negative magnitude produces reversed output.
stretch_jobs = (
    (2.5, 'output_audio/timestretch2_5.wav'),
    (0.42, 'output_audio/timestretch0_42.wav'),
    (-1.42, 'output_audio/timestretch-1_42.wav'),
)
for magnitude, destination in stretch_jobs:
    time_stretch(audioFile, magnitude, destination)