In [250]:
import math
import pywt
import numpy as np
import tensorflow as tf
import scipy.io.wavfile as wavfile
from scipy.interpolate import interp1d

# PRE-PROCESSING

In [225]:
input_rate, input_signal = wavfile.read('data/1clean_Selection.wav')
output_rate, output_signal = wavfile.read('data/1Selection.wav')

## Testing out pywt functions and reconstruction

In [261]:
# some settings:
wavetype = 'db10'
# wavelevel = 15

w = pywt.Wavelet(wavetype)
wavelevel = pywt.dwt_max_level(data_len=input_signal.shape[0], filter_len=w.dec_len)

# set a little lower:
# wavelevel = 1
print("Max wave level decomposition: " + str(wavelevel))

# these are floats, original is int16
input_coeffs1 = pywt.wavedec(input_signal[:,0].T, wavetype, level=wavelevel)
input_coeffs2 = pywt.wavedec(input_signal[:,1].T, wavetype, level=wavelevel)
# output_coeffs1 = pywt.wavedec(output_signal[:,0].T, wavetype, level=wavelevel)
# output_coeffs2 = pywt.wavedec(output_signal[:,1].T, wavetype, level=wavelevel)
print("input signal shape: " + str(input_signal.shape))

# reconstruction for left and right channel
recons1 = np.array([pywt.waverec(input_coeffs1, wavetype)]).astype('int16')
recons2 = np.array([pywt.waverec(input_coeffs2, wavetype)]).astype('int16')

# print(recons1.shape)

write_array = np.concatenate((recons1,recons2),axis=0).T
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons.wav', input_rate, write_array)

# get available wavelets
print(pywt.wavelist())

Max wave level decomposition: 16
input signal shape: (1810432, 2)
reconstruction shape: (1810432, 2)
['bior1.1', 'bior1.3', 'bior1.5', 'bior2.2', 'bior2.4', 'bior2.6', 'bior2.8', 'bior3.1', 'bior3.3', 'bior3.5', 'bior3.7', 'bior3.9', 'bior4.4', 'bior5.5', 'bior6.8', 'cgau1', 'cgau2', 'cgau3', 'cgau4', 'cgau5', 'cgau6', 'cgau7', 'cgau8', 'cmor', 'coif1', 'coif2', 'coif3', 'coif4', 'coif5', 'coif6', 'coif7', 'coif8', 'coif9', 'coif10', 'coif11', 'coif12', 'coif13', 'coif14', 'coif15', 'coif16', 'coif17', 'db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9', 'db10', 'db11', 'db12', 'db13', 'db14', 'db15', 'db16', 'db17', 'db18', 'db19', 'db20', 'db21', 'db22', 'db23', 'db24', 'db25', 'db26', 'db27', 'db28', 'db29', 'db30', 'db31', 'db32', 'db33', 'db34', 'db35', 'db36', 'db37', 'db38', 'dmey', 'fbsp', 'gaus1', 'gaus2', 'gaus3', 'gaus4', 'gaus5', 'gaus6', 'gaus7', 'gaus8', 'haar', 'mexh', 'morl', 'rbio1.1', 'rbio1.3', 'rbio1.5', 'rbio2.2', 'rbio2.4', 'rbio2.6', 'rbio2.8', 'rbio3.1

In [248]:
print(input_signal[:10,0].T)
print(recons1[0,:10])

[ 0  0  0 -1 -2 -2 -3 -2 -1 -1]
[ 0  0  0 -1 -1 -1 -2 -1  0  0]


## prepare data for tf

In [None]:
# cut it into chunks

In [318]:
def wavelet_to_vector(input_raw, output_raw, chunk_size, wavelet_level, wavelet_type):
    current_set = 'input'
    
    w = pywt.Wavelet(wavelet_type)
    max_level = pywt.dwt_max_level(data_len=chunk_size, filter_len=w.dec_len)
    if wavelet_level > max_level:
        print('wavelet level too high. set to max level: ' + str(max_level))
        wavelet_level = max_level
    
    # short hacky loop
    while True:
        # select the correct set
        if current_set == 'input':
            data = input_raw
            amount_of_chunks = int(math.floor(data.shape[0]/chunk_size))
            input_list = []
        else:
            data = output_raw
            amount_of_chunks = int(math.floor(data.shape[0]/chunk_size))
            output_list = []

        
        index_range = (np.arange(amount_of_chunks) * chunk_size)
        indp = chunk_size - 1
        # for all chunks do this:
        for ind in index_range:
#             print(ind)
            sample1 = data[ind:ind+indp,0].T
            sample2 = data[ind:ind+indp,0].T
            coeffs1 = pywt.wavedec(sample1, wavelet_type, level=wavelet_level)
            coeffs2 = pywt.wavedec(sample2, wavelet_type, level=wavelet_level)
            unfolded1 = np.array([item for sublist in coeffs1 for item in sublist])
            unfolded2 = np.array([item for sublist in coeffs1 for item in sublist])
            vector = np.concatenate((unfolded1,unfolded2),axis=0)
            
            if current_set == 'input':
                input_list.append(vector)
            else:
                output_list.append(vector)
#             unf_arr = np.array(unfolded1)
#             print(unf_arr.shape)
#             print(len(unfolded))
            
            # for all coeff levels:
#             for i in range(len(coeffs1)):
#                 print(i)
            
        
        
        if current_set == 'output':
            break
        current_set = 'output'

    # convert lists to arrays
    input_arr = np.array(input_list)
    output_arr = np.array(output_list)
    
    # return level sizes for reconstruction
    level_sizes = []
    for cf in coeffs1:
        level_sizes.append(cf.shape[0])
    return [input_arr, output_arr, level_sizes]
# input_coeffs1[16].shape

# rows=samples, cols=dim
[input_matrix, output_matrix, level_sizes] = wavelet_to_vector(input_signal, output_signal, 1024, 6, 'db4')


In [317]:
a = np.array([4,5,7,4])
b = np.array([6,2,0,9])
# np.array([a, b])
# np.concatenate((a.T,b.T),axis=1)
# np.zeros([5,2])
# for cf in coef_test:
#     print(cf.shape)
# print(level_sizes)
np.array([a,b])
c = []
c.append(a)
c.append(b)
# np.array(c)
# a + b
# print(a.shape)
# np.concatenate((a,b),axis=0)
input_matrix.shape

(1768, 2124)