In [1]:
import numpy as np
import keras
import torch
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv1D, AveragePooling1D, Conv2D, MaxPooling2D,ReLU
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model #save and load models
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import BatchNormalization
import IPython.display as ipd

from kymatio import Scattering1D
import hitdifferentparts

Using TensorFlow backend.


In [26]:
N = 2**16 #length of audio signal
J = 4
def getsc(y, J):
    """
    this function outputs scattering transform of a time-domain signal.
    """
    N = len(y)
    scattering = Scattering1D(J = J,shape=(N,))
    Sy = scattering(y)
    return Sy

## wave2shape model
1. fully connected CNN model
2. #layers ~ log(# scattering coefficients $\lambda$)
3. input: scattering transform of each percussive sounds (number of audio N, scattering coeff $\lambda$, time K)
4. output: 5 physical parameters $\theta = {\tau, p, D, \alpha, w}$


In [27]:
y = hitdifferentparts.hitdiffparts(0.5,0.5)
S = getsc(torch.Tensor(y),J)
nrow, ncol = S.shape 
naudio = 1         # number of images in batch
nchan_in = 1       # number of input channels.  1 since it is BW
input_shape = (nrow,ncol,nchan_in)  # input shape of 1 image
batch_shape = (naudio,nrow,ncol,nchan_in)  # shape of image batch
x = S.reshape(batch_shape)

--- 1.464674711227417 seconds ---


In [23]:
S = getsc(torch.Tensor(y),3)
S2 = getsc1(torch.Tensor(y),3)
print(S.shape,S2.shape)

torch.Size([8, 8192]) torch.Size([5, 8192])


In [29]:
# scattering order one
def getsc_new(y,J,Q_num,order):
    """
    this function outputs scattering transform of a time-domain signal.
    """
    N = len(y)
    scattering = Scattering1D(J = J,shape=(N,), Q = Q_num, max_order=order)
    Sy = scattering(y)
    return Sy



In [31]:
S1 = getsc_new(torch.Tensor(y),8,1,1)
S2 = getsc_new(torch.Tensor(y),8,1,2)
S3 = getsc_new(torch.Tensor(y),7,1,1)
S4 = getsc_new(torch.Tensor(y),7,1,2)
S5 = getsc_new(torch.Tensor(y),8,2,1)
S6 = getsc_new(torch.Tensor(y),8,2,2)
print(S.shape,S2.shape,S3.shape,S4.shape,S5.shape,S6.shape)

torch.Size([10, 256]) torch.Size([43, 256]) torch.Size([9, 512]) torch.Size([34, 512]) torch.Size([18, 256]) torch.Size([72, 256])


In [34]:
S7 = getsc_new(torch.Tensor(y),9,1,1)
S8 = getsc_new(torch.Tensor(y),7,2,2)
S9 = getsc_new(torch.Tensor(y),9,2,2)
print(S7.shape,S8.shape,S9.shape)

torch.Size([11, 128]) torch.Size([56, 512]) torch.Size([90, 128])


In [35]:
S10 = getsc_new(torch.Tensor(y),9,1,2)
print(S10.shape)

torch.Size([53, 128])


In [20]:

Sy = getsc1(torch.Tensor(y), J).T
input_shape = Sy.shape

kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=input_shape, filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.

print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 4096, 16)          784       
_________________________________________________________________
average_pooling1d (AveragePo (None, 1024, 16)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1024, 16)          2064      
_________________________________________________________________
average_pooling1d_1 (Average (None, 256, 16)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 256, 16)           2064      
_________________________________________________________________
average_pooling1d_2 (Average (None, 64, 16)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 64, 16)            2

In [7]:

y = torch.Tensor(np.random.randn(N))
Sy = getsc(y, J).T
input_shape = Sy.shape

kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=input_shape, filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(2,)))

model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.

print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 4096, 16)          1680      
_________________________________________________________________
average_pooling1d (AveragePo (None, 2048, 16)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 2048, 16)          2064      
_________________________________________________________________
average_pooling1d_1 (Average (None, 1024, 16)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1024, 16)          2064      
_________________________________________________________________
average_pooling1d_2 (Average (None, 512, 16)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 512, 16)           2

In [12]:
K.clear_session()
model=Sequential()
kernel_size = (3, 3)
nchan_out = 32

model.add(Conv2D(input_shape=input_shape,filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu" ))
#model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(input_shape=input_shape,filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu"))
#model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2,2)))
"""
model.add(Conv2D(input_shape=input_shape,filters=nchan_out,
                 kernel_size=kernel_size))
model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2,2)))
"""


model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(256, activation='relu'))
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='sigmoid')) #output layer that corresponds to the 5 physical parameters.
#use mse for the physical parameters valid?
model.compile(loss='mean_squared_error', optimizer='adam')

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 11, 4094, 2)       20        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 5, 2047, 2)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 2045, 2)        38        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 1, 1022, 2)        0         
_________________________________________________________________
flatten (Flatten)            (None, 2044)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 2044)              8176      
_________________________________________________________________
dense (Dense)                (None, 256)               5

In [32]:
np.log(13)

2.5649493574615367

## Build two experiments and move them into training scripts. start running them!

1. generate one scattering example
2. generate pairs of data sample (scattering,ground truth)
3. pescador streamer and data generator

In [None]:
#verify if using GPU
import tensorflow as tf
tf.test.is_gpu_available()

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
# scattering order one
def getsc_new(y,J,Q_num,order):
    """
    this function outputs scattering transform of a time-domain signal.
    """
    N = len(y)
    scattering = Scattering1D(J = J,shape=(N,), Q = Q_num, max_order=order)
    Sy = scattering(y)
    return Sy

In [17]:
df_train = pd.read_csv("./train_param.csv")
df_test = pd.read_csv("./test_param.csv")

In [31]:
# normalization of the physical parameters
params = df.values[:,1:-1]
scaler = MinMaxScaler()
scaler.fit(params)
params_normalized = scaler.transform(params)

In [16]:
def feature_sampler(df,params_normalized,idx,path_to_folder,J,Q,order):
    """
    output a {input, ground truth} pair for the designated audio sample
    """
    i=idx
    y=params_normalized[i,:] #df.values[i,1:-1]
    path_to_audio = os.path.join(path_to_folder,str(df.values[i,0])+"_sound.wav") 
    x,fs=librosa.load(path_to_audio)
    Sy = getsc_new(x,J,Q,order)
    while True:
        #np.random.shuffle(idx)
        #i=idx[0]
        yield {'input': Sy,'y': y}

        
def data_generator(df, params_normalized, path_to_folder, J, Q, order, batch_size, idx, active_streamers,
                        rate, random_state=12345678):
    """
    use streamers to output a batch of {input groundtruth} pairs. 
    """
    seeds = []
    for i in idx:
        streamer = pescador.Streamer(feature_sampler, df, params_normalized, i,path_to_folder,J,Q,order)
        seeds.append(streamer)

    # Randomly shuffle the seeds
    random.shuffle(seeds)

    mux = pescador.StochasticMux(seeds, active_streamers, rate=rate, random_state=random_state)

    return pescador.maps.buffer_stream(mux, batch_size)

In [None]:
## first run with small number of training
epochs=12
batch_size=32
random_state=12345678
active_streamers=1024
path_to_train = "/scratch/hh2263/drum_dataset/train/"
path_to_test = "/scratch/hh2263/drum_dataset/test/"
J = 8
Q = 1
order = 1 # remember to go to order 2 eventually
train_idx = df_train.values[0,:100]
test_idx = df_test.values[0,:30]
train_batches=data_generator(df,params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
test_batches=data_generator(df,params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
steps_per_epoch = len(train_idx) // batch_size

In [None]:
fname = random.choice(os.listdir(path_to_train))
rand_audio = os.path.join(path_to_train,fname)
y = librosa.load(rand_audio)
Sy = getsc_new(y,J,Q,order).T
nrow, ncol = Sy.shape 
naudio = batch_size         # number of images in batch
nchan_in = 1       # number of input channels.  1 since it is BW
input_shape = (nrow,ncol,nchan_in)  # input shape of 1 image
batch_shape = (naudio,nrow,ncol,nchan_in)  # shape of image batch
x = S.reshape(batch_shape)

In [32]:
kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=input_shape, filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same" ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.

print(model.summary())

NameError: name 'input_shape' is not defined

In [None]:
#preliminary test
hist = model.fit_generator(
        pescador.maps.keras_tuples(train_batches, 'input', 'y'),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=pescador.maps.keras_tuples(test_batches, 'input', 'y'),
        validation_steps=1024,
        verbose=1
        )