In [48]:
import numpy as np
import tensorflow.keras
import torch
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv1D, AveragePooling1D, Conv2D, MaxPooling2D,ReLU
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model #save and load models
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import BatchNormalization
import IPython.display as ipd
from kymatio import Scattering1D
import hitdifferentparts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pescador

In [2]:
import random
import os
import librosa

In [3]:
#verify if using GPU

tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# scattering order one
def getsc_new(y,J,Q_num,order):
    """
    this function outputs scattering transform of a time-domain signal.
    """
    N = len(y)
    scattering = Scattering1D(J = J,shape=(N,), Q = Q_num, max_order=order)
    Sy = scattering(torch.Tensor(y))
    return Sy

In [5]:
df_train = pd.read_csv("./train_param.csv")
df_test = pd.read_csv("./test_param.csv")
df_val = pd.read_csv("./val_param.csv")
df_full = pd.read_csv("./diffshapes_param.csv")

In [6]:
idx = df_train.values[20,0]
print(df_full.values[idx,:],df_train.values[20,:])

[75404 9749.802795959802 0.1715442523609866 0.007251944954821789
 0.000253620438009476 0.4756837330810375 'train'] [75404 9749.802795959802 0.1715442523609866 0.007251944954821789
 0.000253620438009476 0.4756837330810375 'train']


In [7]:
# normalization of the physical parameters
params = df_train.values[:,1:-1]
scaler = MinMaxScaler()
scaler.fit(params)
train_params_normalized = scaler.transform(params)
test_params_normalized = scaler.transform(df_test.values[:,1:-1])
val_params_normalized = scaler.transform(df_val.values[:,1:-1])

In [8]:
test_params_normalized[1,:]

array([3.29815433e-02, 6.46686154e-01, 1.72053132e-04, 3.97170245e-02,
       8.47279389e-01])

In [9]:
def feature_sampler(df,params_normalized,idx,path_to_folder,J,Q,order):
    """
    output a {input, ground truth} pair for the designated audio sample
    """
    i=idx
    y=np.array(params_normalized[i,:]).reshape((5,)) #df.values[i,1:-1]
    path_to_audio = os.path.join(path_to_folder,str(df.values[i,0])+"_sound.wav") 
    x,fs=librosa.load(path_to_audio)
    Sy = getsc_new(x,J,Q,order)
    while True:
        yield {'input': np.array(Sy),'y': y}

        
def data_generator(df, params_normalized, path_to_folder, J, Q, order, batch_size, idx, active_streamers,
                        rate, random_state=12345678):
    """
    use streamers to output a batch of {input groundtruth} pairs. 
    """
    seeds = []
    for i in idx:
        streamer = pescador.Streamer(feature_sampler, df, params_normalized, i,path_to_folder,J,Q,order)
        seeds.append(streamer)

    # Randomly shuffle the seeds
    random.shuffle(seeds)

    mux = pescador.StochasticMux(seeds, active_streamers, rate=rate, random_state=random_state)
   
    if batch_size == 1:
        return mux
    else:
        return pescador.maps.buffer_stream(mux, batch_size)


In [31]:
## first run with small number of training
epochs=12
batch_size=32
random_state=12345678
active_streamers=1024
path_to_train = "/scratch/hh2263/drum_data/train/"
path_to_test = "/scratch/hh2263/drum_data/test/"
J = 8
Q = 2
order = 1 # remember to go to order 2 eventually
train_idx = np.arange(0,1000,1) #df_train.values[:1000,0]
test_idx = np.arange(0,300,1) #df_test.values[:300,0]
train_batches=data_generator(df_train,train_params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
test_batches=data_generator(df_test,test_params_normalized, path_to_test,J, Q, order, batch_size, test_idx,active_streamers,rate=64,random_state=random_state)
steps_per_epoch = len(train_idx) // batch_size

## Build the model

In [32]:
fname = random.choice(os.listdir(path_to_train))
rand_audio = os.path.join(path_to_train,fname)
y,sr = librosa.load(rand_audio)
Sy = getsc_new(torch.Tensor(y),J,Q,order).T
nrow, ncol = Sy.shape 
naudio = batch_size         # number of images in batch
nchan_in = 1       # number of input channels.  1 since it is BW
#input_shape = (nrow,ncol,nchan_in)  # input shape of 1 image
input_shape = Sy.shape
batch_shape = (naudio,nrow,ncol,nchan_in)  # shape of image batch
#x = Sy.reshape(batch_shape)

In [33]:
print(input_shape,batch_shape)

torch.Size([128, 18]) (32, 128, 18, 1)


In [50]:
kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=(128,18,), filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv1'))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv2' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv3' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv4' ))
model.add(AveragePooling1D(pool_size=(2,)))

model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.


# Compile the model
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv1D)               (None, 128, 16)           2320      
_________________________________________________________________
average_pooling1d (AveragePo (None, 32, 16)            0         
_________________________________________________________________
conv2 (Conv1D)               (None, 32, 16)            2064      
_________________________________________________________________
average_pooling1d_1 (Average (None, 8, 16)             0         
_________________________________________________________________
conv3 (Conv1D)               (None, 8, 16)             2064      
_________________________________________________________________
average_pooling1d_2 (Average (None, 2, 16)             0         
_________________________________________________________________
conv4 (Conv1D)               (None, 2, 16)             2

In [43]:
train_gen = pescador.maps.keras_tuples(train_batches, 'input', 'y')
test_gen = pescador.maps.keras_tuples(test_batches, 'input', 'y')

In [46]:
model.fit(train_gen,use_multiprocessing=False)

  ...
    to  
  ['...']
      1/Unknown - 0s 58ms/step

InvalidArgumentError:  input depth must be evenly divisible by filter depth: 128 vs 18
	 [[node sequential/conv1/conv1d (defined at <ipython-input-44-b09609241779>:1) ]] [Op:__inference_distributed_function_10109]

Function call stack:
distributed_function


In [113]:
#preliminary test
hist = model.fit(
        train_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=test_gen,
        validation_steps=1024,
        verbose=2
        )

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 31 steps, validate for 1024 steps
Epoch 1/12


InvalidArgumentError:  input depth must be evenly divisible by filter depth: 128 vs 10
	 [[node sequential/conv1d/conv1d (defined at <ipython-input-113-c116cca03a34>:8) ]] [Op:__inference_distributed_function_4467]

Function call stack:
distributed_function


##

In [104]:
np.sum(df_train.values[:,0]==51710)

1