In [1]:
import numpy as np
import tensorflow.keras
import torch
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv1D, AveragePooling1D, Conv2D, MaxPooling2D,ReLU
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model #save and load models
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger, ModelCheckpoint
import IPython.display as ipd
from kymatio import Scattering1D
import hitdifferentparts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pescador

In [2]:
import random
import os
import librosa

In [3]:
#verify if using GPU

#tf.config.list_physical_devices('GPU')
tf.config.experimental.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# scattering order one
def getsc_new(y,J,Q_num,order):
    """
    this function outputs scattering transform of a time-domain signal.
    """
    N = len(y)
    scattering = Scattering1D(J = J,shape=(N,), Q = Q_num, max_order=order)
    Sy = scattering(torch.Tensor(y))
    return Sy

In [5]:
df_train = pd.read_csv("./train_param.csv")
df_test = pd.read_csv("./test_param.csv")
df_val = pd.read_csv("./val_param.csv")
df_full = pd.read_csv("./diffshapes_param.csv")

In [6]:
# normalization of the physical parameters
params = df_train.values[:,1:-1]
scaler = MinMaxScaler()
scaler.fit(params)
train_params_normalized = scaler.transform(params)
test_params_normalized = scaler.transform(df_test.values[:,1:-1])
val_params_normalized = scaler.transform(df_val.values[:,1:-1])

In [7]:
params.shape[0]

82224

In [8]:
def feature_sampler(df,params_normalized,idx,path_to_folder,J,Q,order):
    """
    output a {input, ground truth} pair for the designated audio sample
    """
    i=idx
    y=np.array(params_normalized[i,:]).reshape((5,)) #df.values[i,1:-1]
    path_to_audio = os.path.join(path_to_folder,str(df.values[i,0])+"_sound.wav") 
    x,fs=librosa.load(path_to_audio)
    Sy = getsc_new(x,J,Q,order)
    m,n = Sy.shape
    Sy2 = np.array(Sy).reshape((n,m))
    
    while True:
        yield {'input': Sy2,'y': y}

        
def data_generator(df, params_normalized, path_to_folder, J, Q, order, batch_size, idx, active_streamers,
                        rate, random_state=12345678):
    """
    use streamers to output a batch of {input groundtruth} pairs. 
    """
    seeds = []
    for i in idx:
        streamer = pescador.Streamer(feature_sampler, df, params_normalized, i,path_to_folder,J,Q,order)
        seeds.append(streamer)

    # Randomly shuffle the seeds
    random.shuffle(seeds)

    mux = pescador.StochasticMux(seeds, active_streamers, rate=rate, random_state=random_state)
   
    if batch_size == 1:
        return mux
    else:
        return pescador.maps.buffer_stream(mux, batch_size)


In [9]:
## first run with small number of training
epochs=12
batch_size=32
random_state=12345678
active_streamers=64
path_to_train = "/scratch/hh2263/drum_data/train/"
path_to_test = "/scratch/hh2263/drum_data/test/"
J = 8
Q = 1
order = 2 # remember to go to order 2 eventually
train_idx = np.arange(0,params.shape[0],1)#np.arange(0,1000,1) #df_train.values[:1000,0]
#test_idx = np.arange(0,300,1) #df_test.values[:300,0]
train_batches=data_generator(df_train,train_params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
#test_batches=data_generator(df_test,test_params_normalized, path_to_test,J, Q, order, batch_size, test_idx,active_streamers,rate=64,random_state=random_state)
steps_per_epoch = 10 #len(train_idx) // batch_size

## Build the model

In [10]:
fname = random.choice(os.listdir(path_to_train))
rand_audio = os.path.join(path_to_train,fname)
y,sr = librosa.load(rand_audio)
Sy = getsc_new(torch.Tensor(y),J,Q,order).T
nrow, ncol = Sy.shape 
naudio = batch_size         # number of images in batch
nchan_in = 1       # number of input channels.  1 since it is BW
#input_shape = (nrow,ncol,nchan_in)  # input shape of 1 image
input_shape = Sy.shape
batch_shape = (naudio,nrow,ncol,nchan_in)  # shape of image batch
#x = Sy.reshape(batch_shape)

In [11]:
print(input_shape,batch_shape)

torch.Size([128, 43]) (32, 128, 43, 1)


In [12]:
kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=input_shape, filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv1'))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv2' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv3' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv4' ))
model.add(AveragePooling1D(pool_size=(2,)))

model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.


# Compile the model
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv1D)               (None, 128, 16)           5520      
_________________________________________________________________
average_pooling1d (AveragePo (None, 32, 16)            0         
_________________________________________________________________
conv2 (Conv1D)               (None, 32, 16)            2064      
_________________________________________________________________
average_pooling1d_1 (Average (None, 8, 16)             0         
_________________________________________________________________
conv3 (Conv1D)               (None, 8, 16)             2064      
_________________________________________________________________
average_pooling1d_2 (Average (None, 2, 16)             0         
_________________________________________

In [13]:
import sys
output_dir = "../output/"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)
model_filepath = os.path.join(output_dir, 'model.h5')
log_filepath = os.path.join(output_dir, 'train_log.csv')

callbacks = []
callbacks.append(EarlyStopping(patience=10))
callbacks.append(ModelCheckpoint(model_filepath, save_best_only=True))
callbacks.append(CSVLogger(log_filepath))

print("Fitting model.")
sys.stdout.flush()


Fitting model.


In [14]:
train_gen = pescador.maps.keras_tuples(train_batches, 'input', 'y')

In [15]:
import pickle
pkl_path = '/scratch/hh2263/drum_data/val/J_8_Q_1_order_2.pkl'
pkl_file = open(pkl_path, 'rb')
Sy_val,y_val = pickle.load(pkl_file) 

Sy_val = Sy_val.reshape((Sy_val.shape[2],Sy_val.shape[0],Sy_val.shape[1]))
#Sy_val = torch.Tensor(Sy_val)
y_val = y_val.astype('float32')
#y_val = torch.Tensor(y_val.astype('float32'))
print(Sy_val.shape,y_val.shape)

(7776, 128, 43) (7776, 5)


In [16]:
y_val_normalized = scaler.transform(y_val)
print(y_val[1,:],y_val_normalized[1,:])

[3.6306809e+03 9.4744407e-02 1.5361507e-03 2.7646576e-03 3.1666741e-01] [2.5150061e-01 2.9220518e-01 7.1812602e-04 2.6654196e-04 3.1599283e-01]


In [17]:
for epoch in range(epochs):
    model.fit(train_gen,steps_per_epoch=10,epochs=1,use_multiprocessing=True)
    print('done fitting')
    loss,accuracy = model.evaluate(Sy_val,y_val_normalized)
    print(loss,accuracy)
    

done fitting
0.13220949009551433 0.33294752
done fitting
0.09747603526453913 0.33294752
done fitting
0.06718635030949312 0.33307612
done fitting
0.04369594477914243 0.33307612
done fitting
0.028772203964583666 0.33307612
done fitting
0.022054807666244576 0.33307612
done fitting
0.020106098953818465 0.33307612
done fitting
0.01974065073692995 0.33307612
done fitting
0.01982364331567545 0.33307612
done fitting
0.019981272528216305 0.33307612
done fitting
0.020095710611974998 0.33307612
done fitting
0.0201544643996797 0.33307612


In [15]:
#preliminary test
hist = model.fit(
        pescador.maps.keras_tuples(train_batches, 'input', 'y'),
        steps_per_epoch=steps_per_epoch,
        epochs=2,
        validation_data=pescador.maps.keras_tuples(test_batches, 'input', 'y'),
        validation_steps=1024,
        verbose=1,
        callbacks=callbacks
        )

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 1 steps, validate for 1024 steps
Epoch 1/2
Epoch 2/2


##

In [None]:
new_model = tf.keras.models.load_model(model_file_path)
