In [1]:
import numpy as np
import tensorflow.keras
import torch
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv1D, AveragePooling1D, Conv2D, MaxPooling2D,ReLU
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model #save and load models
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger, ModelCheckpoint
import IPython.display as ipd
from kymatio import Scattering1D
import hitdifferentparts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pescador

In [2]:
import random
import os
import librosa

In [3]:
#verify if using GPU

#tf.config.list_physical_devices('GPU')
tf.config.experimental.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# scattering order one
def getsc_new(y,J,Q_num,order):
    """
    this function outputs scattering transform of a time-domain signal.
    """
    N = len(y)
    scattering = Scattering1D(J = J,shape=(N,), Q = Q_num, max_order=order)
    Sy = scattering(torch.Tensor(y))
    return Sy

In [5]:
df_train = pd.read_csv("./train_param.csv")
df_test = pd.read_csv("./test_param.csv")
df_val = pd.read_csv("./val_param.csv")
df_full = pd.read_csv("./diffshapes_param.csv")

In [6]:
# normalization of the physical parameters
params = df_train.values[:,1:-1]
scaler = MinMaxScaler()
scaler.fit(params)
train_params_normalized = scaler.transform(params)
test_params_normalized = scaler.transform(df_test.values[:,1:-1])
val_params_normalized = scaler.transform(df_val.values[:,1:-1])

In [7]:
params.shape[0]

82224

In [8]:
def feature_sampler(df,params_normalized,idx,path_to_folder,J,Q,order):
    """
    output a {input, ground truth} pair for the designated audio sample
    """
    i=idx
    y=np.array(params_normalized[i,:]).reshape((5,)) #df.values[i,1:-1]
    path_to_audio = os.path.join(path_to_folder,str(df.values[i,0])+"_sound.wav") 
    x,fs=librosa.load(path_to_audio)
    Sy = getsc_new(x,J,Q,order)
    m,n = Sy.shape
    Sy2 = np.array(Sy).reshape((n,m))
    
    while True:
        yield {'input': Sy2,'y': y}

        
def data_generator(df, params_normalized, path_to_folder, J, Q, order, batch_size, idx, active_streamers,
                        rate, random_state=12345678):
    """
    use streamers to output a batch of {input groundtruth} pairs. 
    """
    seeds = []
    for i in idx:
        streamer = pescador.Streamer(feature_sampler, df, params_normalized, i,path_to_folder,J,Q,order)
        seeds.append(streamer)

    # Randomly shuffle the seeds
    random.shuffle(seeds)

    mux = pescador.StochasticMux(seeds, active_streamers, rate=rate, random_state=random_state)
   
    if batch_size == 1:
        return mux
    else:
        return pescador.maps.buffer_stream(mux, batch_size)


In [41]:
## first run with small number of training
epochs=12
batch_size=32
random_state=12345678
active_streamers=64
path_to_train = "/scratch/hh2263/drum_data/train/"
path_to_test = "/scratch/hh2263/drum_data/test/"
path_to_val = "/scratch/hh2263/drum_data/val"
J = 8
Q = 1
order = 2 # remember to go to order 2 eventually
train_idx = np.arange(0,params.shape[0],1)#np.arange(0,1000,1) #df_train.values[:1000,0]
test_idx = np.arange(0,df_test.values.shape[0],1) #df_test.values[:300,0]
val_idx = np.arange(0,df_val.values.shape[0],1)
train_batches=data_generator(df_train,train_params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
test_batches=data_generator(df_test,test_params_normalized, path_to_test,J, Q, order, batch_size, test_idx,active_streamers,rate=64,random_state=random_state)
val_batches = data_generator(df_val,val_params_normalized, path_to_val,J, Q, order, batch_size, val_idx,active_streamers,rate=64,random_state=random_state)
steps_per_epoch = len(train_idx) // batch_size

In [36]:
def make_gen(epochs,batch_size,random_state,active_streamers,J,Q,order):
    train_idx = np.arange(0,params.shape[0],1)#np.arange(0,1000,1) #df_train.values[:1000,0]
    test_idx = np.arange(0,df_test.values.shape[0],1) #df_test.values[:300,0]
    val_idx = np.arange(0,df_val.values.shape[0],1)
    train_batches=data_generator(df_train,train_params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
    test_batches=data_generator(df_test,test_params_normalized, path_to_test,J, Q, order, batch_size, test_idx,active_streamers,rate=64,random_state=random_state)
    val_batches = data_generator(df_val,val_params_normalized, path_to_val,J, Q, order, batch_size, val_idx,active_streamers,rate=64,random_state=random_state)
    steps_per_epoch = len(train_idx) // batch_size
    

In [30]:
print(test_idx,train_idx,val_idx)

[   0    1    2 ... 9997 9998 9999] [    0     1     2 ... 82221 82222 82223] [   0    1    2 ... 7773 7774 7775]


## Build the model

In [10]:
fname = random.choice(os.listdir(path_to_train))
rand_audio = os.path.join(path_to_train,fname)
y,sr = librosa.load(rand_audio)
Sy = getsc_new(torch.Tensor(y),J,Q,order).T
nrow, ncol = Sy.shape 
naudio = batch_size         # number of images in batch
nchan_in = 1       # number of input channels.  1 since it is BW
#input_shape = (nrow,ncol,nchan_in)  # input shape of 1 image
input_shape = Sy.shape
batch_shape = (naudio,nrow,ncol,nchan_in)  # shape of image batch
#x = Sy.reshape(batch_shape)

In [11]:
print(input_shape,batch_shape)

torch.Size([128, 43]) (32, 128, 43, 1)


In [12]:
kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=input_shape, filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv1'))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv2' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv3' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv4' ))
model.add(AveragePooling1D(pool_size=(2,)))

model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.


# Compile the model
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv1D)               (None, 128, 16)           5520      
_________________________________________________________________
average_pooling1d (AveragePo (None, 32, 16)            0         
_________________________________________________________________
conv2 (Conv1D)               (None, 32, 16)            2064      
_________________________________________________________________
average_pooling1d_1 (Average (None, 8, 16)             0         
_________________________________________________________________
conv3 (Conv1D)               (None, 8, 16)             2064      
_________________________________________________________________
average_pooling1d_2 (Average (None, 2, 16)             0         
_________________________________________

In [14]:
import sys
output_dir = "../output/"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)
model_filepath = os.path.join(output_dir, 'model.h5')
log_filepath = os.path.join(output_dir, 'train_log.csv')

callbacks = []
callbacks.append(EarlyStopping(patience=10))
callbacks.append(ModelCheckpoint(model_filepath, save_best_only=True))
callbacks.append(CSVLogger(log_filepath))

print("Fitting model.")
sys.stdout.flush()


Fitting model.


In [42]:
train_gen = pescador.maps.keras_tuples(train_batches, 'input', 'y')
test_gen = pescador.maps.keras_tuples(test_batches, 'input', 'y')
val_gen = pescador.maps.keras_tuples(val_batches, 'input', 'y')

In [16]:
import pickle
pkl_path = '/scratch/hh2263/drum_data/val/J_8_Q_1_order_2.pkl'
pkl_file = open(pkl_path, 'rb')
Sy_val,y_val = pickle.load(pkl_file) 
Sy_val = Sy_val.reshape((Sy_val.shape[2],Sy_val.shape[0],Sy_val.shape[1]))
y_val = y_val.astype('float32')
print(Sy_val.shape,y_val.shape)

(7776, 128, 43) (7776, 5)


In [17]:
y_val_normalized = scaler.transform(y_val)
print(y_val[1,:],y_val_normalized[1,:])

[3.6306809e+03 9.4744407e-02 1.5361507e-03 2.7646576e-03 3.1666741e-01] [2.5150061e-01 2.9220518e-01 7.1812602e-04 2.6654196e-04 3.1599283e-01]


In [43]:
output_dir = "../output/"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)
model_filepath = os.path.join(output_dir, "training_1/cp.ckpt")
log_filepath = os.path.join(output_dir, 'train_log.csv')

callbacks = []
callbacks.append(EarlyStopping(patience=10))
callbacks.append(ModelCheckpoint(model_filepath, save_best_only=True))
callbacks.append(CSVLogger(log_filepath))


try putting validation step in model fit - wrap both training and validation data in data generator

In [44]:
hist = model.fit(
        train_gen,
        steps_per_epoch=steps_per_epoch/5,
        epochs=10,
        validation_data=val_gen,
        validation_steps=1024,
        verbose=1,
        callbacks=callbacks,
        use_multiprocessing=True
        )

Epoch 1/10

Process Keras_worker_ForkPoolWorker-5:
Process Keras_worker_ForkPoolWorker-6:
Traceback (most recent call last):
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/queues.py", line 352, in get
    res = self._reader.recv_bytes()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)


KeyboardInterrupt: 

  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
Traceback (most recent call last):
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/data_utils.py", line 832, in next_sample
    return six.next(_SHARED_SEQUENCES[uid])
  File "/home/hh2263/miniconda3/e

In [45]:
hist = model.fit_generator(
        train_gen,
        steps_per_epoch=steps_per_epoch/5,
        epochs=10,
        validation_data=val_gen,
        validation_steps=1024,
        verbose=1,
        callbacks=callbacks,
        use_multiprocessing=True
        )

Epoch 1/10

Process Keras_worker_ForkPoolWorker-11:
Process Keras_worker_ForkPoolWorker-12:
Traceback (most recent call last):
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()


KeyboardInterrupt: 

  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/queues.py", line 352, in get
    res = self._reader.recv_bytes()
Traceback (most recent call last):
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._targe

try putting them in a loop, use pickle file to validate, see if there's any change

In [46]:
for epoch in range(epochs):
    model.fit(train_gen,
              steps_per_epoch=steps_per_epoch/5,
              epochs=1,
              use_multiprocessing=True,
             )
    print('done fitting')
    loss,accuracy = model.evaluate(Sy_val,y_val_normalized)
    print(loss,accuracy)

done fitting
13.586680547690685 0.007973251
done fitting
16.720581855302974 0.23623972
done fitting
91.6346546691141 0.0
done fitting
14.78472523434172 0.0073302467


Process Keras_worker_ForkPoolWorker-19:
Traceback (most recent call last):


KeyboardInterrupt: 

  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/data_utils.py", line 832, in next_sample
    return six.next(_SHARED_SEQUENCES[uid])
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/pescador/maps.py", line 228, in keras_tuples
    for data in stream:
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/pescador/maps.py", line 130, in buffer_stream
    for item in stream:
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/pescador/mux

try increasing batch size - preventing accuracy jumping around

In [47]:
batch_size=64
train_batches=data_generator(df_train,train_params_normalized, path_to_train,J, Q, order, batch_size, train_idx,active_streamers,rate=64,random_state=random_state)
test_batches=data_generator(df_test,test_params_normalized, path_to_test,J, Q, order, batch_size, test_idx,active_streamers,rate=64,random_state=random_state)
val_batches = data_generator(df_val,val_params_normalized, path_to_val,J, Q, order, batch_size, val_idx,active_streamers,rate=64,random_state=random_state)
steps_per_epoch = len(train_idx) // batch_size
train_gen = pescador.maps.keras_tuples(train_batches, 'input', 'y')
for epoch in range(epochs):
    model.fit(train_gen,
              steps_per_epoch=steps_per_epoch/5,
              epochs=1,
              use_multiprocessing=True,
             )
    print('done fitting')
    loss,accuracy = model.evaluate(Sy_val,y_val_normalized)
    print(loss,accuracy)

done fitting
14.01408003975825 0.31867284
done fitting
52.18855343728399 0.0
 58/256 [=====>........................] - ETA: 2:25 - loss: 0.0203 - acc: 0.7271

Process Keras_worker_ForkPoolWorker-23:
Traceback (most recent call last):
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/data_utils.py", line 832, in next_sample
    return six.next(_SHARED_SEQUENCES[uid])
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/pescador/maps.py", line 228, in keras_tuples
    for data in stream:
  File "/home/hh2263/miniconda3/envs/drum-1.15/lib/python3.7/site-packages/pescador/maps.py", line 130, in buffer_stream
    for item in stream:
  File "/hom

KeyboardInterrupt: 

KeyboardInterrupt


If your train error is very low, yet your test/validation is very high, then you have over-fit the model with too many epochs. reduce epoch

In [None]:
for epoch in range(epochs):
    model.fit(train_gen,steps_per_epoch=steps_per_epoch,epochs=1,use_multiprocessing=True)
    print('done fitting')
    loss,accuracy = model.evaluate(Sy_val,y_val_normalized)
    print(loss,accuracy)

In [22]:
steps_per_epoch

2569

In [23]:
for epoch in range(epochs):
    model.fit(train_gen,steps_per_epoch=100,epochs=1,use_multiprocessing=True)
    print('done fitting')
    loss,accuracy = model.evaluate(Sy_val,y_val_normalized)
    print(loss,accuracy)
    

done fitting
0.02273722944988145 0.34696501
done fitting
0.026116104014677767 0.33256173
done fitting
0.025743482358477735 0.33256173
done fitting
0.0889722095043571 0.33217594
done fitting
1.2505538949260004 0.00012860082
done fitting
3.227608641479241 0.3278035
done fitting
6.863041829670408 0.040895063
done fitting
8.608638137456321 0.05774177
done fitting
7.999003028182827 0.04385288
done fitting
9.179327376830726 0.08564815
done fitting
11.848332500261535 0.069573045
done fitting
10.095043460160126 0.31430042


## see if i can load model back

In [19]:
kernel_size = (8,)
nchan_out = 16

K.clear_session()
model=Sequential()
model.add(Conv1D(input_shape=input_shape, filters=nchan_out,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv1'))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv2' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv3' ))
model.add(AveragePooling1D(pool_size=(4,)))
model.add(Conv1D(filters=16,
                 kernel_size=kernel_size,activation= "relu", padding="same",name='conv4' ))
model.add(AveragePooling1D(pool_size=(2,)))

model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
#what activation should be chosen for last layer, for regression problem? should be a linear function
model.add(Dense(5, activation='linear')) #output layer that corresponds to the 5 physical parameters.


# Compile the model
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])


loss, acc = model.evaluate(Sy_val,y_val_normalized, verbose=2)

7776/7776 - 1s - loss: 0.1682 - acc: 0.2433


In [22]:
model.load_weights("../output/model.h5")
loss,acc = model.evaluate(Sy_val,y_val_normalized, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100*acc))

7776/7776 - 1s - loss: 0.0710 - acc: 0.3346
Restored model, accuracy: 33.46%


In [15]:
#preliminary test
hist = model.fit(
        pescador.maps.keras_tuples(train_batches, 'input', 'y'),
        steps_per_epoch=steps_per_epoch,
        epochs=2,
        validation_data=pescador.maps.keras_tuples(test_batches, 'input', 'y'),
        validation_steps=1024,
        verbose=1,
        callbacks=callbacks
        )

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 1 steps, validate for 1024 steps
Epoch 1/2
Epoch 2/2


##

In [None]:
new_model = tf.keras.models.load_model(model_file_path)
