In [None]:
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats, signal
from keras.models import Model, Sequential
from keras.layers import Conv2D,Conv1D, Dropout, MaxPooling2D, Flatten, Dense, Input , concatenate, MaxPooling1D, BatchNormalization
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras import metrics
import os
from google.colab import drive
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive at both mount points.
# NOTE(review): the visible feature path only goes through the 'drive' mount;
# the 'gdrive' mount looks redundant — confirm before removing it.
from google.colab import drive

for mount_point in ('/content/gdrive', '/content/drive'):
    drive.mount(mount_point)

Mounted at /content/gdrive
Mounted at /content/drive


In [None]:
# # Yariv
# feat_space = './drive/MyDrive/audio_colab/Features/'

# Tal
feat_space = './drive/MyDrive/Voice Recording Database/Features/'

# X maps class index -> list of per-recording feature dicts.
# Y maps label string (file-name prefix) -> class index.
X = {}
Y = {}

class_no = 0
stat_names = [
    'spectral_centroid',
    'spectral_bandwidth',
    'rms',
    'zero_crossing_rate',
]

mel_orders = ['mfcc', 'del-mfcc', 'del-del-mfcc']

# os.listdir returns entries in arbitrary order; sorting makes the
# label -> class index assignment reproducible between runs.
for spect_path in sorted(os.listdir(feat_space)):
    # Only feature files are of interest; skip duplicated "Copy" files.
    if not spect_path.endswith('.npy') or 'Copy' in spect_path:
        continue

    # File names look like '<label>_<index>.npy'. Splitting on the LAST
    # underscore keeps labels that themselves contain underscores intact.
    label, _ = spect_path.rsplit('_', 1)

    # NOTE(security): allow_pickle=True can execute arbitrary code embedded
    # in the file; only load feature files from a trusted source.
    obj = np.load(os.path.join(feat_space, spect_path), allow_pickle=True)

    # Named `stat_feats` (not `stats`) so the `scipy.stats` module imported
    # at the top of the notebook is not shadowed.
    stat_feats = obj[0, 0]
    mel = obj[0, 2]

    data = {
        'STFT': obj[0, 1],
        'Spect': obj[0, 3],
        'dSdT': obj[0, 4],
        'dS2dT2': obj[0, 5],
    }

    # One row of the statistics array per named statistic.
    for row, stat_name in enumerate(stat_names):
        data[stat_name] = stat_feats[row, :]

    # One 2-D slice of the mel array per MFCC order.
    for order, mel_name in enumerate(mel_orders):
        data[mel_name] = mel[order, :, :]

    # First time this label is seen: allocate the next class index.
    if label not in Y:
        Y[label] = class_no
        X[class_no] = []
        class_no += 1

    X[Y[label]].append(data)







In [None]:
# Flatten the per-class sample lists into two parallel sequences:
# X_train holds the feature dicts, Y_train the matching class indices.
X_train = []
Y_train = []
for i in range(class_no):
    X_train.extend(X[i])
    # Every sample of class i gets the label i (the class index), not its
    # position inside the class list.
    Y_train.extend([i] * len(X[i]))

X_train = np.array(X_train)
Y_train = np.array(Y_train)

In [None]:
def get_model_block(inp_shape, dims, p=0.05):
    """Build the convolutional feature-extraction branch for one feature.

    Parameters
    ----------
    inp_shape : sequence of int
        Shape of a single sample of the feature. For 'image' and 'coeffs'
        the first two entries are used as (height, width); for 'stats'
        only the first entry (sequence length) is used. A trailing channel
        axis of size 1 is added to the input layer.
    dims : str
        Kind of branch to build:
        'image'  - two Conv2D layers, max-pooling, dropout;
        'coeffs' - one Conv2D layer, max-pooling, batch-norm, dropout;
        'stats'  - two Conv1D layers, max-pooling, batch-norm, dropout.
    p : float, optional
        Dropout rate used inside the branch.

    Returns
    -------
    Model
        Keras model mapping the branch input to its flattened features.

    Raises
    ------
    ValueError
        If `dims` is not one of 'image', 'coeffs' or 'stats'.
    """
    # Validate up-front so an unknown kind gives a clear error instead of
    # failing later on names that were never assigned.
    if dims not in ('image', 'coeffs', 'stats'):
        raise ValueError(
            "dims must be 'image', 'coeffs' or 'stats', got %r" % (dims,))

    if dims == 'image':
        inp = Input(shape=(inp_shape[0], inp_shape[1], 1))
        # The input shape is fixed by `inp`; no `input_shape` kwarg is
        # needed on layers that are called on an existing tensor.
        x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inp)
        x = Conv2D(32, kernel_size=(3, 3), activation='relu')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(p)(x)
    elif dims == 'coeffs':
        inp = Input(shape=(inp_shape[0], inp_shape[1], 1))
        x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inp)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = BatchNormalization()(x)
        x = Dropout(p)(x)
    else:  # dims == 'stats'
        inp = Input(shape=(inp_shape[0], 1))
        x = Conv1D(32, kernel_size=3, activation='relu')(inp)
        x = Conv1D(32, kernel_size=3, activation='relu')(x)
        x = MaxPooling1D(2)(x)
        x = BatchNormalization()(x)
        x = Dropout(p)(x)

    # Every branch ends in a flat vector so branches can be concatenated.
    connection_layer = Flatten()(x)

    return Model(inputs=inp, outputs=connection_layer)

def model_output(models, num_class, lr=1e-3):
    """Join feature branches with a dense classification head and compile.

    Parameters
    ----------
    models : sequence of Model
        One or more branch models (e.g. built by `get_model_block`). Their
        outputs must be flat vectors so they can be concatenated.
    num_class : int
        Number of output classes (width of the softmax layer).
    lr : float, optional
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    Model
        Compiled Keras model taking one input per branch (in the order of
        `models`) and producing class probabilities. It is compiled with
        categorical cross-entropy loss and the 'accuracy' metric.

    Raises
    ------
    ValueError
        If `models` is empty.
    """
    if len(models) == 0:
        raise ValueError("model_output needs at least one branch model")

    if len(models) > 1:
        outputs = concatenate([m.output for m in models])
    else:
        # A single branch needs no concatenation; feed its features to the
        # head directly instead of duplicating them.
        outputs = models[0].output
    inputs = [m.input for m in models]

    x = Dense(256, activation='relu')(outputs)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    x = Dense(32, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    output_layer = Dense(num_class, activation='softmax')(x)

    opt = Adam(lr)
    m = Model(inputs=inputs, outputs=output_layer)
    m.compile(loss=categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
    return m

In [None]:
# Candidate features for the selection loop. Every entry must be a key of
# `dims` and of the per-sample feature dicts.
feats = ['rms', 'mfcc']
# Maps a feature name to the kind of branch `get_model_block` builds for it.
dims = {'rms':'stats','STFT':'image','Spect':'image','mfcc':'coeffs','del-mfcc':'coeffs','del-del-mfcc':'coeffs'}
classes = int(Y_train.max()) + 1
class_labels = np.zeros((classes))

# One-hot encode the integer labels: row k of the identity matrix is the
# one-hot vector of class k, so indexing with Y_train yields shape
# (n_samples, classes).
Y_train2 = np.eye(classes)[Y_train]

In [None]:
def reshape_data(feat, X):
    """Stack one feature of every sample into a single array with a channel axis.

    Parameters
    ----------
    feat : str
        Key of the feature to extract from each sample dict.
    X : sequence of dict
        Samples; each maps feature names to numpy arrays.

    Returns
    -------
    list
        A one-element list holding the stacked array. If the feature of the
        first sample is 2-D with shape (h, w), every sample is reshaped to
        (h, w, 1); otherwise every sample is flattened to shape (-1, 1).

    Raises
    ------
    KeyError
        If `feat` is missing from a sample.
    ValueError
        If a sample of a 2-D feature cannot be reshaped to the (h, w, 1)
        shape derived from the first sample.
    """
    first = X[0][feat]
    # Decide on the layout from the rank explicitly rather than by catching
    # whatever exception a failed 2-D unpack happens to raise.
    if first.ndim == 2:
        h, w = first.shape
        X_i = np.array([x[feat].reshape(h, w, 1) for x in X])
    else:
        X_i = np.array([x[feat].reshape(-1, 1) for x in X])

    return [X_i]
  

In [None]:
# Greedy forward feature selection.
# Each round trains one model per not-yet-selected feature (on top of the
# features selected so far) and keeps the candidate with the lowest held-out
# loss, provided it improves the best loss so far by more than 10%.
done = False
best_loss = float('inf')
best_feats = []

# The split uses a fixed random_state, so it is identical every round and is
# computed once. train_test_split returns (first part, second part) with
# `test_size` being the fraction of the SECOND part; the unpacking order
# below therefore makes the 75% portion the fitting set and the 25% portion
# the evaluation set.
x_test_round, x_train_round, y_test_round, y_train_round = train_test_split(
    X_train, Y_train2, test_size=0.75, random_state=7)

# Width of the one-hot targets = number of classes.
num_class = Y_train2.shape[1]

while not done:
  # Features still available this round; `losses` is kept aligned with it.
  candidates = [f for f in feats if f not in best_feats]
  if not candidates:
    # Every feature has been selected; nothing left to try.
    break

  # Inputs of the already-selected features are shared by all candidates.
  # reshape_data returns a one-element list, hence the [0].
  x_train = [reshape_data(b, x_train_round)[0] for b in best_feats]
  x_test = [reshape_data(b, x_test_round)[0] for b in best_feats]

  losses = []
  for f in candidates:
    print(f)
    print(len(best_feats))

    trial_feats = best_feats + [f]
    x_train_i = x_train + [reshape_data(f, x_train_round)[0]]
    x_test_i = x_test + [reshape_data(f, x_test_round)[0]]

    # Build fresh branches for every candidate so weights trained for one
    # candidate do not leak into the evaluation of the next.
    m_list = [get_model_block(x_train_round[0][name].shape, dims[name])
              for name in trial_feats]

    m = model_output(m_list, num_class=num_class, lr=1e-4)

    # Inputs are a flat list with one array per branch, in branch order.
    m.fit(x_train_i, y_train_round, epochs=20, batch_size=16, verbose=0)
    loss, acc = m.evaluate(x_test_i, y_test_round, batch_size=16)

    losses.append(loss)

  best_round = min(losses)
  if best_round < best_loss*0.9:
    best_loss = best_round
    # Index into `candidates` (not `feats`): `losses` only has entries for
    # the features that were actually tried this round.
    best_feats.append(candidates[losses.index(best_round)])
  else:
    done = True

rms
0
mfcc
0
mfcc
1


In [None]:
# Show the features picked by the selection loop above.
print(best_feats)

['rms']
