In [None]:
import numpy as np
import pandas as pd
import random
import librosa
import glob
import os
from Levenshtein import distance

In [None]:
pip install python-Levenshtein

Collecting python-Levenshtein
[?25l  Downloading https://files.pythonhosted.org/packages/42/a9/d1785c85ebf9b7dfacd08938dd028209c34a0ea3b1bcdb895208bd40a67d/python-Levenshtein-0.12.0.tar.gz (48kB)
[K     |██████▊                         | 10kB 10.0MB/s eta 0:00:01[K     |█████████████▌                  | 20kB 15.0MB/s eta 0:00:01[K     |████████████████████▏           | 30kB 17.6MB/s eta 0:00:01[K     |███████████████████████████     | 40kB 11.0MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 3.1MB/s 
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.0-cp36-cp36m-linux_x86_64.whl size=144809 sha256=f3df87ef1507f09385090c47a61089ac49e87c2c01fe1343763546385e588bdf
  Stored in directory: /root/.cache/pip/wheels/de/c2/93/660fd5f7559049268ad2dc6d81c4e39e9e36518766eaf7e342
Successfully built python-Levenshtein
Instal

In [None]:
def read_csv(filename):
    '''
    Read an "id,label" CSV file into a dictionary.

    Input: path of a text file whose lines look like
           '176787-5-0-27.wav,engine_idling'
    Output: dict mapping the first comma-separated field of each line to
            the second one, e.g. {'176787-5-0-27.wav': 'engine_idling'}

    Blank lines (such as a trailing empty line) are skipped.
    If an id occurs more than once, the last occurrence wins.
    '''
    id_label = {}
    with open(filename, 'r') as fid:
        for line in fid:  # '176787-5-0-27.wav,engine_idling\n'
            line = line.strip()
            if not line:
                # An empty line has no second field; skip it instead of
                # failing with IndexError.
                continue
            tokens = line.split(',')  # ['176787-5-0-27.wav', 'engine_idling']
            id_label[tokens[0]] = tokens[1]
    return id_label

In [None]:
def wavs2feat(wavfiles, sr=44100):
    '''
    Concatenate the audio files listed in wavfiles
    Input: wavfiles - list (or array) of audio wav file names
           sr       - sample rate in Hz passed to librosa.load for every
                      file (default 44100)
    Output: Magnitude spectrogram of concatenated wav
            (n_fft=1024, 20ms Hann window, 10ms hop)
    Raises: ValueError if wavfiles is empty
    '''
    if len(wavfiles) == 0:
        raise ValueError("wavfiles must contain at least one file name")
    # Every clip is loaded as mono at the same rate, so the samples can be
    # joined end to end.
    clips = [librosa.load(wf, sr=sr, mono=True)[0] for wf in wavfiles]
    signal = np.hstack(clips)
    # Frame sizes come from the requested rate, not from a variable left
    # over from the loading loop.
    hop = int(0.01 * sr)  # 10ms
    win = int(0.02 * sr)  # 20ms
    X = librosa.stft(signal, n_fft=1024, hop_length=hop, win_length=win, window='hann', center=True, pad_mode='reflect')
    return np.abs(X)

In [None]:
def evals(gtcsv, estcsv, taskid):
    '''
    Score an estimated label file against a ground-truth label file.

    Input: gtcsv  - path of the ground-truth CSV (parsed with read_csv)
           estcsv - path of the estimated CSV (parsed with read_csv)
           taskid - 1: score 1 for every id whose labels match exactly
                    2: score from editDistance(gt_label, est_label)
    Output: score averaged over the ids present in estcsv
            (0.0 when estcsv contains no entries)
    Raises: ValueError if taskid is not 1 or 2
            KeyError if an id from estcsv is missing in gtcsv
    '''
    # Reject a bad task id before touching any file, so the mistake is
    # reported even when the estimate file is empty.
    if taskid not in (1, 2):
        raise ValueError("taskid not correct; it is {}".format(taskid))

    gt_id_label = read_csv(gtcsv)
    est_id_label = read_csv(estcsv)
    if not est_id_label:
        return 0.0  # nothing to average over; avoids ZeroDivisionError

    score = 0
    for file_id, est_label in est_id_label.items():
        gt_label = gt_id_label[file_id]
        if taskid == 1:
            if est_label == gt_label:
                score += 1
        else:
            _, ss = editDistance(gt_label, est_label)
            score += ss
    return score / len(est_id_label)

In [None]:
def editDistance(gt, est):
    '''
    Token-level edit distance between two '-'-separated label strings.

    E.g. gt is "dog_bark-street_music-engine_idling"
    E.g. est is "street_music-engine_idling"

    Output: (editdist, score)
            editdist - Levenshtein distance between the two token sequences
            score    - 1 - editdist / (number of gt tokens); this can be
                       negative when est is much longer than gt
    '''
    gttokens = gt.split('-')
    esttokens = est.split('-')
    # Map every distinct token to its own single character so the token
    # sequences can be compared as plain strings. Characters are handed out
    # in first-seen order starting at 'A', e.g.
    # {'dog_bark': 'A', 'street_music': 'B', 'engine_idling': 'C'}
    token_char = {}
    for token in gttokens + esttokens:
        if token not in token_char:
            token_char[token] = chr(ord('A') + len(token_char))
    # convert gt and est to strings
    gtstr = ''.join(token_char[t] for t in gttokens)  # 'ABC'
    eststr = ''.join(token_char[t] for t in esttokens)  # 'BC'
    # Compare
    editdist = distance(gtstr, eststr)  # 1
    # str.split never returns an empty list, so len(gtstr) is at least 1.
    score = 1 - editdist / len(gtstr)
    return editdist, score

In [None]:
def padding(X, target_len=2001):
  '''
  Zero-pad a 2-D feature matrix on the right so it has target_len columns.

  Input: X          - 2-D array (rows x columns)
         target_len - required number of columns; the default 2001 is the
                      frame count the model input (513, 2001, 1) expects
  Output: X itself when it already has target_len columns, otherwise a new
          array with zero columns appended; the dtype of X is preserved
  Raises: ValueError if X has more than target_len columns
  '''
  n_cols = X.shape[1]
  if n_cols > target_len:
    raise ValueError(
        "X has {} columns, more than target_len={}".format(n_cols, target_len))
  if n_cols < target_len:
    # Use X's dtype for the pad so padded and unpadded results match;
    # a default float64 pad would silently upcast float32 features.
    pad = np.zeros((X.shape[0], target_len - n_cols), dtype=X.dtype)
    X = np.hstack((X, pad))
  return X

In [None]:
# Sound-event class names; a name's position in this list is its class index.
classes = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'street_music',
    'siren',
]
# Lookup table from class name to integer class index (0..9).
labels = {name: index for index, name in enumerate(classes)}

In [None]:
def assign_files_labels(filenames,labels,audio_labels,num_classes=10,seq_len=5):
  '''
  Build the one-hot label sequence for a list of concatenated clips.

  Input: filenames    - sequence of file names, in concatenation order
         labels       - dict: class name -> integer class index
         audio_labels - dict: file name -> class name
         num_classes  - width of each one-hot row (default 10)
         seq_len      - minimum number of rows in the result (default 5)
  Output: float array of shape (rows, num_classes) with rows >= seq_len.
          Consecutive files of the same class are merged into one row.
          When fewer than seq_len rows result, the last class is repeated
          until there are seq_len rows.
  Raises: ValueError if filenames is empty
          KeyError if a file name or class name is unknown
  '''
  if len(filenames) == 0:
    raise ValueError("filenames must contain at least one file name")
  one_hot = np.eye(num_classes)
  rows = []
  prev_id = None
  for name in filenames:
    class_id = labels[audio_labels[name]]
    # Emit a row only when the class changes, collapsing runs of one class.
    if class_id != prev_id:
      rows.append(one_hot[class_id])
    prev_id = class_id
  # Pad short sequences by repeating the final class (no-op when long enough).
  rows.extend([one_hot[prev_id]] * (seq_len - len(rows)))
  return np.array(rows)

In [None]:
def read_files_from_the_folder(folderName):
    '''
    List the files matching a glob pattern.

    Input: folderName - glob pattern, e.g. '/some/dir/*.wav'
    Output: (files, audio_files) - two numpy arrays of equal length holding
            the matched paths and their base names, in sorted path order
    '''
    # glob.glob returns matches in arbitrary order; sorting makes positions
    # in the arrays stable between runs and machines.
    files = sorted(glob.glob(folderName))
    audio_files = [os.path.basename(path) for path in files]
    return np.array(files), np.array(audio_files)

In [None]:
# Glob pattern matching every training wav file on the mounted drive.
train_wav_pattern = '/content/drive/MyDrive/audio_train_1ch/*.wav'
files, audio_files = read_files_from_the_folder(train_wav_pattern)

In [None]:
# File name -> class name mapping for the training clips.
train_labels_csv = '/content/drive/MyDrive/labels_train.csv'
audio_labels = read_csv(train_labels_csv)

In [None]:
# Build training examples: each one joins 2-5 randomly chosen clips, converts
# them to a padded magnitude spectrogram and pairs it with the one-hot label
# sequence of the clips.
# NOTE: no random seed is set here, so every run draws a different batch.
n_examples = 2
X,y = [],[]
for i in range(n_examples):
  n = np.random.randint(2,6)  # upper bound is exclusive -> 2..5 clips
  # Sample from however many files were actually found instead of a
  # hard-coded count.
  files_idx = np.random.randint(0,len(files),n)
  #print(files_idx)
  if i%10==0:
    print('Done '+ str(i))
  wavfiles = files[files_idx]
  filenames = audio_files[files_idx]
  class_labels = assign_files_labels(filenames,labels,audio_labels)
  x = wavs2feat(wavfiles)
  x = padding(x)
  X.append(x)
  y.append(class_labels)
X = np.array(X)
y = np.array(y)

Done 0


In [None]:
# Persist the generated features and labels to Drive.
features_out = '/content/drive/MyDrive/Extracted/X2.npy'
labels_out = '/content/drive/MyDrive/Extracted/y2.npy'
np.save(features_out, X)
np.save(labels_out, y)

In [None]:
# Reload the saved features and labels from Drive.
features_in = '/content/drive/MyDrive/Extracted/X2.npy'
labels_in = '/content/drive/MyDrive/Extracted/y2.npy'
X = np.load(features_in)
y = np.load(labels_in)

In [None]:
# Sanity check: (examples, freq bins, frames) and (examples, slots, classes).
print(np.shape(X), np.shape(y))

(2, 513, 2001) (2, 5, 10)


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the examples for validation; the fixed random_state keeps
# the split the same on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
import keras as K
from keras.layers import Conv2D, Input, Dense, MaxPool2D,BatchNormalization,Flatten,Activation,Dropout,GlobalMaxPool2D,GRU,Permute,Reshape,TimeDistributed
from keras.models import Model
def classifierModel(input_shape):
  '''
  Build the CNN + GRU sequence classifier.

  Input: input_shape - shape of one example, (freq_bins, frames, channels);
                       this notebook uses (513, 2001, 1)
  Output: uncompiled keras Model whose output is a softmax over 10 classes
          for each of 5 sequence slots, i.e. shape (batch, 5, 10)
  '''
  hidden_states = 32
  X_input = Input(input_shape)
  #conv layers
  # Block 1: 8 filters of 5x5, then pool 2x along frequency and 4x along time.
  X = Conv2D(8,5,padding='same')(X_input)
  X = BatchNormalization()(X) 
  X = Activation('relu')(X)
  X = MaxPool2D((2,4))(X)

  # Block 2: 16 filters of 3x3, then pool 4x along both axes.
  X = Conv2D(16,3,padding='same')(X)
  X = BatchNormalization()(X)
  X = Activation('relu')(X)
  X = MaxPool2D((4,4))(X)
  X = Dropout(0.5)(X)

  # Average over the frequency axis: (batch, freq, time, ch) -> (batch, time, ch).
  # Time has to stay ahead of channels here: Reshape below splits the
  # flattened (time, ch) block in row-major order, so each of the 5 rows
  # covers a contiguous fifth of the frames. With a (ch, time) layout the
  # rows would be groups of channels spanning the whole clip instead.
  X = K.backend.mean(X,axis = 1)
  X = Reshape((5,-1))(X)
  
  # One recurrent step per segment; return_sequences keeps all 5 outputs.
  X = GRU(units = hidden_states,dropout=0.2, recurrent_dropout=0.2,return_sequences=True)(X)

  # Per-step classification head.
  X = TimeDistributed(Dense(units=32))(X)
  X = TimeDistributed(Dense(units=10))(X)
  out = Activation('softmax', name='Output_Activation')(X)
  
  model = Model(inputs=X_input,outputs=out,name='Model')
  return model
# Build the network for (513 freq bins, 2001 frames, 1 channel) inputs.
model = classifierModel((513,2001,1))
# `metrics` is documented as a list; passing a bare string is not accepted
# by every Keras version.
model.compile(optimizer='adam',
              loss='categorical_crossentropy', # Loss
              metrics=['accuracy'])


In [None]:
# Add the trailing channel axis the Conv2D input (513, 2001, 1) expects.
# The train/validation arrays were split off before this cell, so they need
# the same reshape as X; reshaping to the full target shape is safe to re-run.
X = X.reshape((X.shape[0],513,2001,1))
X_train = X_train.reshape((X_train.shape[0],513,2001,1))
X_test = X_test.reshape((X_test.shape[0],513,2001,1))
history = model.fit(X_train,y_train,epochs=10,batch_size=8,shuffle=True,validation_data=(X_test,y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


KeyboardInterrupt: ignored

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 513, 2001, 1)]    0         
_________________________________________________________________
conv2d (Conv2D)              (None, 513, 2001, 8)      208       
_________________________________________________________________
batch_normalization (BatchNo (None, 513, 2001, 8)      32        
_________________________________________________________________
activation (Activation)      (None, 513, 2001, 8)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 256, 500, 8)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 256, 500, 16)      1168      
_________________________________________________________________
batch_normalization_1 (Batch (None, 256, 500, 16)      64    

In [None]:
# Most likely class index for each output slot of the first 6 examples.
first_predictions = model.predict(X[:6])
first_predictions.argmax(axis=-1)

array([[ 8,  0,  8, 10, 10],
       [ 9,  5,  8, 10, 10],
       [ 5,  5,  4,  9, 10],
       [ 7,  3,  5, 10, 10],
       [ 2,  2,  0, 10, 10],
       [ 2,  3,  5, 10, 10]])

In [None]:
# Ground-truth class index per slot for the same 6 examples, for comparison.
y[:6].argmax(axis=-1)

array([[ 8,  0,  5, 10, 10],
       [ 9,  8,  7, 10, 10],
       [ 5,  9,  2,  9,  7],
       [ 7,  3,  5, 10, 10],
       [ 1,  7,  8, 10, 10],
       [ 4,  3,  2, 10, 10]])

In [None]:
# Save the model to Drive.
model_path = "/content/drive/MyDrive/mlsp.h5"
model.save(model_path)