# Mount Drive

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the
# /content/drive/MyDrive/... paths used later in the notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **FEATURE and DATA EXTRACTION**

In [None]:
import librosa
import numpy as np
import os
import glob
import pandas as pd
import random

In [None]:
def wav2feat(wavfile):
    '''
    Input: audio wav file name
    Output: mel-band spectrogram returned by process()

    The file is loaded with sr=44100 and mono=True, an STFT is taken with a
    20 ms Hann window, a 10 ms hop and n_fft=1024, and the absolute value of
    that STFT is handed to process(). No dB conversion is applied here.
    '''
    signal, Fs = librosa.load(wavfile, sr=44100, mono=True)

    frame_shift = int(0.01 * Fs)   # 10ms
    frame_length = int(0.02 * Fs)  # 20ms

    spectrum = librosa.stft(
        signal,
        n_fft=1024,
        hop_length=frame_shift,
        win_length=frame_length,
        window='hann',
        center=True,
        pad_mode='reflect',
    )
    magnitude = np.abs(spectrum)

    return process(magnitude)
 
def process(X, n_frames=401, n_mels=128, sr=44100, n_fft=1024):
    '''
    Input:
        X: 2-D magnitude spectrogram, rows = STFT frequency bins, columns = frames
        n_frames: number of frames (columns) every output must have
        n_mels, sr, n_fft: passed to librosa.filters.mel to build the filterbank
    Output: array of shape (n_mels, n_frames) = mel filterbank applied to X

    Short inputs are right-padded with zero frames. Long inputs are cropped
    to the first n_frames frames, so every result has the same shape and can
    be stacked with np.array and fed to a fixed-size model input.
    '''
    X = np.asarray(X)
    frames = X.shape[1]

    if frames < n_frames:
        # Right-pad with silent (all-zero) frames
        padding = np.zeros((X.shape[0], n_frames - frames))
        X = np.hstack((X, padding))
    elif frames > n_frames:
        # Crop: without this, long clips produce a different output width
        X = X[:, :n_frames]

    mel = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    X_db = mel.dot(X)
    return X_db

In [None]:
def read_csv(filename):
    '''
    Input: name of a comma-separated text file
    Output: dict mapping the first field of each line to its second field

    Every line is treated the same way, so a header line (if the file has
    one) also ends up in the dict. Lines with fewer than two fields, such as
    blank lines at the end of the file, are skipped instead of raising
    IndexError.
    '''
    id_label = {}
    with open(filename,'r') as fid:
        for line in fid: # '176787-5-0-27.wav,engine_idling\n'
            tokens = line.strip().split(',') # ['176787-5-0-27.wav', 'engine_idling']
            if len(tokens) < 2:
                continue
            id_label[tokens[0]] = tokens[1]
    return id_label

In [None]:
def train_data(audio_train_directory,csv_file):
    '''
    Build shuffled train / held-out feature arrays from a labelled audio folder.

    Input:
        audio_train_directory: folder containing the wav files named in csv_file
        csv_file: CSV with a header that includes a 'class' column; read_csv()
                  takes the first field of each line as the wav file name and
                  the second field as its class name
    Output:
        X_train_data, X_test_data: np.array of wav2feat() features, one per clip
        Y_train_data, Y_test_data: integer class ids, shape (num_clips, 1)
        NUM_CLASSES: number of distinct values in the 'class' column

    Class ids follow the order of df['class'].unique(). Only the ten class
    names listed in train_counts are used; any other line (including the CSV
    header picked up by read_csv) is ignored.
    '''
    # Clips per class reserved for training; the rest of each class is held out.
    train_counts = {
        'dog_bark': 170,
        'gun_shot': 50,
        'engine_idling': 140,
        'siren': 130,
        'jackhammer': 180,
        'drilling': 170,
        'children_playing': 170,
        'street_music': 170,
        'air_conditioner': 170,
        'car_horn': 50,
    }

    df = pd.read_csv(csv_file)
    id_label = read_csv(csv_file)
    Labels = list(df['class'].unique())
    NUM_CLASSES = len(Labels)
    map_char_to_int = dict(zip(Labels, range(NUM_CLASSES)))

    # Group file names by class
    files_by_class = {name: [] for name in train_counts}
    for key, value in id_label.items():
        if value in files_by_class:
            files_by_class[value].append(key)

    # Shuffle each class once (not on every append) and split it
    train_files = []
    test_files = []
    for name, count in train_counts.items():
        files = files_by_class[name]
        random.shuffle(files)
        train_files.extend(files[:count])
        test_files.extend(files[count:])

    def build(file_names):
        # Mix the classes, then extract features and labels in that same order.
        random.shuffle(file_names)
        feats = [wav2feat(os.path.join(audio_train_directory, f)) for f in file_names]
        labels = [map_char_to_int[id_label[f]] for f in file_names]
        # Building from lists (rather than zip(*pairs)) also works for an empty split.
        return np.array(feats), np.array(labels, dtype=int).reshape(-1, 1)

    X_train_data, Y_train_data = build(train_files)
    X_test_data, Y_test_data = build(test_files)

    return X_train_data,X_test_data,Y_train_data,Y_test_data,NUM_CLASSES

# **MODEL**

In [None]:
from tensorflow import keras
import tensorflow as tf
from keras import Sequential
from keras.layers import Activation,Dense,Dropout

In [None]:
def Conv2D(N_CLASSES,input_shape):
    '''
    Build and compile the 2-D convolutional classifier.

    Input:
        N_CLASSES: width of the final softmax layer
        input_shape: shape of one sample, passed to keras.Input
    Output: compiled keras Model named '2d_convolution' (adam optimizer,
            categorical cross-entropy loss, accuracy metric)
    '''
    layers = keras.layers

    def conv_stage(tensor, filters, kernel, conv_name, pool_name, drop_name):
        # One stage = ReLU convolution -> 2x2 max-pool -> 20% dropout
        tensor = layers.Conv2D(filters, kernel_size=kernel, activation='relu', padding='same', name=conv_name)(tensor)
        tensor = layers.MaxPooling2D(pool_size=(2,2), padding='same', name=pool_name)(tensor)
        return layers.Dropout(rate=0.2, name=drop_name)(tensor)

    model_input = keras.Input(shape = input_shape)
    # This is a LayerNormalization layer; it keeps its historical name 'batch_norm'.
    net = layers.LayerNormalization(axis=2, name='batch_norm')(model_input)

    net = conv_stage(net, 8, (7,7), 'conv2d_relu_1', 'max_pool_2d_1', 'dropout_1')
    net = conv_stage(net, 16, (5,5), 'conv2d_relu_2', 'max_pool_2d_2', 'dropout_2')
    net = conv_stage(net, 16, (3,3), 'conv2d_relu_3', 'max_pool_2d_3', 'dropout_3')
    net = conv_stage(net, 32, (3,3), 'conv2d_relu_4', 'max_pool_2d_4', 'dropout_4')

    # Extra pooling step before the dense head
    net = layers.MaxPooling2D(pool_size=(2,2), padding='same', name='max_pool_2d_5')(net)
    net = layers.Flatten(name='flatten')(net)
    net = layers.Dropout(rate=0.2, name='dropout_5')(net)

    l2_penalty = tf.keras.regularizers.l2(0.001)
    net = layers.Dense(64, activation='relu', activity_regularizer=l2_penalty, name='dense')(net)
    class_probs = layers.Dense(N_CLASSES, activation='softmax', name='softmax')(net)

    model = tf.keras.Model(inputs=model_input, outputs=class_probs, name='2d_convolution')
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# **TRAIN**

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [None]:
def train(X_train_data,Y_train_data,X_test_data,Y_test_data,NUM_CLASSES,Nepochs):
    '''
    Fit the Conv2D() model on the training arrays and evaluate it on the test arrays.

    Input:
        X_train_data, X_test_data: feature arrays indexed as (sample, dim1, dim2)
        Y_train_data, Y_test_data: integer class ids
        NUM_CLASSES: number of classes used for the one-hot encoding
        Nepochs: number of training epochs
    Output: the fitted model

    20% of the training data is used by fit() for validation (validation_split=0.2).
    '''
    def with_channel(batch):
        # Append a trailing axis of size 1 to a (sample, dim1, dim2) array
        return batch.reshape(batch.shape[0], batch.shape[1], batch.shape[2], 1)

    train_x = with_channel(X_train_data)
    train_y = keras.utils.to_categorical(Y_train_data, NUM_CLASSES)

    test_x = with_channel(X_test_data)
    test_y = keras.utils.to_categorical(Y_test_data, NUM_CLASSES)

    sample_shape = tuple(train_x[0].shape)
    Model1 = Conv2D(NUM_CLASSES, sample_shape)

    fit_options = dict(
        validation_split=0.2,
        verbose=2,
        epochs=Nepochs,
        shuffle=True,
        batch_size=32,
    )
    Model1.fit(x=train_x, y=train_y, **fit_options)

    Model1.evaluate(test_x, test_y)

    return Model1


In [None]:
def save_model(model,filename):
    '''
    Input: a model object exposing save(), and the destination path
    Output: None (the model's own save() method does the writing)
    '''
    model.save(filename)

In [None]:
def load_model(filename):
    '''
    Input: path previously written by save_model()
    Output: whatever keras.models.load_model returns for that path
    '''
    return keras.models.load_model(filename)

# **EVALUATE AND SAVE**

In [None]:
# d1: folder with the training wav files; d2: CSV with their labels (both on the mounted Drive)
d1 = "/content/drive/MyDrive/project_specs_698/shared_train/audio_train"
d2 = "/content/drive/MyDrive/project_specs_698/shared_train/labels_train.csv"
# Extract features for the labelled clips and split them into train / held-out arrays
X_train_data,X_test_data, Y_train_data,Y_test_data,NUM_CLASSES = train_data(d1,d2)
#np.save("/content/drive/MyDrive/project_specs_698/shared_train/X.npy",X_data)
#np.save("/content/drive/MyDrive/project_specs_698/shared_train/Y.npy",Y_data)


In [None]:
# Sanity check: shape of the stacked training features
X_train_data.shape

(1400, 128, 401)

In [None]:
# Number of epochs passed to train()
epochs = 100
# Fit the model; train() also evaluates it on the held-out arrays and returns it
m= train(X_train_data,Y_train_data,X_test_data,Y_test_data,NUM_CLASSES,epochs)

Epoch 1/100
35/35 - 1s - loss: 2.1109 - accuracy: 0.2134 - val_loss: 1.9812 - val_accuracy: 0.3429
Epoch 2/100
35/35 - 1s - loss: 1.7217 - accuracy: 0.4009 - val_loss: 1.7332 - val_accuracy: 0.4500
Epoch 3/100
35/35 - 1s - loss: 1.4327 - accuracy: 0.5009 - val_loss: 1.4264 - val_accuracy: 0.5357
Epoch 4/100
35/35 - 1s - loss: 1.2230 - accuracy: 0.5768 - val_loss: 1.3837 - val_accuracy: 0.5679
Epoch 5/100
35/35 - 1s - loss: 1.0809 - accuracy: 0.6455 - val_loss: 1.1431 - val_accuracy: 0.6750
Epoch 6/100
35/35 - 1s - loss: 0.9497 - accuracy: 0.7071 - val_loss: 1.1086 - val_accuracy: 0.7036
Epoch 7/100
35/35 - 1s - loss: 0.8934 - accuracy: 0.7232 - val_loss: 1.0521 - val_accuracy: 0.7321
Epoch 8/100
35/35 - 1s - loss: 0.8010 - accuracy: 0.7786 - val_loss: 0.9193 - val_accuracy: 0.7500
Epoch 9/100
35/35 - 1s - loss: 0.7095 - accuracy: 0.8018 - val_loss: 0.8843 - val_accuracy: 0.7393
Epoch 10/100
35/35 - 1s - loss: 0.6556 - accuracy: 0.8241 - val_loss: 0.8761 - val_accuracy: 0.7429
Epoch 11/

In [None]:
# Save the model first, then store the exact arrays used for training and
# evaluation under the same Drive path.
save_model(m,"/content/drive/MyDrive/my_model_edited")
np.save("/content/drive/MyDrive/my_model_edited/X_train_data",X_train_data)
np.save("/content/drive/MyDrive/my_model_edited/X_test_data",X_test_data)
np.save("/content/drive/MyDrive/my_model_edited/Y_train_data",Y_train_data)
np.save("/content/drive/MyDrive/my_model_edited/Y_test_data",Y_test_data)

INFO:tensorflow:Assets written to: /content/drive/MyDrive/my_model_edited/assets


In [None]:
# Rebuild the class-name -> integer-id table from the training labels.
# Ids follow the order of df['class'].unique(), the same rule train_data() uses.
df = pd.read_csv("/content/drive/MyDrive/project_specs_698/shared_train/labels_train.csv")
Labels = [name for name in df['class'].unique()]
NUM_CLASSES = len(Labels)
Labels_int = [idx for idx in range(NUM_CLASSES)]
map_char_to_int = {name: idx for idx, name in enumerate(Labels)}
map_char_to_int

{'air_conditioner': 8,
 'car_horn': 9,
 'children_playing': 6,
 'dog_bark': 0,
 'drilling': 5,
 'engine_idling': 2,
 'gun_shot': 1,
 'jackhammer': 4,
 'siren': 3,
 'street_music': 7}

In [None]:
# Reload the saved model from Drive (rebinds m)
m= load_model("/content/drive/MyDrive/my_model_edited")

In [None]:
# Print the layer-by-layer summary of the reloaded model
m.summary()

Model: "2d_convolution"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 128, 401, 1)]     0         
_________________________________________________________________
batch_norm (LayerNormalizati (None, 128, 401, 1)       802       
_________________________________________________________________
conv2d_relu_1 (Conv2D)       (None, 128, 401, 8)       400       
_________________________________________________________________
max_pool_2d_1 (MaxPooling2D) (None, 64, 201, 8)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64, 201, 8)        0         
_________________________________________________________________
conv2d_relu_2 (Conv2D)       (None, 64, 201, 16)       3216      
_________________________________________________________________
max_pool_2d_2 (MaxPooling2D) (None, 32, 101, 16)    

In [None]:
############################SAMPLE TEST########################################

# Folder of pre-computed spectrogram .npy files for the sample test clips
direc = "/content/drive/MyDrive/project_specs_698/shared_train/sample_test_task1/feats"

# os.listdir() returns names in arbitrary order; sort them so that row i of
# `lst` (and therefore prediction i) always corresponds to sample_files[i].
sample_files = sorted(name for name in os.listdir(direc) if name.endswith(".npy"))

lst = []
for fi in sample_files:
  print(fi)
  lst.append(process(np.load(os.path.join(direc, fi))))

# Stack the per-file features and append the trailing axis of size 1
lst = np.array(lst)
lst = lst.reshape(lst.shape[0],lst.shape[1],lst.shape[2],1)
lst.shape

a00005.npy
a00006.npy
a00002.npy
a00004.npy
a00003.npy
a00008.npy
a00009.npy
a00010.npy
a00007.npy
a00001.npy


(10, 128, 401, 1)

In [None]:
def map(Y_pred,map_char_to_int):
  '''
  Input:
      Y_pred: iterable of integer class ids (1-D numpy array or plain list)
      map_char_to_int: dict mapping class name -> integer id
  Output: list of class names, in the order of Y_pred

  Ids that are not a value of map_char_to_int are skipped, so the result can
  be shorter than Y_pred. If several names share an id, the first name in the
  dict wins.

  NOTE: this function shadows Python's builtin map() for the rest of the
  notebook; the name is kept because later cells call it.
  '''
  # Invert the mapping once instead of scanning the dict for every prediction
  int_to_char = {}
  for key, value in map_char_to_int.items():
    int_to_char.setdefault(value, key)

  return [int_to_char[label] for label in Y_pred if label in int_to_char]

In [None]:
# Position of the largest model output per sample = predicted class id
a = np.argmax(m.predict(lst),axis=1)
# Translate ids to class names (`map` is the notebook's helper, not the builtin)
map(a,map_char_to_int)

['jackhammer',
 'dog_bark',
 'drilling',
 'street_music',
 'drilling',
 'drilling',
 'engine_idling',
 'dog_bark',
 'children_playing',
 'drilling']

In [None]:
####################### EVALUATION ON TEST SET ####################################

# Folder of pre-computed spectrogram .npy files for the test clips
directory = "/content/drive/MyDrive/feats"

# Sorted so that lis_dir (and anything written from it) has a reproducible order
test_files = sorted(name for name in os.listdir(directory) if name.endswith(".npy"))

# Inverse of map_char_to_int: class id -> class name
int_to_label = {idx: name for name, idx in map_char_to_int.items()}

# file name -> predicted class name
lis_dir = {}
if test_files:
  # Build one batch, shaped like the sample-test input (trailing axis of size 1),
  # and call the model once instead of once per file.
  feats = np.array([process(np.load(os.path.join(directory, fil))) for fil in test_files])
  feats = feats.reshape(feats.shape[0], feats.shape[1], feats.shape[2], 1)
  labels = np.argmax(m.predict(feats), axis=1)
  for fil, label in zip(test_files, labels):
    lis_dir[fil] = int_to_label[int(label)]

In [None]:
# Show the file name -> predicted class name mapping
lis_dir

{'a001.npy': 'jackhammer',
 'a002.npy': 'street_music',
 'a003.npy': 'engine_idling',
 'a004.npy': 'engine_idling',
 'a005.npy': 'dog_bark',
 'a006.npy': 'children_playing',
 'a007.npy': 'drilling',
 'a008.npy': 'engine_idling',
 'a009.npy': 'air_conditioner',
 'a010.npy': 'dog_bark',
 'a011.npy': 'dog_bark',
 'a012.npy': 'engine_idling',
 'a013.npy': 'children_playing',
 'a014.npy': 'car_horn',
 'a015.npy': 'air_conditioner',
 'a016.npy': 'street_music',
 'a017.npy': 'children_playing',
 'a018.npy': 'dog_bark',
 'a019.npy': 'gun_shot',
 'a020.npy': 'drilling',
 'a021.npy': 'jackhammer',
 'a022.npy': 'engine_idling',
 'a023.npy': 'gun_shot',
 'a024.npy': 'air_conditioner',
 'a025.npy': 'air_conditioner',
 'a026.npy': 'jackhammer',
 'a027.npy': 'drilling',
 'a028.npy': 'drilling',
 'a029.npy': 'children_playing',
 'a030.npy': 'children_playing',
 'a031.npy': 'children_playing',
 'a032.npy': 'street_music',
 'a033.npy': 'siren',
 'a034.npy': 'siren',
 'a035.npy': 'children_playing',
 'a0

In [None]:
########################## Converting to .csv ##################################

import csv
field_names = ['file_name','Predicted_label']
# newline='' lets the csv module control line endings itself
with open('test.csv','w',newline='') as f:
  writer = csv.writer(f, lineterminator='\n')
  # Write the header row; without it, reading the file back treats the
  # first prediction as column names.
  writer.writerow(field_names)
  # One "file name,predicted label" row per entry of lis_dir
  writer.writerows(lis_dir.items())

In [None]:
# Read the written predictions back into a DataFrame (rebinds df)
df = pd.read_csv("test.csv")

In [None]:
# Display the DataFrame held in df
df

Unnamed: 0,a002.npy,street_music
0,a001.npy,jackhammer
1,a003.npy,engine_idling
2,a004.npy,engine_idling
3,a005.npy,dog_bark
4,a007.npy,drilling
5,a008.npy,engine_idling
6,a006.npy,children_playing
7,a009.npy,air_conditioner
8,a011.npy,dog_bark
9,a012.npy,engine_idling
