In [1]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
metadata = pd.read_csv('/content/drive/MyDrive/labelled_file_Sheet1.csv')

In [4]:
metadata.head()

Unnamed: 0,File_Name,Label
0,music_1.wav,Music
1,music_2.wav,Music
2,music_3.wav,Music
3,music_4.wav,Music
4,music_5.wav,Music


In [5]:
metadata['Label'].value_counts()

Music           22
Speech          18
Music+Speech    10
Name: Label, dtype: int64

In [6]:
import os
audio_path = '/content/drive/MyDrive/MLSP_Gp.17/training_set_selfmade_0/'

In [45]:
# a, sr = librosa.load('test_sample-4.npy', res_type='kaiser_best')
# # mfcc = librosa.feature.mfcc(y=a, sr=16000, n_mfcc=40)
# a.shape
sr = 16000
my_stft = np.load('test_sample-4.npy')

mels = librosa.feature.melspectrogram(S=my_stft, sr=sr, n_mels=64)
log_mels = librosa.core.amplitude_to_db(mels, ref=np.max)
mfcc = librosa.feature.mfcc(S=log_mels, sr=sr, n_mfcc=20)

In [49]:
def featuresExtractor(filename):
  sample_rate = 16000
  if filename.endswith('.npy'):
    filepath = os.path.abspath(filename)
    my_stft = np.load(filepath)
    mels = librosa.feature.melspectrogram(S=my_stft, sr=sr, n_mels=64)
    log_mels = librosa.core.amplitude_to_db(mels, ref=np.max)
    mfccs_features = librosa.feature.mfcc(S=log_mels, sr=sr, n_mfcc=40)
  else:
    audio, sample_rate = librosa.load(filename, res_type='kaiser_best')
    mfccs_features = librosa.feature.mfcc(y=audio, sr= 16000, n_mfcc=40)

  mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
  return mfccs_scaled_features

In [50]:
from tqdm import tqdm
extracted_features=[]

for index_num, row in tqdm(metadata.iterrows()):
  file_name=os.path.join(os.path.abspath(audio_path), str(row['File_Name']))
  final_class_labels=row['Label']
  data = featuresExtractor(filename=file_name)
  extracted_features.append([data, final_class_labels])

50it [00:19,  2.59it/s]


In [51]:
extracted_features_df=pd.DataFrame(extracted_features,columns=['feature','label'])
extracted_features_df.head()

Unnamed: 0,feature,label
0,"[-211.58139, 84.72968, -56.21672, 46.320293, -...",Music
1,"[-183.81102, 78.271164, -62.15846, 44.878735, ...",Music
2,"[-210.03279, 75.51148, -55.351326, 49.253624, ...",Music
3,"[-223.5702, 73.58966, -55.127224, 46.11475, -2...",Music
4,"[-213.40372, 71.130905, -51.736862, 49.11726, ...",Music


In [52]:
X=np.array(extracted_features_df['feature'].tolist())
y=np.array(extracted_features_df['label'].tolist())

In [53]:
X.shape

(50, 40)

In [54]:
from sklearn.preprocessing import LabelEncoder
label_encoder=LabelEncoder()
y=np.array(pd.get_dummies(y))

In [55]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [56]:
X_train.shape

(40, 40)

In [57]:
y_train.shape

(40, 3)

In [58]:
X_test.shape

(10, 40)

In [59]:
y_test.shape

(10, 3)

In [60]:
y

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1]], dtype=uint8)

Data Preprocessing

In [61]:
import tensorflow as tf

In [62]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [63]:
num_labels=y.shape[1]

In [64]:
model=Sequential()
###first layer
model.add(Dense(100,input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))

###final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [65]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 100)               4100      
                                                                 
 activation_4 (Activation)   (None, 100)               0         
                                                                 
 dropout_3 (Dropout)         (None, 100)               0         
                                                                 
 dense_5 (Dense)             (None, 200)               20200     
                                                                 
 activation_5 (Activation)   (None, 200)               0         
                                                                 
 dropout_4 (Dropout)         (None, 200)               0         
                                                                 
 dense_6 (Dense)             (None, 100)              

In [66]:
model.compile(loss='categorical_crossentropy',metrics=['accuracy'], optimizer='adam')

In [67]:
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 200
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='/content/drive/MyDrive/audio_classification.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/200
Epoch 00001: val_loss improved from inf to 4.11486, saving model to /content/drive/MyDrive/audio_classification.hdf5
Epoch 2/200
Epoch 00002: val_loss did not improve from 4.11486
Epoch 3/200
Epoch 00003: val_loss did not improve from 4.11486
Epoch 4/200
Epoch 00004: val_loss did not improve from 4.11486
Epoch 5/200
Epoch 00005: val_loss did not improve from 4.11486
Epoch 6/200
Epoch 00006: val_loss did not improve from 4.11486
Epoch 7/200
Epoch 00007: val_loss did not improve from 4.11486
Epoch 8/200
Epoch 00008: val_loss did not improve from 4.11486
Epoch 9/200
Epoch 00009: val_loss did not improve from 4.11486
Epoch 10/200
Epoch 00010: val_loss did not improve from 4.11486
Epoch 11/200
Epoch 00011: val_loss did not improve from 4.11486
Epoch 12/200
Epoch 00012: val_loss did not improve from 4.11486
Epoch 13/200
Epoch 00013: val_loss did not improve from 4.11486
Epoch 14/200
Epoch 00014: val_loss improved from 4.11486 to 3.58093, saving model to /content/drive/MyDrive/aud

In [68]:
test_accuracy=model.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.8999999761581421


Testing the Model 

In [138]:
def get_output(filepath):
  output = []
  mfccs_scaled_features = featuresExtractor(filepath)
  mfccs_scaled_features = mfccs_scaled_features.reshape(1,-1)
  # print(mfccs_scaled_features)
  # print(mfccs_scaled_features.shape)
  pred_label = model.predict(mfccs_scaled_features)
  # print(pred_label)

  # When label = Music
  if (pred_label[:,0] >= pred_label[:,1]) and (pred_label[:,0] >= pred_label[:,2]):
    output = '1', '0'

  # When label = Music+Speech
  elif (pred_label[:,1] >= pred_label[:,0]) and (pred_label[:,1] >= pred_label[:,2]):
    output = '1', '1'

  # When label = Speech
  else:
    output = '0, 1'

  return output

In [139]:
filepath = '/content/drive/MyDrive/training_set_selfmade_1/music_noisy1.wav'

In [140]:
x = get_output('/content/drive/MyDrive/spectrogram/test_sample-8.npy')
x

('1', '1')

In [141]:
def test_data(filepath):
  prediction = []
  filename = os.path.basename(filepath)
  file_name = os.path.splitext(filename)[0]
  label = get_output(filepath)
  prediction.append([file_name, label])
  prediction_df = pd.DataFrame(prediction, columns=['file_name', 'Label'])
  return prediction

In [142]:
path = '/content/drive/MyDrive/training_set_selfmade_1/S002.wav'
result = test_data(path)
result


[['S002', '0, 1']]

In [143]:
path = 'test_sample-4.npy'
result = test_data(path)
result

[['test_sample-4', ('1', '1')]]

In [158]:
 def loaded_output(directory):
  loaded_output = []
  for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    data = test_data(f)
    loaded_output.append([data])
    loaded_output_df = pd.DataFrame(loaded_output, columns=['Filename'])
    task_file = loaded_output_df.to_csv('Task2.csv')

  return task_file

In [153]:
x = loaded_output('/content/drive/MyDrive/spectrogram')
x

Unnamed: 0,Filename
0,"[[test_sample-4, (1, 1)]]"
1,"[[test_sample-9, (1, 1)]]"
2,"[[test_sample-7, (1, 1)]]"
3,"[[test_sample-0, (1, 1)]]"
4,"[[test_sample-5, (1, 1)]]"
5,"[[test_sample-2, (1, 1)]]"
6,"[[test_sample-8, (1, 1)]]"
7,"[[test_sample-3, (1, 1)]]"
8,"[[test_sample-1, (1, 1)]]"
9,"[[test_sample-6, (1, 1)]]"
