In [1]:
from comet_ml import Experiment
from comet_ml import ExistingExperiment

import IPython.display as ipd

import numpy as np

import pandas as pd

import librosa
from librosa import display  

import matplotlib.pyplot as plt

from scipy.io import wavfile as wav

from sklearn import metrics 
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split 

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam
from keras.utils import to_categorical

import os

In [None]:
# Comet credentials come from the environment instead of being committed with
# the notebook: export COMET_API_KEY before starting the kernel.
# The key that used to be hardcoded here has been exposed and should be rotated.
# NOTE(review): when the variable is unset, api_key is None and comet_ml is
# expected to fall back to its own configuration lookup — confirm for the
# installed comet_ml version.
experiment = Experiment(
    api_key=os.environ.get("COMET_API_KEY"),
    project_name="shooters",
    workspace="temasarkisov",
)

In [None]:
# Load the UrbanSound8K metadata table and collect the distinct class names.
df = pd.read_csv('../../data/UrbanSound8K.csv')
labels = df['class'].unique().tolist()

In [None]:
# Sanity check: table dimensions, then a preview of the first five rows.
print(df.shape)
df.head()

In [None]:
# Map every class label to the path of one example clip: the first metadata
# row carrying that label.
files = {}
for label in labels:
    matches = df[df['class'] == label]
    fold = matches['fold'].iloc[0]
    clip_name = matches['slice_file_name'].iloc[0]
    files[label] = '../../data/fold{}/{}'.format(fold, clip_name)

In [None]:
# Disabled cell: the plotting code below is wrapped in a string literal, so it
# is never executed. If it is re-enabled, note that experiment.log_image() is
# called before plt.savefig() writes 'class_examples.png', so the upload would
# run before the figure file has been produced by this cell.
# NOTE(review): librosa.display.waveplot may not exist in newer librosa
# releases — confirm against the installed version before re-enabling.
'''
fig = plt.figure(figsize=(15,15)) # Log graphic of waveforms to Comet
experiment.log_image('class_examples.png')
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i, label in enumerate(labels):
    fn = files[label]
    fig.add_subplot(5, 2, i+1)
    plt.title(label)
    data, sample_rate = librosa.load(fn)
    librosa.display.waveplot(data, sr= sample_rate)
plt.savefig('class_examples.png')
'''

In [None]:
# Log graphic of waveforms to Comet
#experiment.log_image('class_examples.png')

In [None]:
# Log audio files to Comet for debugging
#for label in labels:
#    fn = files[label]
#    experiment.log_audio(fn, metadata = {'name': label})

In [None]:
# Load one reference clip twice: through librosa.load() with its defaults and
# through scipy's wav.read(), then report the sample rate each loader returns.
fn = '../../data/fold1/191431-9-0-66.wav'
librosa_audio, librosa_sample_rate = librosa.load(fn)
scipy_sample_rate, scipy_audio = wav.read(fn)
print(
    "Original sample rate: {}".format(scipy_sample_rate),
    "Librosa sample rate: {}".format(librosa_sample_rate),
    sep="\n",
)

In [None]:
# Compare the amplitude range of the samples returned by each loader.
print('Original audio file min~max range: {} to {}'.format(np.min(scipy_audio), np.max(scipy_audio)))
# Fields 0 and 1 select the min and the max respectively; using index 0 twice
# would print the minimum on both sides of "to".
print('Librosa audio file min~max range: {0:.2f} to {1:.2f}'.format(np.min(librosa_audio), np.max(librosa_audio)))

In [None]:
# Original audio, plotted straight from the scipy array
# (note that it’s in stereo — two audio sources).
# The figure is written to disk first, then uploaded to Comet.
fig = plt.figure(figsize=(12, 4))
fig.gca().plot(scipy_audio)
fig.savefig('../../img/original_audio.png')
experiment.log_image('../../img/original_audio.png')

In [None]:
# Librosa audio: converted to mono (single track).
# The figure is written to disk first, then uploaded to Comet.
fig = plt.figure(figsize=(12, 4))
fig.gca().plot(librosa_audio)
fig.savefig('../../img/librosa_audio.png')
experiment.log_image('../../img/librosa_audio.png')

In [None]:
# Compute 40 MFCCs for the sample clip loaded with librosa.
mfccs = librosa.feature.mfcc(
    y=librosa_audio,
    sr=librosa_sample_rate,
    n_mfcc=40,
)

In [None]:
# Shape of the MFCC array returned by librosa.feature.mfcc for the sample clip
print(mfccs.shape)

In [None]:
# Render the MFCC matrix with a time axis, save it and upload it to Comet.
mfcc_fig = plt.figure(figsize=(8, 8))
librosa.display.specshow(mfccs, sr=librosa_sample_rate, x_axis='time')
mfcc_fig.savefig('../../img/MFCCs.png')
experiment.log_image('../../img/MFCCs.png')

In [None]:
# Feature extractor applied to every file in the dataset
def extract_features(file_name):
    """Return a fixed-length MFCC summary of one audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCCs are computed, and the transposed MFCC matrix is averaged along
    its first axis, giving one mean value per coefficient.

    Parameters
    ----------
    file_name
        Path of the audio file, passed unchanged to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        One mean value for each of the 40 MFCC coefficients.
    """
    signal, rate = librosa.load(file_name, res_type='kaiser_fast')
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    return np.mean(coefficients.T, axis=0)

In [None]:
# Now let’s extract features: run extract_features() on every clip listed in
# the metadata table and pair each feature vector with its class label.
PROGRESS_EVERY = 500  # number of rows between progress messages

data_root = os.path.abspath('../../data/')
features = []
failed_files = []
for index, row in df.iterrows():
    file_name = os.path.join(data_root, 'fold' + str(row["fold"]), str(row["slice_file_name"]))
    class_label = row["class"]
    try:
        data = extract_features(file_name)
    except Exception as exc:
        # Best effort: skip clips that cannot be loaded or processed, but
        # report the actual error instead of assuming the file is missing.
        # Catching Exception rather than using a bare except keeps
        # KeyboardInterrupt (kernel interrupt) working during this long loop.
        failed_files.append(file_name)
        print("{}: skipped ({}: {})".format(file_name, type(exc).__name__, exc))
        continue
    features.append([data, class_label])
    if index % PROGRESS_EVERY == 0:
        print(index)

print("Extracted features for {} clips; {} failed".format(len(features), len(failed_files)))

# Convert into a Pandas dataframe
featuresdf = pd.DataFrame(features, columns=['feature', 'class_label'])

In [None]:
# Inspect the feature vector stored for the first extracted clip as a quick
# check before building and training a model.
featuresdf.iloc[0]['feature']

In [None]:
# Convert features and corresponding classification labels into numpy arrays.
# LabelEncoder and to_categorical are imported once in the notebook's first
# cell, so they are not re-imported here.
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['class_label'].tolist())

# Encode the classification labels: LabelEncoder maps class names to integer
# ids, and to_categorical turns those ids into the label matrix used for training.
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

In [None]:
# Split the dataset: 20% of the samples are held out for testing, and the fixed
# random_state makes the split repeatable. train_test_split is imported once in
# the notebook's first cell, so it is not re-imported here.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=127,
)

In [None]:
# Number of output classes: the second dimension of the encoded label matrix.
num_labels = yy.shape[1]
# NOTE(review): filter_size is not referenced by any other visible cell — confirm whether it is still needed.
filter_size = 2

def build_model_graph(input_shape=(40,)):
    """Build and compile the dense classifier.

    Architecture: two hidden blocks of Dense(256) -> ReLU -> Dropout(0.5),
    followed by Dense(num_labels) -> softmax. ``num_labels`` is read from the
    notebook's global scope.

    Parameters
    ----------
    input_shape : tuple, default (40,)
        Shape of a single feature vector. It is forwarded to the first Dense
        layer; previously the argument was accepted but never used.

    Returns
    -------
    The Sequential model, compiled with categorical cross-entropy loss, the
    'adam' optimizer and the 'accuracy' metric.
    """
    model = Sequential()
    # Declaring the input shape on the first layer makes the parameter effective.
    model.add(Dense(256, input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))

    # Compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

In [None]:
# Instantiate the classifier using build_model_graph's default input_shape of (40,).
model = build_model_graph()

In [None]:
from keras.callbacks import ModelCheckpoint
from datetime import datetime

# Training hyper-parameters
num_epochs = 100
num_batch_size = 32

# Fit on the training split; the held-out split is supplied as validation data.
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

In [None]:
import pickle

# Persist the trained model. The context manager closes the file handle even
# if pickling raises; a bare open() inside the call left the handle unclosed.
# NOTE(review): pickling Keras models may not work with every Keras version —
# consider model.save() after confirming how 'shooters_model.sav' is consumed.
with open('shooters_model.sav', 'wb') as model_file:
    pickle.dump(model, model_file)

In [63]:
# Evaluate the trained model on the training and testing splits.
# score[1] is read as the accuracy, the single metric requested at compile time.
score = model.evaluate(x_train, y_train, verbose=0)
training_accuracy = score[1]
print("Training Accuracy: {0:.2%}".format(training_accuracy))

score = model.evaluate(x_test, y_test, verbose=0)
testing_accuracy = score[1]
print("Testing Accuracy: {0:.2%}".format(testing_accuracy))

Training Accuracy: 99.86%
Testing Accuracy: 94.29%


In [65]:
# Display model architecture summary: one row per layer with its output shape
# and parameter count.
model.summary()


# Calculate pre-training accuracy 
#score = model.evaluate(x_test, y_test, verbose=0)
#accuracy = 100*score[1]

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               10496     
_________________________________________________________________
activation (Activation)      (None, 256)               0         
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
activation_1 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2