In [None]:
import os
from os import listdir
import pathlib
from shutil import copyfile
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import scipy.io.wavfile as wav
import pandas as pd
from zipfile import ZipFile


from tensorflow import keras

from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display
from tensorflow.keras.models import Sequential


# Fix the global TensorFlow and NumPy random seeds so that repeated runs of the
# notebook are as reproducible as possible.
# NOTE: the dataset split cells pass their own literal seed (123) instead of
# this value.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

In [None]:
# Report how many GPUs TensorFlow detects before any heavy work starts.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


# Helper Methods

**Extract content from zip**

In [None]:
def unzip(file):
  """Extract every member of the zip archive `file` into the current working directory."""
  with ZipFile(file, mode='r') as archive:
    # No target path is given, so members land relative to the working directory.
    archive.extractall()

## Methods to convert the audio files to spectrograms

**Convert an audio file to a spectrogram**

In [None]:
def wav_to_spectrogram(audio_path, save_path, dimensions=(128, 128), noverlap=16, cmap='gray_r'):
    """Render the spectrogram of a WAV file and save it as an image.

    Args:
        audio_path: Path of the WAV file to read.
        save_path: Path the spectrogram image is written to.
        dimensions: (width, height) used to size the figure, expressed in
            pixels at the figure's DPI. The saved image can differ slightly
            because it is cropped with a tight bounding box.
        noverlap: Number of samples shared by neighbouring FFT windows.
        cmap: Matplotlib colormap used to draw the spectrogram.
    """
    sample_rate, samples = wav.read(audio_path)
    # specgram expects a 1-D signal, so average multi-channel audio down to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    fig = plt.figure()
    try:
        dpi = fig.get_dpi()
        fig.set_size_inches((dimensions[0] / dpi, dimensions[1] / dpi))
        # A single axes spanning the whole canvas, with no frame or ticks, so
        # the saved image contains nothing but the spectrogram itself.
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        ax.specgram(samples, Fs=sample_rate, noverlap=noverlap, cmap=cmap)
        ax.xaxis.set_major_locator(plt.NullLocator())
        ax.yaxis.set_major_locator(plt.NullLocator())
        fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving failed, so that
        # converting many files does not accumulate open figures.
        plt.close(fig)

**Convert all audio files in a directory**

In [None]:
def convert_audio_dir_to_sectrogram(audio_dir, spectrogram_dir, dimensions=(128, 128), noverlap=16, cmap='gray_r'):
    """Convert every WAV file directly inside `audio_dir` into a PNG spectrogram.

    Each entry name in `audio_dir` is printed. Entries whose extension is
    `.wav` (case-insensitive) are rendered with `wav_to_spectrogram` and
    written to `spectrogram_dir` under the same base name with a `.png`
    extension; every other entry is skipped.

    Args:
        audio_dir: Directory containing the source WAV files.
        spectrogram_dir: Existing directory that receives the PNG files.
        dimensions: Forwarded to `wav_to_spectrogram`.
        noverlap: Forwarded to `wav_to_spectrogram`.
        cmap: Forwarded to `wav_to_spectrogram`.
    """
    for file in listdir(audio_dir):
        print(file)

        # Decide by the real extension so names that merely contain ".wav"
        # (e.g. "clip.wav.bak") and stray files such as .DS_Store are ignored.
        stem, extension = os.path.splitext(file)
        if extension.lower() != '.wav':
            continue

        # os.path.join works whether or not the directories end with a slash.
        wav_to_spectrogram(os.path.join(audio_dir, file),
                           os.path.join(spectrogram_dir, stem + '.png'),
                           dimensions=dimensions,
                           noverlap=noverlap,
                           cmap=cmap)
   

**Create needed folders**

In [None]:
def create_needed_folders(path):
  """Create the directory `path`, including any missing parent directories.

  Does nothing when the directory already exists.

  Raises:
    FileExistsError: If `path` exists but is not a directory.
  """
  # exist_ok avoids the race between checking for the folder and creating it.
  os.makedirs(path, exist_ok=True)

In [None]:
# Input folders holding the raw .wav clips, one folder per sound class.
# NOTE(review): presumably these folders are produced by extracting the
# matching <class>.zip archives further down — confirm the archive layout.

# Single Car Folder
single_car_audio = './single-car/'

# Multiple Car Folder
multiple_car_audio = './multiple-cars/'

# Bike Folder
bike_audio = './bike/'

# City Folder
city_audio = './city/'

In [None]:
# Output folders for the generated spectrogram images.
# spectrograms_path is the root later passed to image_dataset_from_directory;
# its sub-folder names ('bike', 'city', 'multiple-cars', 'single-car') are the
# class names reported when the dataset is loaded.

# Spectrogram Folder
spectrograms_path = './spectrograms'

# Single Car Spectrogram Folder
single_car_spectrograms = './spectrograms/single-car/'

# Multiple Car Spectrogram Folder
multiple_cars_spectrograms = './spectrograms/multiple-cars/'

# Bike Spectrogram Folder
bike_spectrograms = './spectrograms/bike/'

# City Spectrogram Folder
city_spectrograms = './spectrograms/city/'

In [None]:
# Make sure every per-class spectrogram output folder exists before conversion.
for spectrogram_folder in (
    single_car_spectrograms,
    multiple_cars_spectrograms,
    bike_spectrograms,
    city_spectrograms,
):
    create_needed_folders(spectrogram_folder)

In [None]:
# Disabled: input path, output path and folder creation for an additional
# 'people' class. Left commented out, so the class is not part of the dataset.
#people_audio = './people/'
#people_spectrograms = './spectrograms/people/'
#create_needed_folders(people_spectrograms)

# Prepare Audio Data for Training

**Extract the audio data**

In [None]:
# Extract the single-car recordings into the current working directory.
unzip('single-car.zip')

In [None]:
# Extract the multiple-cars recordings into the current working directory.
unzip('multiple-cars.zip')

In [None]:
# Extract the bike recordings into the current working directory.
unzip('bike.zip')

In [None]:
# Extract the city recordings into the current working directory.
unzip('city.zip')

**Convert all audio files to spectrograms**

In [None]:
# Single Car: render the .wav clips in single_car_audio as PNG spectrograms
# in single_car_spectrograms (default size, overlap and colormap).
convert_audio_dir_to_sectrogram(single_car_audio, single_car_spectrograms)

single_car_01_00018_0.wav
single_car_02_00222_1.wav
single_car_02_00131_1.wav
28_06_21-15_26_snippet_182_single-car_0.wav
28_06_21-15_26_snippet_121_single-car_0.wav
single_car_02_00191_1.wav
single_car_02_00067_0.wav
single_car_02_00191_0.wav
28_06_21-15_07_snippet_158_single-car_0.wav
single_car_01_00022_1.wav
28_06_21-15_26_snippet_123_single-car_0.wav
28_06_21-15_26_snippet_191_single-car_0.wav
single_car_02_00247_1.wav
28_06_21-15_26_snippet_61_single-car_0.wav
single_car_02_00156_1.wav
single_car_02_00114_0.wav
single_car_02_00005_1.wav
single_car_02_00111_0.wav
single_car_02_00066_1.wav
28_06_21-15_26_snippet_289_single-car_0.wav
28_06_21-15_26_snippet_188_single-car_0.wav
single_car_02_00119_1.wav
28_06_21-15_07_snippet_28_single-car_0.wav
single_car_02_00204_1.wav
single_car_02_00096_0.wav
28_06_21-15_26_snippet_293_single-car_0.wav
28_06_21-15_26_snippet_62_single-car_0.wav
single_car_02_00150_1.wav
single_car_01_00001_3.wav
single_car_02_00150_0.wav
single_car_01_00017_2.wav

  


single_car_02_00180_0.wav
single_car_02_00246_0.wav
single_car_02_00070_1.wav
single_car_02_00075_0.wav
single_car_02_00159_1.wav
28_06_21-15_26_snippet_192_single-car_0.wav
single_car_02_00200_1.wav
single_car_02_00199_0.wav
single_car_01_00015_1.wav
single_car_02_00221_0.wav
28_06_21-15_07_snippet_216_single-car_0.wav
single_car_02_00162_0.wav
single_car_02_00128_0.wav
single_car_02_00057_1.wav
single_car_02_00076_1.wav
single_car_02_00098_1.wav
single_car_02_00099_0.wav
28_06_21-15_07_snippet_119_single-car_0.wav
single_car_02_00155_1.wav
single_car_02_00174_0.wav
single_car_02_00178_0.wav
single_car_02_00215_0.wav
single_car_01_00024_1.wav
single_car_02_00244_0.wav
single_car_02_00011_0.wav
single_car_02_00098_0.wav
28_06_21-15_07_snippet_57_single-car_0.wav
single_car_02_00117_1.wav
single_car_01_00019_1.wav
28_06_21-15_26_snippet_85_single-car_0.wav
single_car_02_00123_1.wav
single_car_02_00016_1.wav
single_car_02_00003_0.wav
28_06_21-15_07_snippet_27_single-car_0.wav
single_car_

In [None]:
# Multiple Cars: render the .wav clips in multiple_car_audio as PNG
# spectrograms in multiple_cars_spectrograms.
convert_audio_dir_to_sectrogram(multiple_car_audio, multiple_cars_spectrograms)

multiple_cars_01_00005_28.wav
multiple_cars_01_00001_55.wav
multiple_cars_01_00002_14.wav
multiple_cars_01_00003_45.wav
multiple_cars_01_00004_145.wav
multiple_cars_01_00004_64.wav
multiple_cars_01_00004_143.wav
28_06_21-15_26_snippet_25_multiple-cars_0.wav
multiple_cars_01_00001_95.wav
multiple_cars_02_00000_4.wav
multiple_cars_01_00005_37.wav
multiple_cars_01_00003_60.wav
multiple_cars_01_00008_24.wav
multiple_cars_01_00004_82.wav
multiple_cars_01_00008_30.wav
multiple_cars_01_00007_68.wav
multiple_cars_01_00001_109.wav
multiple_cars_01_00004_58.wav
multiple_cars_02_00005_2.wav
multiple_cars_01_00011_8.wav
multiple_cars_01_00013_3.wav
multiple_cars_01_00004_89.wav
multiple_cars_01_00008_3.wav
multiple_cars_01_00007_3.wav
multiple_cars_02_00012_3.wav
multiple_cars_01_00009_4.wav
multiple_cars_01_00001_41.wav
28_06_21-15_07_snippet_83_multiple-cars_0.wav
multiple_cars_01_00001_108.wav
multiple_cars_02_00029_2.wav
multiple_cars_01_00008_60.wav
multiple_cars_01_00019_2.wav
multiple_cars_

In [None]:
# Bike: render the .wav clips in bike_audio as PNG spectrograms in
# bike_spectrograms.
convert_audio_dir_to_sectrogram(bike_audio, bike_spectrograms)

motorbike_01_00002_2.wav
motorbike_01_00004_3.wav
motorbike_01_00003_0.wav
motorbike_01_00003_1.wav
motorbike_01_00003_3.wav
motorbike_01_00006_1.wav
motorbike_01_00004_1.wav
motorbike_01_00002_3.wav
motorbike_02_00003_0.wav
motorbike_02_00001_1.wav
motorbike_02_00002_1.wav
motorbike_01_00000_1.wav
motorbike_02_00003_1.wav
motorbike_01_00002_1.wav
motorbike_01_00001_2.wav
motorbike_01_00000_2.wav
motorbike_01_00003_2.wav
motorbike_02_00000_1.wav
motorbike_02_00004_1.wav
motorbike_02_00000_0.wav
motorbike_01_00006_3.wav
.DS_Store
motorbike_02_00004_0.wav
motorbike_02_00002_0.wav
motorbike_01_00005_0.wav
motorbike_01_00001_1.wav
motorbike_01_00004_2.wav
motorbike_01_00006_0.wav
motorbike_01_00004_0.wav
motorbike_01_00000_0.wav
motorbike_01_00006_2.wav
motorbike_02_00004_2.wav
motorbike_02_00001_0.wav
motorbike_01_00002_0.wav
motorbike_01_00001_3.wav
motorbike_01_00005_1.wav
motorbike_01_00001_0.wav


In [None]:
# City: render the .wav clips in city_audio as PNG spectrograms in
# city_spectrograms.
convert_audio_dir_to_sectrogram(city_audio, city_spectrograms)

city_00001_533.wav
city_00001_482.wav
city_00001_631.wav
28_06_21-15_26_snippet_208_city_0.wav
city_00001_986.wav
city_00001_455.wav
city_00001_1344.wav
city_00001_1453.wav
city_00001_1716.wav
city_00013_3.wav
city_00001_1000.wav
city_00001_815.wav
city_00001_197.wav
default_windy_05_25.wav
city_00001_1457.wav
city_00001_1691.wav
28_06_21-15_07_snippet_111_city_0.wav
city_00001_813.wav
city_00000_4.wav
city_00001_1732.wav
city_00001_1040.wav
city_00001_730.wav
city_00001_23.wav
city_00001_1036.wav
city_00013_124.wav
city_00001_706.wav
city_00001_1779.wav
city_00001_829.wav
city_00000_117.wav
default_windy_05_21.wav
city_00001_1608.wav
city_00001_1387.wav
city_00001_632.wav
city_00012_2.wav
city_00001_825.wav
city_00001_839.wav
city_00001_693.wav
city_00001_611.wav
city_00001_1196.wav
city_00001_1385.wav
city_00012_68.wav
city_00001_1504.wav
city_00001_1229.wav
city_00001_116.wav
city_00013_141.wav
city_00001_1222.wav
city_00001_372.wav
city_00001_967.wav
city_00014_3.wav
city_00001_109

**Split into training / testing data**

In [None]:
# Folders intended for a manual train/test split.
# NOTE(review): nothing in the visible cells writes to or reads from these
# folders — the split actually used comes from validation_split when the
# datasets are loaded. Confirm whether they are still needed.
training_data = './training'
testing_data = './testing'

In [None]:
# Create the training and testing folders if they are missing.
for split_folder in (training_data, testing_data):
    create_needed_folders(split_folder)

In [None]:
# Loading parameters shared by the training and validation datasets and by the
# model's input shape.
batch_size = 64   # images per batch
img_height = 128  # images are resized to (img_height, img_width) on load
img_width = 128

In [None]:
# Training subset of the images under spectrograms_path (80%, since
# validation_split=0.2). Class labels come from the sub-folder names.
# seed and validation_split must match the validation cell so that the two
# subsets are drawn from the same shuffled split and do not overlap.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    spectrograms_path,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 4544 files belonging to 4 classes.
Using 3636 files for training.


In [None]:
# Validation subset: the remaining 20% of the images under spectrograms_path.
# Uses the same seed and validation_split as the training cell so the two
# subsets are complementary.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    spectrograms_path,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 4544 files belonging to 4 classes.
Using 908 files for validation.


In [None]:
# Class labels as reported by the dataset; get_prediction indexes this list
# with the model's argmax output.
# NOTE(review): this is read before train_ds is re-assigned with
# cache/shuffle/prefetch — presumably because the transformed dataset no longer
# exposes class_names; confirm before reordering cells.
class_names = train_ds.class_names
print(class_names)

['bike', 'city', 'multiple-cars', 'single-car']


# Create the AI Model

In [None]:
# Let tf.data pick the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE

# Cache the loaded batches, shuffle the training data with a 1000-element
# buffer, and prefetch; the validation data is cached and prefetched only.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Standalone layer that multiplies pixel values by 1/255.
# NOTE(review): the model defined below starts with its own Rescaling(1./255)
# layer, so this one is only used by the normalized_ds cell that follows.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)

In [None]:
# Apply the standalone rescaling layer to the training images and pull one
# batch from the result.
# NOTE(review): normalized_ds, image_batch and labels_batch are not used by any
# later visible cell — this looks like a leftover sanity check.
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))

In [None]:
# One output unit per class.
num_classes = len(class_names)

# Small CNN classifier over (img_height, img_width, 3) spectrogram images.
model = Sequential([
    # Scale pixel values by 1/255 inside the model, so callers pass raw images.
    layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    # Two stacked 3x3 convolutions followed by a single pooling step.
    layers.Conv2D(64, 3, activation='relu'),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.25),
    # Classification head.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    # No activation here: the model outputs raw logits.
    layers.Dense(num_classes),
])

In [None]:
# from_logits=True matches the final Dense layer, which has no activation.
# The sparse loss takes integer class labels rather than one-hot vectors.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_1 (Rescaling)      (None, 128, 128, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 126, 126, 64)      1792      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 124, 124, 128)     73856     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 62, 62, 128)       0         
_________________________________________________________________
dropout (Dropout)            (None, 62, 62, 128)       0         
_________________________________________________________________
flatten (Flatten)            (None, 492032)            0         
_________________________________________________________________
dense (Dense)                (None, 128)               6

In [None]:
# Train for a fixed number of epochs, evaluating on val_ds after each one.
# The returned History object is kept in `history`.
epochs=10
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**Save the Model**

In [None]:
# Export the trained model to saved_model/model (the tar listing below shows a
# saved_model.pb plus variables/ and assets/ folders).
!mkdir -p saved_model
model.save('saved_model/model')

INFO:tensorflow:Assets written to: saved_model/model/assets


In [None]:
# Bundle the exported model directory into a gzip-compressed tarball.
!tar -czvf model.tar.gz saved_model/model/

saved_model/model/
saved_model/model/saved_model.pb
saved_model/model/assets/
saved_model/model/keras_metadata.pb
saved_model/model/variables/
saved_model/model/variables/variables.data-00000-of-00001
saved_model/model/variables/variables.index


In [None]:
# List the working directory, e.g. to check that model.tar.gz was written.
!ls -la

**Convert to TensorFlow Lite (testing)**

In [None]:
# Convert the exported SavedModel to a TensorFlow Lite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model('./saved_model/model') # path to the SavedModel directory
tflite_model = converter.convert()

# Write the converted model bytes to model.tflite in the working directory.
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)

# Test the AI Model

In [None]:
def get_prediction(file):
    """Classify one spectrogram image with the trained model and print the result.

    Relies on the notebook globals `model`, `class_names`, `img_height` and
    `img_width`, so the cells defining them must have been run first.

    Args:
        file: Path of the spectrogram image to classify.
    """
    # Resize to the same (height, width) the model was built with, instead of
    # repeating the literal size, so the two cannot drift apart.
    img = keras.preprocessing.image.load_img(file, target_size=(img_height, img_width))
    img_array = keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0) # Create a batch

    predictions = model.predict(img_array)
    # The model outputs raw logits; softmax turns them into class probabilities.
    score = tf.nn.softmax(predictions[0])

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )

In [None]:
# Convert the evaluation clip to a spectrogram with the same settings as the
# defaults used for the training data.
car_eval_audio = './output.wav'
car_eval_spectrogram = './output.png'
wav_to_spectrogram(car_eval_audio, car_eval_spectrogram, dimensions=(128, 128), noverlap=16, cmap='gray_r')

In [None]:
# Classify the evaluation spectrogram and print the predicted class.
get_prediction(car_eval_spectrogram)