In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from os import mkdir, path
import pandas as pd


class Sound:
    """A single audio file that can be rendered as a mel-spectrogram image."""

    def __init__(self, path):
        """Remember the location of the audio file.

        Args:
            path: Location of the audio file; handed unchanged to
                ``librosa.load`` when the spectrogram is exported.
        """
        self.sound_path = path

    def export_melgram(self, export_path):
        """Render the clip's mel spectrogram and save it as a JPEG.

        The audio is loaded with ``sr=None``, transformed with an STFT, reduced
        to its magnitude, projected onto 128 mel bands and converted to
        decibels relative to the smallest value of the mel spectrogram
        (``ref=np.min``). The result is drawn without axes and written to
        ``export_path``.

        Args:
            export_path: Destination of the JPEG file. The containing folder
                must already exist; it is not created here.

        Raises:
            Any exception raised by librosa or matplotlib is propagated; the
            figure is released first so a failing file does not leak it.
        """
        MELS = 128
        samples, sample_rate = librosa.load(self.sound_path, sr=None)
        sgram = librosa.stft(samples)
        sgram_mag, _ = librosa.magphase(sgram)
        mel_scale_sgram = librosa.feature.melspectrogram(
            S=sgram_mag, sr=sample_rate, n_mels=MELS)
        mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.min)

        # The figure is 2.56 x 2.56 inches, i.e. 256 x 256 px (two pixel rows
        # per mel band) when rendered at 100 dpi.
        # NOTE(review): the final image size also depends on the active dpi
        # setting and on bbox_inches='tight' -- confirm the saved dimensions.
        fig, ax = plt.subplots(figsize=(256 / 100, 2*MELS / 100))
        try:
            librosa.display.specshow(mel_sgram, ax=ax)
            ax.axis('off')
            fig.tight_layout(pad=0)
            fig.savefig(export_path, format='jpg',
                        bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even when drawing or saving fails;
            # pyplot keeps every open figure alive until it is closed, which
            # adds up when this runs once per audio file.
            fig.clf()
            plt.close(fig)

In [None]:
class UrbanSound8K:
    """Locates UrbanSound8K audio files and the spectrogram images made from them.

    Images are written to ``<dataset_path>/images/fold<N>/<class>/``, one JPEG
    per row of the metadata CSV.
    """

    # Number of "fold<N>" folders, numbered 1..FOLD_COUNT.
    FOLD_COUNT = 10

    def __init__(self, dataset_path: str, meta_path: str, audio_path: str):
        """Read the metadata table and remember where audio and images live.

        Args:
            dataset_path: Folder under which the ``images`` folder is kept.
            meta_path: CSV file read with ``pandas.read_csv``. The columns
                ``fold``, ``slice_file_name`` and ``class`` are used by
                ``generate_images``.
            audio_path: Folder containing the ``fold<N>`` audio folders.

        A leading ``~`` in ``dataset_path`` / ``audio_path`` is expanded to the
        user's home directory, because ``os.mkdir`` and friends treat it as a
        literal folder name otherwise.
        """
        self.audio_path = path.expanduser(audio_path)
        self.images_path = path.join(path.expanduser(dataset_path), "images")
        self.data = pd.read_csv(meta_path)

    def folds(self):
        """Return the image folder of every fold, ordered fold1..fold10."""
        return [path.join(self.images_path, "fold" + str(number))
                for number in range(1, self.FOLD_COUNT + 1)]

    def generate_images(self):
        """Export one mel-spectrogram JPEG for every row of the metadata table.

        Each image is stored as
        ``<images>/fold<fold>/<class>/<slice_file_name>.jpg``; existing files
        are overwritten.
        """
        self._ensure_image_folders()
        # Iterate over the three needed columns directly instead of building a
        # Series for every row with ``iloc``.
        rows = zip(self.data['fold'],
                   self.data['slice_file_name'],
                   self.data['class'])
        for fold, file_name, class_name in rows:
            fold_name = "fold" + str(fold)
            sound_file_path = path.join(self.audio_path, fold_name, file_name)
            jpeg_folder = path.join(self.images_path, fold_name, class_name)
            self._ensure_folder(jpeg_folder)
            jpeg_path = path.join(jpeg_folder, file_name) + '.jpg'
            Sound(sound_file_path).export_melgram(jpeg_path)

    def _ensure_image_folders(self):
        """Create the ``images`` folder and its per-fold sub-folders."""
        print("Creating image folders")
        self._ensure_folder(self.images_path)
        for fold_folder in self.folds():
            self._ensure_folder(fold_folder)

    def _ensure_folder(self, folder):
        """Create ``folder`` (and missing parents) unless it already exists.

        Raises:
            FileExistsError: if ``folder`` exists but is not a directory.
        """
        # Imported locally because the file only imports ``mkdir`` and ``path``
        # from ``os`` at the top.
        from os import makedirs
        makedirs(folder, exist_ok=True)

FileNotFoundError: [Errno 2] No such file or directory: '~/Desktop/University/Multimedia/lab1'

In [None]:
import functools
import os

# Keras picks its backend from KERAS_BACKEND when it is first imported, so the
# variable has to be set before the keras / tensorflow imports below.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import keras.layers
import keras.preprocessing
import keras.utils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import tensorflow as tf


class UrbanSoundClassifier:
    """Trains a small CNN on spectrogram images and reports a confusion matrix.

    The dataset object passed in must provide ``folds()``, returning one image
    folder per fold.
    """

    def __init__(self, dataset):
        """Store the dataset and the number of training epochs.

        Args:
            dataset: Object whose ``folds()`` method returns the list of
                per-fold image folders.
        """
        self._dataset = dataset
        self.EPOCHS = 20

    def evaluate(self, validate_folds=(1, 2, 3)):
        """Train on the non-held-out folds and evaluate on the held-out ones.

        Args:
            validate_folds: Fold numbers (1-based, matching the ``fold<N>``
                folder order returned by ``dataset.folds()``) that are kept
                out of training and used for evaluation only.

        Raises:
            ValueError: if the split leaves either side without any fold.

        Side effects:
            Sets ``self.class_names`` and shows the confusion-matrix plot.
        """
        print(keras.__version__)
        fold_img_folders = self._dataset.folds()

        train_folders, validation_folders = self._split_folds(
            fold_img_folders, validate_folds)
        if not train_folders or not validation_folders:
            raise ValueError(
                "validate_folds must leave at least one fold for training "
                "and one for validation")

        # Load every fold exactly once; training and validation folders are
        # disjoint, so no image is seen by both fit() and evaluate().
        train_datasets = [self._dataset_from_folder(folder)
                          for folder in train_folders]
        validation_datasets = [self._dataset_from_folder(folder)
                               for folder in validation_folders]

        # class_names is only available on the per-folder datasets; the
        # attribute is not carried over by concatenate().
        self.class_names = train_datasets[0].class_names

        train_ds = self._concatenate(train_datasets)
        validation_ds = self._concatenate(validation_datasets)

        model = self._create_model()
        model.fit(train_ds, epochs=self.EPOCHS)
        model.evaluate(validation_ds)

        self._display_confusion_matrix(
            self._calculate_confusion_matrix(model, validation_ds))

    def _split_folds(self, fold_img_folders, validate_folds):
        """Partition fold folders into ``(train, validation)`` lists.

        Folders are numbered from 1 in the order given; a folder goes to the
        validation list when its number is in ``validate_folds``.
        """
        held_out = set(validate_folds)
        train_folders, validation_folders = [], []
        for fold_number, folder in enumerate(fold_img_folders, start=1):
            if fold_number in held_out:
                validation_folders.append(folder)
            else:
                train_folders.append(folder)
        return train_folders, validation_folders

    def _concatenate(self, datasets):
        """Chain a non-empty list of datasets into one via ``concatenate``."""
        return functools.reduce(lambda x, y: x.concatenate(y), datasets)

    def _display_confusion_matrix(self, matrix):
        """Plot ``matrix`` as an annotated heatmap labelled with class names."""
        df_cm = pd.DataFrame(matrix, index=self.class_names,
                             columns=self.class_names)
        fig, ax = plt.subplots(figsize=(10, 7))
        sn.heatmap(df_cm, annot=True, fmt=".2f", cmap='Blues', ax=ax)
        ax.set_title('Confusion matrix (normalized per predicted class)')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        plt.show()

    def _calculate_confusion_matrix(self, model: "keras.Sequential", ds):
        """Return the confusion matrix of ``model`` on ``ds``.

        Rows are true classes, columns are predicted classes, and every column
        is divided by its total (columns without predictions stay all zero).

        Args:
            model: Object with ``predict_on_batch(images)`` returning one row
                of class scores per image.
            ds: Iterable of ``(images, one_hot_labels)`` batches.

        Returns:
            A square float ``numpy`` array whose size is the width of the
            label / score rows, or a ``(0, 0)`` array when ``ds`` is empty.
        """
        true_batches, predicted_batches = [], []
        n_classes = 0
        # Predictions and labels are taken from the SAME pass over the data.
        # Calling predict(ds) and then iterating ds again pairs predictions
        # with labels from a differently shuffled pass.
        for images, labels in ds:
            labels = np.asarray(labels)
            scores = np.asarray(model.predict_on_batch(images))
            n_classes = max(n_classes, labels.shape[1], scores.shape[1])
            true_batches.append(np.argmax(labels, axis=1))
            predicted_batches.append(np.argmax(scores, axis=1))

        if not true_batches:
            return np.zeros((0, 0))

        true_labels = np.concatenate(true_batches)
        predicted_labels = np.concatenate(predicted_batches)

        counts = np.zeros((n_classes, n_classes), dtype=float)
        np.add.at(counts, (true_labels, predicted_labels), 1)

        column_totals = counts.sum(axis=0, keepdims=True)
        return np.divide(counts, column_totals,
                         out=np.zeros_like(counts),
                         where=column_totals > 0)

    def _dataset_from_folder(self, folder):
        """Load ``folder`` as batches of 256x256 grayscale images.

        Labels are inferred from the sub-folder names and one-hot encoded.
        """
        return tf.keras.preprocessing.image_dataset_from_directory(
            folder,
            labels='inferred',
            label_mode='categorical',
            color_mode='grayscale',
            batch_size=50,
            image_size=(256, 256),
            seed=2,
        )

    def _create_model(self):
        """Build and compile the CNN: three conv/pool stages and a 10-way softmax."""
        model = keras.Sequential([
            keras.layers.Conv2D(32, (12, 12), activation='relu',
                                input_shape=(256, 256, 1)),
            keras.layers.MaxPooling2D((8, 8)),
            keras.layers.Conv2D(64, (3, 3), activation='relu'),
            keras.layers.MaxPooling2D((2, 2)),
            keras.layers.Conv2D(128, (3, 3), activation='relu'),
            keras.layers.MaxPooling2D((2, 2)),
            keras.layers.Flatten(),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dropout(0.1),
            keras.layers.Dense(10, activation='softmax')
        ])
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model

In [None]:
# NOTE(review): this import rebinds the name `UrbanSoundClassifier` to whatever
# the project-local `classifier` module exports, replacing any class of the
# same name defined earlier in this notebook -- confirm which one is intended.
from classifier import UrbanSoundClassifier

# NOTE(review): `ds` is not assigned in any cell visible here; presumably it is
# a dataset object exposing `folds()` (the classifier calls
# `self._dataset.folds()`). Define it explicitly above this cell so the
# notebook survives Restart Kernel -> Run All.
classifier = UrbanSoundClassifier(ds)
classifier.evaluate()