Downloading the necessary modules and libraries for the project.

In [1]:
# Run these commands in a terminal, then select the new conda environment as the kernel for this notebook
# conda create --name comp.sgn.120 python=3.7.11
# conda activate comp.sgn.120
# conda install ipython=7.31.1
# conda install -c conda-forge ffmpeg

# Uncomment these lines to install the required packages if you haven't already
# !pip install pydub==0.25.1
# !pip install librosa==0.10.1
# !pip install scipy==1.7.3
# !pip install matplotlib==3.5.3
# !pip install soundfile==0.12.1
# !pip install numpy==1.21.5
# !pip install scikit-learn==1.0.2



Necessary Modules and Libraries

In [14]:
import os

import numpy as np

# Database loading and Feature extraction
from pydub import AudioSegment
import librosa as lb
import librosa.display
from scipy.signal import hamming, hann


# Representation
import IPython.display as ipd
import matplotlib.pyplot as plt

# Training and evaluating the model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA

from sklearn.svm import SVC


Functions for reading the data

In [3]:
def readFolder(folder):
    """Collect the path of every directory found beneath ``folder``.

    The tree is traversed with ``os.walk``, so sub-directories at every depth
    are included. ``folder`` itself is not part of the result.

    Parameters
    ----------
    folder : str
        Directory at which the traversal starts.

    Returns
    -------
    list of str
        Each directory name joined onto the path of its parent, in the order
        in which ``os.walk`` reports them.
    """
    return [
        os.path.join(parent, subdir)
        for parent, subdirs, _ in os.walk(folder)
        for subdir in subdirs
    ]


def readFileInFolder(folder):
    """Collect the path of every file found beneath ``folder``.

    The tree is traversed with ``os.walk``, so files located in nested
    sub-directories are returned as well as those directly inside ``folder``.

    Parameters
    ----------
    folder : str
        Directory at which the traversal starts.

    Returns
    -------
    list of str
        Each file name joined onto the path of the directory containing it,
        in the order in which ``os.walk`` reports them.
    """
    return [
        os.path.join(parent, filename)
        for parent, _, filenames in os.walk(folder)
        for filename in filenames
    ]

Functions for extracting the features

In [4]:
def getMFCC(y, sr, n_mel=40, hop_size=128, n_fft=512):
    """Compute MFCC features by delegating to ``lb.feature.mfcc``.

    Parameters
    ----------
    y
        Audio signal, forwarded unchanged as ``y``.
    sr
        Sampling rate of ``y``, forwarded unchanged as ``sr``.
    n_mel : int, default 40
        Forwarded as ``n_mfcc``. Despite its name, this parameter is the
        number of coefficients requested, not a mel-band count.
    hop_size : int, default 128
        Forwarded as ``hop_length``.
    n_fft : int, default 512
        Forwarded as ``n_fft``.

    Returns
    -------
    The value returned by ``lb.feature.mfcc``, unmodified.
    """
    # norm="ortho" is fixed here and cannot be overridden by the caller.
    options = dict(n_mfcc=n_mel, hop_length=hop_size, n_fft=n_fft, norm="ortho")
    return lb.feature.mfcc(y=y, sr=sr, **options)


def getMelSpectrogram(y, sr, n_mel=40, hop_size=128, n_fft=512):
    """Compute a mel spectrogram by delegating to ``lb.feature.melspectrogram``.

    Parameters
    ----------
    y
        Audio signal, forwarded unchanged as ``y``.
    sr
        Sampling rate of ``y``, forwarded unchanged as ``sr``.
    n_mel : int, default 40
        Forwarded as ``n_mels``.
    hop_size : int, default 128
        Forwarded as ``hop_length``.
    n_fft : int, default 512
        Forwarded as ``n_fft``.

    Returns
    -------
    The value returned by ``lb.feature.melspectrogram``, unmodified.
    """
    options = dict(n_mels=n_mel, hop_length=hop_size, n_fft=n_fft)
    return lb.feature.melspectrogram(y=y, sr=sr, **options)

Utilities for the project

In [None]:
def convert_to_labels(preds, i2c, k=3):
    """Map each row of scores to its ``k`` highest-scoring labels.

    Parameters
    ----------
    preds : iterable
        One sequence of scores per sample; each is ranked with ``np.argsort``.
    i2c : indexable
        Lookup from a score position to its label string (``i2c[index]``).
    k : int, default 3
        Number of top-ranked positions kept per sample.

    Returns
    -------
    tuple(list of str, list of list)
        First, one space-separated string of the top ``k`` labels per sample,
        highest score first. Second, the matching positions per sample, in
        the same order.
    """
    # Ascending argsort reversed gives positions from highest to lowest score.
    top_positions = [list(np.argsort(scores)[::-1][:k]) for scores in preds]
    joined_labels = [
        " ".join(i2c[position] for position in row) for row in top_positions
    ]
    return joined_labels, top_positions

Extract the data

In [5]:
# Tram_Train: https://freesound.org/people/publictransport/packs/36726/
# Tram_Train: https://freesound.org/people/ali.abdelsalam/packs/36722/
# Bus_Train: https://freesound.org/people/emmakyllikki/packs/36810/
# Bus_Train: https://freesound.org/people/glingden/packs/36807/
# Tram_Test: My own recording
# Bus_Test: My own recording
folder_list = readFolder("audio")
folder_to_read = ["Bus_Test", "Bus_Train", "Tram_Test", "Tram_Train"]

# Class label given to every file that belongs to each dataset folder.
folder_label = {
    "Bus_Test": "bus",
    "Bus_Train": "bus",
    "Tram_Test": "tram",
    "Tram_Train": "tram",
}

# Files gathered per dataset folder. A dict is used as an ordered set so that
# a file reached through several matching folders is only recorded once.
files_by_folder = {name: {} for name in folder_to_read}

# Maps each audio file path to its class label ("bus" or "tram").
label = {}
for folder in folder_list:
    # Read all the files below this folder (the walk is recursive)
    files = readFileInFolder(folder)
    for name in folder_to_read:
        if name in folder:
            # readFolder also returns nested sub-folders, so several entries
            # of folder_list can match the same dataset name. Accumulate the
            # files instead of overwriting, otherwise the last matching
            # folder would silently replace the files found earlier.
            files_by_folder[name].update(dict.fromkeys(files))
            label.update(dict.fromkeys(files, folder_label[name]))

bus_test = list(files_by_folder["Bus_Test"])
bus_train = list(files_by_folder["Bus_Train"])
tram_test = list(files_by_folder["Tram_Test"])
tram_train = list(files_by_folder["Tram_Train"])

In [6]:
# Read the audio files
# Every list holds one (samples, sampling_rate) tuple per file, exactly as
# returned by lb.load.
bus_test_audio = []
bus_train_audio = []
tram_test_audio = []
tram_train_audio = []

# Training files are handed to librosa directly; sr=None is passed so that
# librosa does not resample to its default rate.
for paths, loaded in ((bus_train, bus_train_audio), (tram_train, tram_train_audio)):
    for file in paths:
        y, sr = lb.load(file, sr=None)
        loaded.append((y, sr))

# Read the audio files in m4a format
# The test recordings are first converted to a temporary WAV file with pydub
# and then loaded from that file.
for paths, loaded in ((bus_test, bus_test_audio), (tram_test, tram_test_audio)):
    for file in paths:
        sound = AudioSegment.from_file(file, format="m4a")
        try:
            sound.export("temp.wav", format="wav")
            y, sr = lb.load("temp.wav", sr=None)
        finally:
            # Always clean up, even when the export or the load fails, so a
            # stale temp.wav is never left behind in the working directory.
            if os.path.exists("temp.wav"):
                os.remove("temp.wav")
        loaded.append((y, sr))

In [7]:
# Report how many clips were loaded for each class and split.
for description, clips in (
    ("bus train", bus_train_audio),
    ("tram train", tram_train_audio),
    ("bus test", bus_test_audio),
    ("tram test", tram_test_audio),
):
    print(f"Number of {description} audio files: ", len(clips))


Number of bus train audio files:  39
Number of tram train audio files:  59
Number of bus test audio files:  10
Number of tram test audio files:  8
