Downloading the necessary modules and libraries for the project.

In [1]:
# Using these lines in terminal and remember to set the environment variable to this ipynb file
# conda create --name comp.sgn.120 python=3.11.3
# conda activate comp.sgn.120
# conda install numpy=1.26.2
# pip install ipykernel --upgrade
# conda install -c conda-forge ffmpeg

# Uncomment these lines to install the required packages if you haven't already
# !pip install pydub==0.25.1
# !pip install tqdm==4.66.1
# !pip install librosa==0.10.1
# !pip install matplotlib==3.7.2
# !pip install scikit-learn==1.3.2
# !pip install scipy==1.11.4 
# !pip install pandas==2.1.4


Necessary Modules and Libraries

In [2]:
import os

# Database loading and Feature extraction
from pydub import AudioSegment
import librosa as lb
import librosa.display
from scipy.stats import skew
from scipy.signal import hamming, hann


# Representation
import IPython.display as ipd
import matplotlib.pyplot as plt

# Data processing
import numpy as np
import pandas as pd
from tqdm import tqdm, tqdm_pandas
tqdm.pandas()
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Training
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score




Function for reading the data

In [3]:
def readFolder(folder):
    folder_names = []
    for root, dirs, files in os.walk(folder):
        for name in dirs:
            folder_names.append(os.path.join(root, name))
    return folder_names


def readFileInFolder(folder):
    file_lists = []
    for root, dirs, files in os.walk(folder):
        for name in files:
            file_lists.append(os.path.join(root, name))
    return file_lists

Function for extracting the features

In [4]:
def getMFCC(name, path):
    n_mel=40
    hop_size=128
    n_fft=512
    data, _ = librosa.core.load(name, sr = None)
    try:
        ft1 = lb.feature.mfcc(y= data, n_mfcc=n_mel, hop_length=hop_size, norm="ortho", n_fft=n_fft)
        ft2 = librosa.feature.zero_crossing_rate(y = data)[0]
        ft3 = librosa.feature.spectral_rolloff(y= data)[0]
        ft4 = librosa.feature.spectral_centroid(y = data)[0]
        ft5 = librosa.feature.spectral_contrast(y = data)[0]
        ft6 = librosa.feature.spectral_bandwidth(y = data)[0]
        ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.median(ft1, axis = 1), np.min(ft1, axis = 1)))
        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.median(ft2), np.min(ft2)))
        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.median(ft3), np.min(ft3)))
        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.median(ft4), np.min(ft4)))
        ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.median(ft5), np.min(ft5)))
        ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.median(ft6), np.max(ft6)))
        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc)))
    except:
        print('bad file')
        return pd.Series([0]*210)

def extractOtherFeatures(y, sr, path=None):
    features = []
    
    n_mel=40
    hop_size=128
    n_fft=512
    
    # Energy 
    energy = np.sum(np.power(y, 2))
    # RMS
    rms = np.sqrt(np.sum(np.power(y, 2)))
    # Spectrograms
    spec = np.abs(lb.stft(y, n_fft=n_fft, hop_length=hop_size))
    # Mel Spectrogram
    mel = lb.feature.melspectrogram(S=spec, n_mels=n_mel)
    # Log Mel Spectrogram
    logmel = lb.power_to_db(mel)
    # CQT Spectrogram
    cqt = np.abs(lb.cqt(y, sr=sr, hop_length=hop_size, n_bins=40, bins_per_octave=12))   

    features.append(energy)
    features.append(rms)
    features.append(spec)
    features.append(mel)
    features.append(logmel)
    features.append(cqt)    
    
    return features

Evaluating function

In [5]:
def printAccuracy(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average = 'macro')
    recall = recall_score(y_test, y_pred, average = 'macro')
    
    print("Accuracy: ", accuracy)
    print("Precision: ", precision)
    print("Recall: ", recall)
    return accuracy, precision, recall

Data preprocessing

In [6]:
# Tram_Train: https://freesound.org/people/publictransport/packs/36726/
# Tram_Train: https://freesound.org/people/ali.abdelsalam/packs/36722/
# Bus_Train: https://freesound.org/people/emmakyllikki/packs/36810/
# Bus_Train: https://freesound.org/people/glingden/packs/36807/
# Tram_Test: My own recording
# Bus_Test: My own recording
folder_list = readFolder("audio")
folder_to_read = ["Bus_Test", "Bus_Train", "Tram_Test", "Tram_Train"]
bus_test = []
bus_train = []
tram_test = []
tram_train = []
label = {}
for folder in folder_list:
    # Read all the files and append to the list of files
    files = readFileInFolder(folder)
    for name in folder_to_read:
        if name in folder:
            # Append the files to the corresponding list
            if name == "Bus_Test":
                bus_test = files
                for file in files:
                    label[file] = "bus"
            elif name == "Bus_Train":
                bus_train = files
                for file in files:
                    label[file] = "bus"
            elif name == "Tram_Test":
                tram_test = files
                for file in files:
                    label[file] = "tram"
            elif name == "Tram_Train":
                tram_train = files
                for file in files:
                    label[file] = "tram"

In [7]:
# Prepare Data
train_data = pd.DataFrame()
train_data["fname"] = bus_train + tram_train
test_data = pd.DataFrame()
test_data["fname"] = bus_test + tram_test

train_data = train_data["fname"].progress_apply(getMFCC, path = None)
print("done loading train mfcc")
test_data = test_data["fname"].progress_apply(getMFCC, path = None)
print("done loading test mfcc")


100%|██████████| 98/98 [00:28<00:00,  3.44it/s]


done loading train mfcc


  data, _ = librosa.core.load(name, sr = None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
100%|██████████| 18/18 [00:09<00:00,  1.92it/s]

done loading test mfcc





In [8]:
train_data["fname"] = bus_train + tram_train
train_data["label"] = train_data["fname"].apply(lambda x: label[x])

print("Train data:")
# print(train_data)


Train data:


In [9]:
test_data["fname"] = bus_test + tram_test
test_data["label"] = test_data["fname"].apply(lambda x: label[x])

print("Test data:")
# print(test_data)

Test data:


In [10]:
# Functions from Random Foresth using MFCC ttps://www.kaggle.com/amlanpraharaj/random-forest-using-mfcc-features
X = train_data.drop(['label', 'fname'], axis=1)
feature_names = list(X.columns)
X = X.values

labels = np.sort(np.unique(train_data.label.values))

num_class = len(labels)
c2i = {}
i2c = {}
for i, c in enumerate(labels):
    c2i[c] = i
    i2c[i] = c
y = np.array([c2i[x] for x in train_data.label.values])
X_test = test_data.drop(['label', 'fname'], axis=1).values
y_test = np.array([c2i[x] for x in test_data.label.values])


In [11]:
# A function that exporting the csv file from the beginning of the
def exportCSV(y_pred, y_test, filename, bus_test = bus_test, tram_test = tram_test, i2c = i2c):
    # convert the binary data into a class label (bus or tram)
    y_pred_label = []
    for i in range(len(y_pred)):
        y_pred_label.append(i2c[y_pred[i]])
    y_pred_label = np.array(y_pred_label)
    # print(y_pred_label)

    # convert the binary data into a class label (bus or tram)
    y_test_label = []
    for i in range(len(y_test)):
        y_test_label.append(i2c[y_test[i]])
    y_test_label = np.array(y_test_label)
    # print(y_test_label)
    y_test_name = []
    for i in range(len(bus_test)):
        y_test_name.append(os.path.basename(bus_test[i]))
    for i in range(len(tram_test)):
        y_test_name.append(os.path.basename(tram_test[i]))
    y_test_name = np.array(y_test_name)
    # print(y_test_name)

    # Export the CSV file using y_test_name, y_pred_label, and y_test_label
    df = pd.DataFrame({'fname': y_test_name, 'y_pred': y_pred_label, 'y_test': y_test_label})
    df.to_csv(filename, index=False)

In [None]:
def dataPreprocessing(X, X_test):
    scaler = StandardScaler()
    scaler.fit(X)
    X = scaler.transform(X)
    X_test = scaler.transform(X_test)
    pca = PCA(n_components=65).fit(X_scaled)
    X_pca = pca.transform(X_scaled)
    X_test_pca = pca.transform(X_test_scaled)
    print(sum(pca.explained_variance_ratio_))
    return X, X_test

In [12]:
# Apply scaling for PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Apply PCA for dimension reduction
pca = PCA(n_components=65).fit(X_scaled)
X_pca = pca.transform(X_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(sum(pca.explained_variance_ratio_)) 

0.9828026391973513


In [14]:
def dataProcessAndTrainAndEvaluate(X_pca, y, feature = "MFCC"):
    # Build a KNN model
    X_train, X_val, y_train, y_val = train_test_split(X_pca, y, test_size = 0.01, random_state = 42, shuffle = True)

    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)
    
    # Test the kNN model with the test data
    y_pred = knn.predict(X_test_pca)
    print("KNN: ")
    printAccuracy(y_test, y_pred)
    exportCSV(y_pred, y_test, 'KNN_output.csv')
    
    # Build a SVM model
    X_train, X_val, y_train, y_val = train_test_split(X_pca, y, test_size = 0.01, random_state = 42, shuffle = True)
    clf = SVC(kernel = 'rbf', probability=True)
    clf.fit(X_train, y_train)
    
    # Test the SVM model with the test data
    y_pred = clf.predict(X_test_pca)
    print("SVM: ")
    printAccuracy(y_test, y_pred)
    exportCSV(y_pred, y_test, 'SVM_output.csv')

In [15]:
dataProcessAndTrainAndEvaluate(X_pca, y)

Accuracy:  0.7222222222222222
Precision:  0.8333333333333333
Recall:  0.6875
Accuracy:  0.7222222222222222
Precision:  0.7207792207792207
Recall:  0.7125


In [None]:
# Read the audio files
bus_test_audio = []
bus_train_audio = []
tram_test_audio = []
tram_train_audio = []
for file in bus_train:
    y, sr = lb.load(file, sr=None)
    bus_train_audio.append((y, sr))
for file in tram_train:
    y, sr = lb.load(file, sr=None)
    tram_train_audio.append((y, sr))

# Read the audio files in m4a format
for file in bus_test:
    sound = AudioSegment.from_file(file, format="m4a")
    sound.export("temp.wav", format="wav")
    y, sr = lb.load("temp.wav", sr=None)
    bus_test_audio.append((y, sr))
    os.remove("temp.wav")
for file in tram_test:
    sound = AudioSegment.from_file(file, format="m4a")
    sound.export("temp.wav", format="wav")
    y, sr = lb.load("temp.wav", sr=None)
    tram_test_audio.append((y, sr))
    os.remove("temp.wav")

In [None]:
print("Number of bus train audio files: ", len(bus_train_audio))
print("Number of tram train audio files: ", len(tram_train_audio))
print("Number of bus test audio files: ", len(bus_test_audio))
print("Number of tram test audio files: ", len(tram_test_audio))

In [None]:
tram_features = []
bus_features = []

tram_test_features = []
bus_test_features = []
# using the data from tram_train_audio and bus_train_audio
for y, sr in tram_train_audio:
    features = extractOtherFeatures(y, sr)
    tram_features.append(features)

for y, sr in bus_train_audio:
    features = extractOtherFeatures(y, sr)
    bus_features.append(features)

# using the data from tram_test_audio and bus_test_audio
for y, sr in tram_test_audio:
    features = extractOtherFeatures(y, sr)
    tram_test_features.append(features)
    
for y, sr in bus_test_audio:
    features = extractOtherFeatures(y, sr)
    bus_test_features.append(features)

In [None]:
print(len(tram_features)) # 59
print(len(tram_features[0])) # 6
print(len(bus_features)) # 39
print(len(bus_features[0])) # 6

In [None]:
# Continuing with other features
train_data_list = []
test_data_list = []
features = tram_features + bus_features
features_test = tram_test_features + bus_test_features
feature_names = ["energy", "rms", "spec", "mel", "logmel", "cqt"]
for i in range(len(bus_features[0])): # 6
    train_data = pd.DataFrame()
    test_data = pd.DataFrame()
    train_data["fname"] = bus_train + tram_train
    test_data["fname"] = bus_test + tram_test
    # for j in range(len(features)): # 98
    train_data["label"] = train_data["fname"].apply(lambda x: label[x])
    test_data["label"] = test_data["fname"].apply(lambda x: label[x])

    # take the 0 element of each feature[i]
    train_data[feature_names[i]] = [x[i] for x in features]
    test_data[feature_names[i]] = [x[i] for x in features_test]
    
    train_data_list.append(train_data)
    test_data_list.append(test_data)      

# concatenate the features_names into 1 dataframe, with keeping the fname and label columns and not concatenating them
train_data = pd.concat(train_data_list, axis=1)
test_data = pd.concat(test_data_list, axis=1)

# fix the overlapping columns
train_data = train_data.loc[:,~train_data.columns.duplicated()]
test_data = test_data.loc[:,~test_data.columns.duplicated()]



In [None]:

print("Train data:")
# print(train_data)
print(train_data.shape)


In [None]:
print("Test data:")
# print(test_data)
print(test_data.shape)