# Install Libraries

In [8]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import joblib

# Functions

## Extract Features

In [9]:
def extract_features_with_timing(file_path, start_time, end_time):
    audio, sample_rate = librosa.load(file_path, offset=start_time, duration=end_time - start_time, res_type='kaiser_fast')
    features = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
    return features


## Load and Save file

In [10]:
def label_audio_dataset_with_timing(dataset_path):
    data = []

    for class_label in os.listdir(dataset_path):
        class_path = os.path.join(dataset_path, class_label)

        if os.path.isdir(class_path):
            for audio_file in os.listdir(class_path):
                file_path = os.path.join(class_path, audio_file)

                # Check if the file is an audio file
                if audio_file.lower().endswith(('.wav', '.wm.wav')):
                    print(f"Processing: {file_path}")

                    # Form the correct text file name
                    text_file_name = f"{audio_file}.txt"
                    text_file_path = os.path.join(class_path, text_file_name)

                    if os.path.exists(text_file_path):
                        print(f"Found corresponding text file: {text_file_path}")

                        # Read all lines from the text file
                        with open(text_file_path, 'r') as f:
                            for line in f:
                                # Split the line into start and end times
                                start_time, end_time = map(float, line.split('\t'))

                                print(f"Start time: {start_time}, End time: {end_time}")

                                # Extract features for the specified time range
                                features = extract_features_with_timing(file_path, start_time, end_time)

                                data.append({
                                    'file_path': file_path,
                                    'features': features.tolist(),
                                    'class_label': class_label
                                })

                                print(f"Processed: {file_path}, Start: {start_time}, End: {end_time}\n")

                    else:
                        print(f"No corresponding text file found for: {file_path}. Using the entire duration.")

                        # Set start_time to 0 and end_time to the entire duration
                        start_time, end_time = 0, librosa.get_duration(filename=file_path)

                        print(f"Start time: {start_time}, End time: {end_time}")

                        # Extract features for the entire duration
                        features = extract_features_with_timing(file_path, start_time, end_time)

                        data.append({
                            'file_path': file_path,
                            'features': features.tolist(),
                            'class_label': class_label
                        })

                        print(f"Processed: {file_path}, Start: {start_time}, End: {end_time}\n")

    print(f"Number of samples in dataset: {len(data)}")
    return pd.DataFrame(data)


In [11]:
dataset_path = 'DATA_NIGENS'
labeled_data = label_audio_dataset_with_timing(dataset_path)


Processing: DATA_NIGENS\alarm\Alarm+8001_04.wav
Found corresponding text file: DATA_NIGENS\alarm\Alarm+8001_04.wav.txt
Start time: 0.024943, End time: 22.414966
Processed: DATA_NIGENS\alarm\Alarm+8001_04.wav, Start: 0.024943, End: 22.414966

Processing: DATA_NIGENS\alarm\Alarm+8001_04.wm.wav
No corresponding text file found for: DATA_NIGENS\alarm\Alarm+8001_04.wm.wav. Using the entire duration.
Start time: 0, End time: 1.8575510204081633
Processed: DATA_NIGENS\alarm\Alarm+8001_04.wm.wav, Start: 0, End: 1.8575510204081633

Processing: DATA_NIGENS\alarm\Alarm+8001_06.wav
Found corresponding text file: DATA_NIGENS\alarm\Alarm+8001_06.wav.txt
Start time: 0.038549, End time: 1.022676
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 0.038549, End: 1.022676

Start time: 1.614512, End time: 2.659864
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 1.614512, End: 2.659864

Start time: 3.190476, End time: 4.204082
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 3.190476, End: 

	This alias will be removed in version 1.0.
  start_time, end_time = 0, librosa.get_duration(filename=file_path)


Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 9.52381, End: 10.569161

Start time: 11.099773, End time: 12.145125
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 11.099773, End: 12.145125

Start time: 12.673469, End time: 13.689342
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 12.673469, End: 13.689342

Start time: 14.281179, End time: 15.265306
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 14.281179, End: 15.265306

Start time: 15.857143, End time: 16.84127
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 15.857143, End: 16.84127

Start time: 17.433107, End time: 18.31746
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 17.433107, End: 18.31746

Start time: 19.006803, End time: 19.952381
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 19.006803, End: 19.952381

Start time: 20.582766, End time: 21.498866
Processed: DATA_NIGENS\alarm\Alarm+8001_06.wav, Start: 20.582766, End: 21.498866

Start time: 22.190476, End time: 23.07483
Process



Start time: 0.031746, End time: 1.888889
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF1_03_3.wav, Start: 0.031746, End: 1.888889

Start time: 1.965986, End time: 3.868481
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF1_03_3.wav, Start: 1.965986, End: 3.868481

Start time: 3.897959, End time: 5.761905
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF1_03_3.wav, Start: 3.897959, End: 5.761905

Start time: 5.85034, End time: 7.705215
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF1_03_3.wav, Start: 5.85034, End: 7.705215

Start time: 7.791383, End time: 9.648526
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF1_03_3.wav, Start: 7.791383, End: 9.648526

Processing: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_15.wav
Found corresponding text file: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_15.wav.txt
Start time: 0.00907, End time: 0.29932
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_15.wav, Start: 0.00907, End: 0.29932

Start time: 0.510204, End time: 0.809524
Processed: DATA_NIGENS



Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 1.274376, End: 1.367347

Start time: 1.428571, End time: 1.526077
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 1.428571, End: 1.526077

Start time: 1.600907, End time: 1.689342
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 1.600907, End: 1.689342

Start time: 1.759637, End time: 1.845805
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 1.759637, End: 1.845805

Start time: 1.9161, End time: 2.006803
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 1.9161, End: 2.006803

Start time: 2.088435, End time: 2.165533
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 2.088435, End: 2.165533

Start time: 2.240363, End time: 2.324263
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 2.240363, End: 2.324263

Start time: 2.385488, End time: 2.480726
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 2.385488, End:



Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 3.172336, End: 3.290249

Start time: 3.365079, End time: 3.446712
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 3.365079, End: 3.446712

Start time: 3.53288, End time: 3.619048
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 3.53288, End: 3.619048

Start time: 3.702948, End time: 3.76644
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 3.702948, End: 3.76644

Start time: 3.84127, End time: 3.929705
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 3.84127, End: 3.929705

Start time: 3.99093, End time: 4.092971
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 3.99093, End: 4.092971

Start time: 4.170068, End time: 4.251701
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 4.170068, End: 4.251701

Start time: 4.342404, End time: 4.408163
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 4.342404, End: 4.4



Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 5.122449, End: 5.206349

Start time: 5.265306, End time: 5.365079
Processed: DATA_NIGENS\alarm\ElectronicAlarm+L2SF2_29.wav, Start: 5.265306, End: 5.365079

Processing: DATA_NIGENS\alarm\ElectronicBeep+6063_33.wav
Found corresponding text file: DATA_NIGENS\alarm\ElectronicBeep+6063_33.wav.txt
Start time: 0.002268, End time: 21.331066
Processed: DATA_NIGENS\alarm\ElectronicBeep+6063_33.wav, Start: 0.002268, End: 21.331066

Processing: DATA_NIGENS\alarm\ElectronicBeep+6063_34.wav
Found corresponding text file: DATA_NIGENS\alarm\ElectronicBeep+6063_34.wav.txt
Start time: 0.031746, End time: 29.961451
Processed: DATA_NIGENS\alarm\ElectronicBeep+6063_34.wav, Start: 0.031746, End: 29.961451

Processing: DATA_NIGENS\alarm\FireAlarm+EMR01_18_5.wav
Found corresponding text file: DATA_NIGENS\alarm\FireAlarm+EMR01_18_5.wav.txt
Start time: 0.002268, End time: 30.600907
Processed: DATA_NIGENS\alarm\FireAlarm+EMR01_18_5.wav, Start: 0.



Processed: DATA_NIGENS\baby\HumanBaby+6032_01_2.wav, Start: 46.498866, End: 49.22449

Start time: 49.614512, End time: 50.174603
Processed: DATA_NIGENS\baby\HumanBaby+6032_01_2.wav, Start: 49.614512, End: 50.174603

Start time: 50.219955, End time: 53.836735
Processed: DATA_NIGENS\baby\HumanBaby+6032_01_2.wav, Start: 50.219955, End: 53.836735

Start time: 54.335601, End time: 55.503401
Processed: DATA_NIGENS\baby\HumanBaby+6032_01_2.wav, Start: 54.335601, End: 55.503401

Start time: 55.600907, End time: 55.736961
Processed: DATA_NIGENS\baby\HumanBaby+6032_01_2.wav, Start: 55.600907, End: 55.736961

Start time: 55.891156, End time: 56.791383
Processed: DATA_NIGENS\baby\HumanBaby+6032_01_2.wav, Start: 55.891156, End: 56.791383

Processing: DATA_NIGENS\baby\HumanBaby+6052_18.wav
Found corresponding text file: DATA_NIGENS\baby\HumanBaby+6052_18.wav.txt
Start time: 0.077098, End time: 0.37415
Processed: DATA_NIGENS\baby\HumanBaby+6052_18.wav, Start: 0.077098, End: 0.37415

Start time: 0.707



Processed: DATA_NIGENS\crash\Crash+6010_95_4.wav, Start: 1.655329, End: 5.76644

Processing: DATA_NIGENS\crash\Crash+6010_95_5.wav
Found corresponding text file: DATA_NIGENS\crash\Crash+6010_95_5.wav.txt
Start time: 2.138322, End time: 7.358277
Processed: DATA_NIGENS\crash\Crash+6010_95_5.wav, Start: 2.138322, End: 7.358277

Processing: DATA_NIGENS\crash\crash-closet+of+junk_CAP01-119.wav
Found corresponding text file: DATA_NIGENS\crash\crash-closet+of+junk_CAP01-119.wav.txt
Start time: 0.036281, End time: 3.399093
Processed: DATA_NIGENS\crash\crash-closet+of+junk_CAP01-119.wav, Start: 0.036281, End: 3.399093

Start time: 3.535147, End time: 3.712018
Processed: DATA_NIGENS\crash\crash-closet+of+junk_CAP01-119.wav, Start: 3.535147, End: 3.712018

Start time: 3.895692, End time: 4.770975
Processed: DATA_NIGENS\crash\crash-closet+of+junk_CAP01-119.wav, Start: 3.895692, End: 4.770975

Processing: DATA_NIGENS\crash\CRASH-CLOSET_GEN-HDF-08693.wav
Found corresponding text file: DATA_NIGENS\cr



Processed: DATA_NIGENS\femaleSpeech\bbac7a.wav, Start: 1.67552, End: 2.19296

Processing: DATA_NIGENS\femaleSpeech\bbac8n.wav
Found corresponding text file: DATA_NIGENS\femaleSpeech\bbac8n.wav.txt
Start time: 0.56672, End time: 0.7392
Processed: DATA_NIGENS\femaleSpeech\bbac8n.wav, Start: 0.56672, End: 0.7392

Start time: 0.79744, End time: 1.04832
Processed: DATA_NIGENS\femaleSpeech\bbac8n.wav, Start: 0.79744, End: 1.04832

Start time: 1.10432, End time: 1.49632
Processed: DATA_NIGENS\femaleSpeech\bbac8n.wav, Start: 1.10432, End: 1.49632

Start time: 1.59712, End time: 1.9264
Processed: DATA_NIGENS\femaleSpeech\bbac8n.wav, Start: 1.59712, End: 1.9264

Processing: DATA_NIGENS\femaleSpeech\bbac9n.wav
Found corresponding text file: DATA_NIGENS\femaleSpeech\bbac9n.wav.txt
Start time: 0.66528, End time: 0.9072
Processed: DATA_NIGENS\femaleSpeech\bbac9n.wav, Start: 0.66528, End: 0.9072

Start time: 0.95424, End time: 1.22752
Processed: DATA_NIGENS\femaleSpeech\bbac9n.wav, Start: 0.95424, En



Start time: 0.53312, End time: 1.61504
Processed: DATA_NIGENS\femaleSpeech\swwo8p.wav, Start: 0.53312, End: 1.61504

Start time: 1.70912, End time: 2.2064
Processed: DATA_NIGENS\femaleSpeech\swwo8p.wav, Start: 1.70912, End: 2.2064

Processing: DATA_NIGENS\femaleSpeech\swwo9a.wav
Found corresponding text file: DATA_NIGENS\femaleSpeech\swwo9a.wav.txt
Start time: 0.43008, End time: 2.2736
Processed: DATA_NIGENS\femaleSpeech\swwo9a.wav, Start: 0.43008, End: 2.2736

Processing: DATA_NIGENS\femaleSpeech\swwo9n.wav
Found corresponding text file: DATA_NIGENS\femaleSpeech\swwo9n.wav.txt
Start time: 0.49728, End time: 0.68096
Processed: DATA_NIGENS\femaleSpeech\swwo9n.wav, Start: 0.49728, End: 0.68096

Start time: 0.728, End time: 0.94304
Processed: DATA_NIGENS\femaleSpeech\swwo9n.wav, Start: 0.728, End: 0.94304

Start time: 1.03264, End time: 2.01824
Processed: DATA_NIGENS\femaleSpeech\swwo9n.wav, Start: 1.03264, End: 2.01824

Processing: DATA_NIGENS\femaleSpeech\swwp1p.wav
Found corresponding 



Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 1.258503, End: 1.281179

Start time: 1.492063, End time: 1.648526
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 1.492063, End: 1.648526

Start time: 1.759637, End time: 1.870748
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 1.759637, End: 1.870748

Start time: 2.024943, End time: 2.204082
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 2.024943, End: 2.204082

Start time: 2.315193, End time: 2.403628
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 2.315193, End: 2.403628

Start time: 2.557823, End time: 2.736961
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 2.557823, End: 2.736961

Start time: 2.825397, End time: 3.249433
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 2.825397, End: 3.249433

Start time: 3.312925, End time: 3.430839
Proces



Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 4.405896, End: 4.46712

Start time: 4.668934, End time: 4.802721
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 4.668934, End: 4.802721

Start time: 4.965986, End time: 4.988662
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 4.965986, End: 4.988662

Start time: 5.204082, End time: 5.358277
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 5.204082, End: 5.358277

Start time: 5.469388, End time: 5.603175
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 5.469388, End: 5.603175

Start time: 5.714286, End time: 5.802721
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 5.714286, End: 5.802721

Start time: 5.981859, End time: 6.136054
Processed: DATA_NIGENS\footsteps\FOOTSTEPS-INDOOR_GEN-HDF-11495.wav, Start: 5.981859, End: 6.136054

Start time: 6.247166, End time: 6.403628
Process

# Split Dataset

In [12]:
train_data, test_data = train_test_split(labeled_data, test_size=0.2, train_size=0.8, random_state=42)


In [13]:
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_data['class_label'])
test_labels = label_encoder.transform(test_data['class_label'])


In [14]:
X_train = np.array(train_data['features'].tolist())
X_test = np.array(test_data['features'].tolist())


# Modelling

In [15]:
model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Reshape((X_train.shape[1], 1)),
    layers.Conv1D(64, kernel_size=3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(label_encoder.classes_), activation='softmax')
])


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


# Training

In [16]:
# Train the model
model.fit(X_train, train_labels, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23dd18765f0>

# Evaluate

In [17]:
test_loss, test_accuracy = model.evaluate(X_test, test_labels)
print(f'Test Accuracy: {test_accuracy}')


Test Accuracy: 0.880095899105072


In [19]:
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

predicted_class_labels = label_encoder.inverse_transform(predicted_labels)




In [20]:
# Calculate accuracy on the test set
accuracy = accuracy_score(test_data['class_label'], predicted_class_labels)
print(f'Overall Test Accuracy: {accuracy}')

Overall Test Accuracy: 0.8800959232613909


# Save Model

In [21]:
# model.save('testModel.keras')
# joblib.dump(label_encoder, 'label_encoderTest.joblib')

# Predict Class

In [18]:
# Predict Class
def predict_calsses_for_audio(file_path, model_path, class_path):
    model = keras.models.load_model(model_path)
    label_encoder = LabelEncoder()
    label_encoder.classes_ = np.load(class_path, allow_pickle=True)

    #Ekstrak MFCC
    features = extract_features_with_timing(file_path, start_time=0, end_time=librosa.get_duration(path=file_path))
    features = features.reshape(1, -1)

    predictions = model.predict(features)

    total_duration = librosa.get_duration(path=file_path)
    class_percentages = {class_label: percentage * 100 for class_label, percentage in zip(label_encoder.classes_, predictions[0])}

    return class_percentages

In [22]:
audio_path = "DATA_NIGENS\engine\AMBULANCE_GEN-HDF-00814.wav"

model_path = "testModel.keras"
class_path = "label_encoderTest.joblib"
class_percentages = predict_calsses_for_audio(audio_path, model_path, class_path)

highest_class_label = None
highest_percentage = 0

for class_label, percentage in class_percentages.items():
    print(f"Class: {class_label}, Percentage: {percentage: .2f}%")

    if percentage > highest_percentage:
        highest_percentage = percentage
        highest_class_label = class_label
if highest_class_label:
    print(f"There is high  possibilities this is a {highest_class_label} sound with percentage {highest_percentage}%")

Class: alarm, Percentage:  1.07%
Class: baby, Percentage:  0.00%
Class: crash, Percentage:  0.87%
Class: dog, Percentage:  0.01%
Class: engine, Percentage:  70.75%
Class: femaleScream, Percentage:  0.00%
Class: femaleSpeech, Percentage:  0.13%
Class: fire, Percentage:  0.09%
Class: footsteps, Percentage:  0.10%
Class: general, Percentage:  20.48%
Class: knock, Percentage:  4.55%
Class: maleScream, Percentage:  0.00%
Class: maleSpeech, Percentage:  0.04%
Class: phone, Percentage:  0.00%
Class: piano, Percentage:  1.90%
There is high  possibilities this is a engine sound with percentage 70.74955701828003%
