In [None]:
import os
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Root folder of the dataset. The default is the original machine-specific
# location; set the NOISE_DATASET_DIR environment variable to run the notebook
# on another machine without editing this cell.
DATASET_DIR = os.environ.get(
    "NOISE_DATASET_DIR",
    "C:/Users/KB/OneDrive/Desktop/noisedetection/dataset",
).rstrip("/\\")

# Training folder; load_data reads its "clean" and "noise" sub-folders.
DATA_PATH_TRAIN = f"{DATASET_DIR}/train"
DATA_PATH_TEST = f"{DATASET_DIR}/test"

# Sampling rate (Hz) passed to librosa when loading audio and computing MFCCs.
SAMPLE_RATE = 22050
# Number of MFCC coefficients requested per file; also the model's input width.
MFCC_COUNT = 13







In [None]:

def extract_mfccs(file_path):
    """Return the time-averaged MFCC vector of one audio file.

    Args:
        file_path: Path of the file, as a string or ``os.PathLike``.

    Returns:
        The MFCC matrix averaged over its frame axis (one value per
        coefficient, ``MFCC_COUNT`` values in total), or ``None`` when the
        file extension is not ``.wav``, ``.flac`` or ``.mp3``.
    """
    file_path = os.fspath(file_path)

    # Skip non-audio files. The comparison is case-insensitive so that files
    # such as "CLIP.WAV" are not silently dropped from the dataset.
    if not file_path.lower().endswith(('.wav', '.flac', '.mp3')):
        print(f"Skipping {file_path}")
        return None

    print(f"Loading {file_path}")
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)
    mfccs = librosa.feature.mfcc(y=audio, sr=SAMPLE_RATE, n_mfcc=MFCC_COUNT)
    # librosa returns (n_mfcc, frames); transpose and average across frames to
    # get one fixed-length vector regardless of clip duration.
    return np.mean(mfccs.T, axis=0)


In [None]:
def load_data(data_path):
    """Load MFCC features and labels from the "clean" and "noise" sub-folders.

    Args:
        data_path: Directory that contains one sub-folder per label.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays holding one entry for
        every file for which ``extract_mfccs`` produced a feature vector.
    """
    mfccs = []
    labels = []
    for label in ["clean", "noise"]:
        folder_path = os.path.join(data_path, label)
        # os.listdir returns entries in arbitrary order; sort them so the
        # sample order (and any seeded train/validation split) is reproducible.
        for filename in sorted(os.listdir(folder_path)):
            mfcc = extract_mfccs(os.path.join(folder_path, filename))
            if mfcc is None:
                # extract_mfccs skipped this file; appending None would corrupt
                # the feature array and add a label with no matching features.
                continue
            mfccs.append(mfcc)
            labels.append(label)
    return np.array(mfccs), np.array(labels)


In [None]:
# Build the training feature matrix together with its string labels.
X, raw_labels = load_data(DATA_PATH_TRAIN)

# Map the label strings to integer class ids. The fitted encoder is kept in
# `le` so that predicted ids can be turned back into label strings later.
le = LabelEncoder()
y = le.fit_transform(raw_labels)

In [None]:
# Hold out 20% of the samples for validation. Stratify on the labels so both
# splits keep the same class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# One output unit per label the encoder has seen. This was hard-coded to 3,
# although only two labels are loaded; an argmax of 2 could not be decoded by
# le.inverse_transform.
n_classes = len(le.classes_)

model = Sequential([
    Dense(100, activation='selu', kernel_initializer="lecun_normal", input_shape=(MFCC_COUNT,)),
    Dense(50, activation='selu', kernel_initializer="lecun_normal"),
    Dense(n_classes, activation='softmax'),
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping only has an effect when the epoch budget exceeds `patience`.
# With epochs == patience (10 and 10) it could never fire, so the budget is
# raised and the weights of the best validation epoch are restored on stop.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights=True)

model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=32, callbacks=[es])


In [None]:

# Predicted class id = index of the highest softmax score per validation sample.
val_probabilities = model.predict(X_val)
y_val_pred = val_probabilities.argmax(axis=-1)

# Per-class error breakdown on the validation split.
cm = confusion_matrix(y_val, y_val_pred)
print("Confusion Matrix:", cm, sep="\n")

cr = classification_report(y_val, y_val_pred)
print("Classification Report:", cr, sep="\n")

# Overall loss and accuracy as computed by Keras on the same split.
loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
accuracy_percent = accuracy * 100
print("Model Accuracy: {:.2f}%".format(accuracy_percent))
print("Model Loss: {:.2f}".format(loss))


In [None]:

def predict_audio_class(audio_file_path, model, le):
    """Predict the label of a single audio file.

    Args:
        audio_file_path: Path of the audio file to classify.
        model: Trained model whose ``predict`` returns per-class scores.
        le: Fitted label encoder used to turn the predicted class id back
            into its label via ``inverse_transform``.

    Returns:
        The decoded label of the highest-scoring class.

    Raises:
        ValueError: If ``extract_mfccs`` returns ``None`` for the file (it
            does so for unsupported file extensions).
    """
    mfcc = extract_mfccs(audio_file_path)
    if mfcc is None:
        # Fail with a clear message instead of passing None on to the model.
        raise ValueError(
            f"Cannot extract MFCC features from {audio_file_path!r}: unsupported audio file type"
        )
    mfcc = np.expand_dims(mfcc, axis=0)  # because the model expects 2D array
    prediction = np.argmax(model.predict(mfcc), axis=-1)
    prediction_label = le.inverse_transform(prediction)[0]
    return prediction_label

# Classify one recording from the test folder. The path is built from
# DATA_PATH_TEST so the dataset location is defined in one place only.
audio_file_path = f"{DATA_PATH_TEST}/testaudio (1987).wav"  # Adjust if necessary
prediction = predict_audio_class(audio_file_path, model, le)
print(f"The audio is predicted as: {prediction}")