In [None]:
import os
from pathlib import Path
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt

# Define paths relative to the script's directory without showing sensitive details
audio_path = Path("dataset") / "audio"
csv_path = Path("dataset") / "esc50.csv"

# Load the metadata CSV file
metadata = pd.read_csv(csv_path)

# Sample processing code to verify paths
print("Audio path:", audio_path)
print("CSV path:", csv_path)


Audio path: dataset\audio
CSV path: dataset\esc50.csv


In [None]:
# Parameters
sr = 22050
duration = 5
n_mfcc = 13
n_mels = 128

# Output
mfcc_dir = Path("processed_data") / "mfcc"
mel_dir = Path("processed_data") / "mel_spectrogram"
mfcc_dir.mkdir(parents=True, exist_ok=True)
mel_dir.mkdir(parents=True, exist_ok=True)

def extract_features(file_path):
    # Load the audio file, ensure duration of 5 seconds
    audio, _ = librosa.load(file_path, sr=sr, duration=duration)

    # MFCC features
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    # Mel spectrogram
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels, fmax=sr/2)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    return mfcc, mel_spec_db

for index, row in metadata.iterrows():
    file_name = row["filename"]
    file_path = audio_path / file_name
    
    # Extract features
    mfcc, mel_spec_db = extract_features(file_path)
    
    # Save each feature set separately as .npy files
    mfcc_save_path = mfcc_dir / f"{file_name.split('.')[0]}_mfcc.npy"
    mel_save_path = mel_dir / f"{file_name.split('.')[0]}_mel.npy"
    np.save(mfcc_save_path, mfcc)
    np.save(mel_save_path, mel_spec_db)

print("Feature extraction complete! MFCC and Mel spectrograms saved.")

Feature extraction complete! MFCC and Mel spectrograms saved.


In [21]:
import os
import numpy as np
import librosa
from pathlib import Path

# Ensure directories exist
Path("processed_data/mfcc").mkdir(parents=True, exist_ok=True)
Path("processed_data/mel_spectrogram").mkdir(parents=True, exist_ok=True)

# Function to save MFCC and Mel Spectrogram
def save_features(audio_path, filename, sr=22050):
    # Load audio
    y, _ = librosa.load(audio_path, sr=sr)

    # Compute MFCC and Mel spectrogram
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)

    # Save as .npy
    np.save(f"processed_data/mfcc/{filename}_mfcc.npy", mfcc)
    np.save(f"processed_data/mel_spectrogram/{filename}_mel_spectrogram.npy", mel_spectrogram)

# Example usage
# Replace 'dataset/audio' with your actual audio file directory
for _, row in metadata.iterrows():
    audio_file = f"dataset/audio/{row['filename']}"
    filename = row["filename"].split(".")[0]
    save_features(audio_file, filename)

In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from pathlib import Path

# Load metadata
metadata = pd.read_csv("dataset/esc50.csv")

# Function to load features and flatten them
def load_features(feature_type="mfcc"):
    feature_dir = Path(f"processed_data/{feature_type}")
    X, y = [], []
    for _, row in metadata.iterrows():
        class_label = row["category"]
        file_name = row["filename"].split(".")[0]
        
        # Load .npy file
        feature_path = feature_dir / f"{file_name}_{feature_type}.npy"
        features = np.load(feature_path)
        
        # Flatten the features to 1D for simple models
        X.append(features.flatten())
        y.append(class_label)
        
    return np.array(X), np.array(y)

# Load and split data for MFCC
X_mfcc, y_mfcc = load_features("mfcc")
X_train_mfcc, X_test_mfcc, y_train_mfcc, y_test_mfcc = train_test_split(X_mfcc, y_mfcc, test_size=0.2, random_state=42)

# Load and split data for Mel Spectrogram
X_mel, y_mel = load_features("mel_spectrogram")
X_train_mel, X_test_mel, y_train_mel, y_test_mel = train_test_split(X_mel, y_mel, test_size=0.2, random_state=42)



In [23]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Initialize models
knn = KNeighborsClassifier(n_neighbors=5)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
svm = SVC(kernel="linear")

# Dictionary to store models
models = {"KNN": knn, "Random Forest": rf, "SVM": svm}

# Function to train and evaluate models
def train_and_evaluate(X_train, X_test, y_train, y_test, feature_type):
    print(f"\nEvaluating models for {feature_type} features:\n")
    for name, model in models.items():
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict on the test set
        y_pred = model.predict(X_test)
        
        # Evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        print(f"{name} Model Accuracy: {accuracy:.2f}")
        print(f"{name} Classification Report:\n{classification_report(y_test, y_pred)}\n")

# Train and evaluate for MFCC features
train_and_evaluate(X_train_mfcc, X_test_mfcc, y_train_mfcc, y_test_mfcc, "MFCC")

# Train and evaluate for Mel Spectrogram features
train_and_evaluate(X_train_mel, X_test_mel, y_train_mel, y_test_mel, "Mel Spectrogram")



Evaluating models for MFCC features:

KNN Model Accuracy: 0.25
KNN Classification Report:
                  precision    recall  f1-score   support

        airplane       0.25      0.38      0.30         8
       breathing       0.23      0.38      0.29         8
  brushing_teeth       0.08      0.43      0.13         7
     can_opening       0.33      0.12      0.18         8
        car_horn       0.04      0.33      0.07         3
             cat       0.13      0.33      0.19        12
        chainsaw       0.08      0.25      0.12         4
  chirping_birds       0.17      0.33      0.22         6
    church_bells       0.40      0.25      0.31         8
        clapping       0.60      0.55      0.57        11
     clock_alarm       0.00      0.00      0.00         8
      clock_tick       0.50      0.20      0.29         5
        coughing       0.00      0.00      0.00        10
             cow       1.00      0.54      0.70        13
  crackling_fire       0.20      0.22 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest Model Accuracy: 0.37
Random Forest Classification Report:
                  precision    recall  f1-score   support

        airplane       0.17      0.25      0.20         8
       breathing       0.14      0.12      0.13         8
  brushing_teeth       0.23      0.43      0.30         7
     can_opening       0.30      0.38      0.33         8
        car_horn       0.29      0.67      0.40         3
             cat       0.57      0.33      0.42        12
        chainsaw       0.11      0.25      0.15         4
  chirping_birds       0.20      0.17      0.18         6
    church_bells       0.20      0.12      0.15         8
        clapping       0.78      0.64      0.70        11
     clock_alarm       0.33      0.12      0.18         8
      clock_tick       0.25      0.60      0.35         5
        coughing       0.40      0.20      0.27        10
             cow       0.80      0.31      0.44        13
  crackling_fire       0.30      0.78      0.44         9

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM Model Accuracy: 0.35
SVM Classification Report:
                  precision    recall  f1-score   support

        airplane       0.29      0.50      0.36         8
       breathing       0.00      0.00      0.00         8
  brushing_teeth       0.38      0.43      0.40         7
     can_opening       0.62      0.62      0.62         8
        car_horn       0.08      0.33      0.12         3
             cat       0.40      0.17      0.24        12
        chainsaw       0.17      0.25      0.20         4
  chirping_birds       0.00      0.00      0.00         6
    church_bells       0.29      0.25      0.27         8
        clapping       0.83      0.45      0.59        11
     clock_alarm       0.33      0.12      0.18         8
      clock_tick       0.08      0.20      0.12         5
        coughing       0.44      0.40      0.42        10
             cow       0.75      0.46      0.57        13
  crackling_fire       0.18      0.22      0.20         9
        crickets   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


KNN Model Accuracy: 0.09
KNN Classification Report:
                  precision    recall  f1-score   support

        airplane       0.03      0.12      0.05         8
       breathing       0.10      0.38      0.15         8
  brushing_teeth       0.07      0.29      0.12         7
     can_opening       0.00      0.00      0.00         8
        car_horn       0.00      0.00      0.00         3
             cat       0.00      0.00      0.00        12
        chainsaw       0.00      0.00      0.00         4
  chirping_birds       0.00      0.00      0.00         6
    church_bells       0.00      0.00      0.00         8
        clapping       0.15      0.36      0.21        11
     clock_alarm       1.00      0.12      0.22         8
      clock_tick       0.00      0.00      0.00         5
        coughing       0.29      0.20      0.24        10
             cow       0.00      0.00      0.00        13
  crackling_fire       0.05      0.11      0.07         9
        crickets   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest Model Accuracy: 0.37
Random Forest Classification Report:
                  precision    recall  f1-score   support

        airplane       0.21      0.38      0.27         8
       breathing       0.00      0.00      0.00         8
  brushing_teeth       0.22      0.57      0.32         7
     can_opening       0.50      0.50      0.50         8
        car_horn       0.25      0.67      0.36         3
             cat       0.50      0.17      0.25        12
        chainsaw       0.17      0.25      0.20         4
  chirping_birds       0.20      0.17      0.18         6
    church_bells       0.25      0.12      0.17         8
        clapping       0.86      0.55      0.67        11
     clock_alarm       0.67      0.25      0.36         8
      clock_tick       0.07      0.20      0.10         5
        coughing       0.38      0.30      0.33        10
             cow       0.80      0.31      0.44        13
  crackling_fire       0.36      0.56      0.43         9

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM Model Accuracy: 0.13
SVM Classification Report:
                  precision    recall  f1-score   support

        airplane       0.07      0.25      0.11         8
       breathing       0.11      0.12      0.12         8
  brushing_teeth       0.09      0.14      0.11         7
     can_opening       0.50      0.25      0.33         8
        car_horn       0.00      0.00      0.00         3
             cat       0.50      0.08      0.14        12
        chainsaw       0.00      0.00      0.00         4
  chirping_birds       0.00      0.00      0.00         6
    church_bells       0.00      0.00      0.00         8
        clapping       0.25      0.09      0.13        11
     clock_alarm       0.25      0.12      0.17         8
      clock_tick       0.33      0.20      0.25         5
        coughing       0.00      0.00      0.00        10
             cow       0.43      0.23      0.30        13
  crackling_fire       0.00      0.00      0.00         9
        crickets   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
