In [116]:
import os
import librosa
import numpy as np
from scipy.io import wavfile
from scipy.signal import spectrogram
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import make_scorer, accuracy_score, recall_score, precision_score
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB


In [117]:
def read_audio_files(audio_folder):
    audios = []

    for label in ["traffic", "ambulance", "firetruck"]:
        label_folder = os.path.join(audio_folder, label)
        for file_name in os.listdir(label_folder):
            file_path = os.path.join(label_folder, file_name)

            if file_path.endswith(".wav"):
                audio_data, sample_rate = librosa.load(file_path, sr=None)
                audios.append([audio_data, sample_rate, label])
                
    return audios


In [118]:
def extract_features(audios):
    features = []
    labels = []

    folder_label = {"traffic": 0, "ambulance": 1, "firetruck": 1}

    for [audio_data, sample_rate, label] in audios:
                    
        energy_mean = np.mean(audio_data**2)

        zero_crossings = librosa.zero_crossings(audio_data, pad=False)
        zero_crossing_rate = np.mean(zero_crossings) 
        
        _, _, spec = spectrogram(audio_data, fs=sample_rate)
        spec_mean = np.mean(spec)
        
        features.append([energy_mean, spec_mean, zero_crossing_rate])
        labels.append(folder_label[label])

    return features, labels

In [119]:
def evaluate(model, features, labels):
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    scoring = {
        'accuracy': make_scorer(accuracy_score),
        'recall': make_scorer(recall_score, average='binary'),
        'precision': make_scorer(precision_score, average='binary')
    }

    results = {'accuracy': [], 'recall': [], 'precision': []}

    for i in range(30):
        for train_index, test_index in cv.split(features, labels):
            X_train, X_test = [features[i] for i in train_index], [features[i] for i in test_index]
            y_train, y_test = [labels[i] for i in train_index], [labels[i] for i in test_index]

            smote = SMOTE(sampling_strategy=1.0,random_state=42)
            X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

            model.fit(X_train_smote, y_train_smote)

            y_pred = model.predict(X_test)

            accuracy = accuracy_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)

            results['accuracy'].append(accuracy)
            results['recall'].append(recall)
            results['precision'].append(precision)
    
    return results
    

In [120]:
def get_metrics(results):
    mean_accuracy = np.mean(results['accuracy'])
    std_accuracy = np.std(results['accuracy'])

    mean_recall = np.mean(results['recall'])
    std_recall = np.std(results['recall'])

    mean_precision = np.mean(results['precision'])
    std_precision = np.std(results['precision'])

    print(f'Acurácia média: {mean_accuracy:.2f} +/- {std_accuracy:.2f}')
    print(f'Recall médio: {mean_recall:.2f} +/- {std_recall:.2f}')
    print(f'Precisão média: {mean_precision:.2f} +/- {std_precision:.2f}')

In [122]:
audios = read_audio_files("sounds")
features, labels = extract_features(audios)

In [123]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
results = evaluate(model, features, labels)
get_metrics(results)

Acurácia média: 0.99 +/- 0.01
Recall médio: 0.98 +/- 0.01
Precisão média: 1.00 +/- 0.00


In [124]:
model = GaussianNB()
results = evaluate(model, features, labels)
get_metrics(results)

Acurácia média: 0.97 +/- 0.02
Recall médio: 0.98 +/- 0.02
Precisão média: 0.98 +/- 0.03
