In [15]:
import os
import numpy as np
from scipy.io import wavfile
from scipy.signal import spectrogram
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Pasta contendo os arquivos de áudio
audio_folder = "sounds"

# Listas para armazenar as características e os rótulos
features = []
labels = []

# Loop sobre as subpastas (traffic, ambulance, firetruck)
for label in ["traffic", "ambulance", "firetruck"]:
    label_folder = os.path.join(audio_folder, label)
    for file_name in os.listdir(label_folder):
        file_path = os.path.join(label_folder, file_name)
        if file_path.endswith(".wav"):
            # Carrega o arquivo de áudio
            sample_rate, audio_data = wavfile.read(file_path)
            
            # Extrai as características
            # Método 1: Transformada de Fourier
            fft_result = np.abs(np.fft.fft(audio_data))
            fft_mean = np.mean(fft_result)
            
            # Método 2: Energia média do sinal
            energy_mean = np.mean(audio_data**2)
            
            # Método 3: Espectrograma
            _, _, spec = spectrogram(audio_data, fs=sample_rate)
            spec_mean = np.mean(spec)
            
            # Adiciona as características e o rótulo à lista
            features.append([fft_mean, energy_mean, spec_mean])
            labels.append(label)

# Converte as listas para arrays numpy
features = np.array(features)
labels = np.array(labels)

# Divide os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.8, random_state=42)

# Cria e treina o classificador Random Forest
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Faz previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Calcula a acurácia do classificador

accuracy = accuracy_score(y_test, y_pred)
print("Acurácia:", accuracy)




Acurácia: 0.79375


In [16]:
print(len(y_test))
for i in range(len(y_test)):
    if y_test[i] != y_pred[i] and (y_test[i] == "traffic" or y_pred[i] == "traffic"):
        print(y_test[i], y_pred[i])

480
ambulance traffic
ambulance traffic
traffic ambulance
ambulance traffic
traffic ambulance
ambulance traffic
traffic ambulance
firetruck traffic
traffic firetruck
firetruck traffic
ambulance traffic
ambulance traffic
ambulance traffic
firetruck traffic
ambulance traffic
traffic ambulance
ambulance traffic
ambulance traffic
traffic ambulance
ambulance traffic
ambulance traffic
ambulance traffic
ambulance traffic
traffic ambulance
ambulance traffic
firetruck traffic
ambulance traffic
ambulance traffic
ambulance traffic
traffic ambulance
traffic firetruck
ambulance traffic
ambulance traffic
ambulance traffic
