<a href="https://colab.research.google.com/github/murilomatutino/ArmAlfa-preprocess/blob/main/ArmAlfa_preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# importando as bibliotecas

import os
import librosa
import math
import json
import soundfile as sf
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# Configuration shared by the preprocessing steps in this notebook.

# Target sampling rate handed to librosa.load when decoding each clip.
SAMPLE_RATE = 22050

# Parameters forwarded to librosa.feature.mfcc.
N_MFCC = 13
N_FFT = 2048
HOP_LENGTH = 512

# Destination of the generated dataset file.
JSON_PATH = 'drive/MyDrive/IA_ArmAlfa_dataset/data.json'

# Create the destination folder up front (no error if it already exists).
os.makedirs(os.path.dirname(JSON_PATH), exist_ok=True)

In [None]:
# Load the UrbanSound8K metadata table.
metadata = pd.read_csv('drive/MyDrive/UrbanSound8K/metadata/UrbanSound8K.csv')

# Pull out the three columns used later to locate and label each clip:
# the file name, the fold it is stored under, and its numeric class id.
name_sound, fold_sound, class_sound = (
    metadata['slice_file_name'],
    metadata['fold'],
    metadata['classID'],
)

In [None]:
# Build the dataset: one MFCC matrix and one integer label per audio clip
# listed in the metadata, then write everything to a single JSON file.
data = {
    # Human-readable class names.
    # NOTE(review): presumably ordered so that list index == classID
    # (UrbanSound8K convention) -- confirm against the metadata CSV.
    "mapping": ["air_conditioner", "car_horn", "children_playing", "dog_bark", "drilling", "engine_idling", "gun_shot", "jackhammer", "siren", "street_music"],
    "labels": [],
    "mfcc": [],
}

# Walk the three metadata columns in lockstep; `count` is the 1-based
# position used only for the progress message.
clips = zip(name_sound, fold_sound, class_sound)
for count, (file_name, fold, class_id) in enumerate(clips, start=1):
    file_path = f'drive/MyDrive/UrbanSound8K/audio/fold{fold}/{file_name}'

    # Decode the clip, resampled to SAMPLE_RATE.
    signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

    # Extract the MFCCs and transpose them so that each row holds the
    # coefficients of one analysis frame.
    mfcc = librosa.feature.mfcc(y=signal,
                                sr=sr,
                                n_fft=N_FFT,
                                n_mfcc=N_MFCC,
                                hop_length=HOP_LENGTH)
    mfcc = mfcc.T

    # NOTE: an earlier version converted `mfcc` to a TensorFlow tensor here
    # and called tf.image.resize(..., [2024, 2024]). The resized array was
    # never stored, and tf.image.resize only accepts 3-D/4-D input, so it
    # raised on this 2-D matrix. The step was removed as dead code.

    # Store the features as plain lists (JSON-serializable) with the label.
    data["mfcc"].append(mfcc.tolist())
    data["labels"].append(int(class_id))
    print("{}, número:{}".format(file_path, count))


# Write the collected data to the JSON file.
with open(JSON_PATH, "w") as fp:
    json.dump(data, fp, indent=4)