# Traning - Features

In [None]:
%pip install -r ../requirements.txt

In [80]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
os.environ['ABSL_LOG_THRESHOLD'] = '0'

import time
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import resample
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from scipy.io import wavfile
import keras
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
from sklearn.utils.class_weight import compute_class_weight
import librosa
import soundfile as sf
from tqdm import tqdm
from functools import partial
import json
import ast
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical


print("TensorFlow:", tf.__version__)
print("Keras:", keras.__version__)
print("Is TensorFlow using GPU?", tf.test.is_gpu_available())
print("GPU disponível:", tf.config.list_physical_devices('GPU'))
print("XLA ativado:", tf.config.optimizer.get_jit())
# Mostra configuração geral
tf.config.experimental.list_physical_devices()

TensorFlow: 2.19.0
Keras: 3.9.2
Is TensorFlow using GPU? True
GPU disponível: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
XLA ativado: 


I0000 00:00:1745704370.791005   10894 gpu_device.cc:2019] Created device /device:GPU:0 with 4047 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:0a:00.0, compute capability: 7.5


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [30]:
AUGMENTED_DATASET_PATH = '../datasets/augmented'
AUGMENTED_DATASET_VERSION = 'v2'

AUGMENTED_DATASET_VERSION_PATH = Path(os.path.join(AUGMENTED_DATASET_PATH, AUGMENTED_DATASET_VERSION))
AUGMENTED_DATASET_TRAIN_METADATA = AUGMENTED_DATASET_VERSION_PATH / 'train_metadata.csv'
AUGMENTED_DATASET_TRAIN_DATA = AUGMENTED_DATASET_VERSION_PATH / 'train_data'

AUGMENTED_DATASET_TEST_METADATA = AUGMENTED_DATASET_VERSION_PATH / 'test_metadata.csv'
AUGMENTED_DATASET_TEST_DATA = AUGMENTED_DATASET_VERSION_PATH / 'test_data'

In [None]:
TRAINING_DATASET_PATH = '../datasets/training'
TRAINING_DATASET_VERSION = 'v3'

TRAINING_DATASET_VERSION_PATH = Path(os.path.join(TRAINING_DATASET_PATH, TRAINING_DATASET_VERSION))
TRAINING_DATASET_TRAIN_METADATA = TRAINING_DATASET_VERSION_PATH / 'train_metadata.csv'
TRAINING_DATASET_TEST_METADATA = TRAINING_DATASET_VERSION_PATH / 'test_metadata.csv'

TRAINING_DATASET_VERSION_PATH.mkdir(parents=True, exist_ok=True)
print(f"📁 Versão {TRAINING_DATASET_VERSION_PATH}: {TRAINING_DATASET_VERSION_PATH}")

📁 Versão ../datasets/training/v3: ../datasets/training/v3
📁 Versão ../datasets/training/v3/train_data: ../datasets/training/v3/train_data
📁 Versão ../datasets/training/v3/test_data: ../datasets/training/v3/test_data


In [52]:
def extract_features(row, basepath):
    filename = row["filename"]
    filepath = basepath / filename
    audio,sample_rate=librosa.load(filepath,res_type='kaiser_fast')
    mfccs_features=librosa.feature.mfcc(y=audio,sr=sample_rate,n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
    return mfccs_scaled_features

In [None]:
def prepare_dataset(metadata_file: str, data_path, output_file: str):
    tqdm.pandas()

    print(f"🔍 Lendo metadata: {metadata_file}\n")
    df = pd.read_csv(metadata_file)

    print("🎧 Extraindo features...")
    extract_func = partial(extract_features, basepath=data_path)
    df['features'] = df.progress_apply(extract_func, axis=1)

    print("🔄 Formatando features para salvar...")
    df['features'] = df['features'].progress_apply(lambda x: json.dumps(x.tolist()))

    # Remove colunas desnecessárias
    df = df.drop(columns=['filename', 'augmentation'])

    print(f"💾 Salvando metadata final em: {output_file}")
    df.to_csv(output_file, index=False)

    print(f"✅ Processo concluído! Total de registros: {len(df)}")
    return df


In [71]:
prepare_dataset(
    metadata_file=AUGMENTED_DATASET_TEST_METADATA,
    data_path=AUGMENTED_DATASET_TEST_DATA,
    output_file=TRAINING_DATASET_TEST_METADATA
)

🔍 Lendo metadata: ../datasets/augmented/v2/test_metadata.csv

🎧 Extraindo features...


100%|██████████| 480/480 [00:21<00:00, 22.69it/s]


🔄 Formatando features para salvar...


100%|██████████| 480/480 [00:00<00:00, 25276.09it/s]

💾 Salvando metadata final em: ../datasets/training/v3/test_metadata.csv
✅ Processo concluído! Total de registros: 480





In [None]:
prepare_dataset(
    metadata_file=AUGMENTED_DATASET_TRAIN_METADATA,
    data_path=AUGMENTED_DATASET_TRAIN_DATA,
    output_file=TRAINING_DATASET_TRAIN_METADATA
)

🔍 Lendo metadata: ../datasets/augmented/v2/train_metadata.csv

🎧 Extraindo features...


  0%|          | 0/15774 [00:00<?, ?it/s]

100%|██████████| 15774/15774 [08:29<00:00, 30.97it/s] 


🔄 Formatando features para salvar...


100%|██████████| 15774/15774 [00:00<00:00, 36796.81it/s]


💾 Salvando metadata final em: ../datasets/training/v3/train_metadata.csv
✅ Processo concluído! Total de registros: 15774


In [None]:
tqdm.pandas()
train_df = pd.read_csv(TRAINING_DATASET_TRAIN_METADATA)

train_df['features'] = train_df['features'].progress_apply(lambda x: np.array(ast.literal_eval(x)))


label_encoder = LabelEncoder()
train_df['label'] = label_encoder.fit_transform(train_df['class'])

train_df.head()

X = np.stack(train_df['features'].values)
y = train_df['label'].values

X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


100%|██████████| 15774/15774 [00:02<00:00, 7579.02it/s]


Unnamed: 0,class,features,label
0,drum,"[-116.5557861328125, 57.869659423828125, -6.78...",0
1,drum,"[-106.1605224609375, 53.76694107055664, -6.617...",0
2,drum,"[-111.37716674804688, 56.37688064575195, -8.95...",0
3,drum,"[-108.94854736328125, 48.71305465698242, -2.92...",0
4,drum,"[-98.67511749267578, 57.76403045654297, -6.830...",0


In [74]:
model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(40,)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.4),

    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(num_labels, activation='softmax')
])

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1745704006.978230   10894 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4047 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:0a:00.0, compute capability: 7.5


NameError: name 'num_labels' is not defined