# Training

In [None]:
%pip install -r ../requirements.txt

## Neural Network

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
os.environ['ABSL_LOG_THRESHOLD'] = '0'

import time
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import resample
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import keras
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
from sklearn.utils.class_weight import compute_class_weight


# Report the library versions and the accelerators TensorFlow can see.
print("TensorFlow:", tf.__version__)
print("Keras:", keras.__version__)

# tf.test.is_gpu_available() is deprecated; querying the physical GPU list is
# the supported replacement, so the same list feeds both lines below.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Is TensorFlow using GPU?", bool(gpu_devices))
print("GPU disponível:", gpu_devices)
print("XLA ativado:", tf.config.optimizer.get_jit())

# Last expression of the cell: every physical device (CPU, GPU, ...) visible
# to TensorFlow, shown as the cell output.
tf.config.list_physical_devices()

In [None]:
# Root folder that holds every version of the training dataset, and the
# version this notebook works with.
TRAINING_DATASET_PATH = '../datasets/training'
TRAINING_DATASET_VERSION = 'v2'

# Directory of the selected dataset version.
TRAINING_DATASET_VERSION_PATH = Path(TRAINING_DATASET_PATH) / TRAINING_DATASET_VERSION

# Train split: metadata table and the folder with the sample files.
TRAINING_DATASET_TRAIN_METADATA = TRAINING_DATASET_VERSION_PATH.joinpath('train_metadata.csv')
TRAINING_DATASET_TRAIN_DATA = TRAINING_DATASET_VERSION_PATH.joinpath('train_data')

# Test split: same layout as the train split.
TRAINING_DATASET_TEST_METADATA = TRAINING_DATASET_VERSION_PATH.joinpath('test_metadata.csv')
TRAINING_DATASET_TEST_DATA = TRAINING_DATASET_VERSION_PATH.joinpath('test_data')


In [None]:
# Load the metadata tables of both splits. All columns are kept (a drop of an
# "augmentation" column used to be here but is disabled).
train_meta, test_meta = (
    pd.read_csv(csv_path)
    for csv_path in (TRAINING_DATASET_TRAIN_METADATA, TRAINING_DATASET_TEST_METADATA)
)

# Both splits must expose exactly the same set of class labels.
train_classes = set(train_meta['class'].unique())
test_classes = set(test_meta['class'].unique())

if test_classes != train_classes:
    raise ValueError(f"As classes do teste não batem com as do treino.\nTreino: {train_classes}\nTeste: {test_classes}")

# Single label -> integer index mapping, ordered by the sorted class names.
unique_classes = sorted(train_classes)
class_to_idx = dict(zip(unique_classes, range(len(unique_classes))))

# Per-class row counts of each split, ordered by class name.
for heading, split_meta in (
    ("📊 Distribuição no treino:", train_meta),
    ("\n📊 Distribuição no teste:", test_meta),
):
    print(heading)
    print(split_meta['class'].value_counts().sort_index())

# Show which integer index each class label received.
print("\n🔢 Mapeamento de classes:")
for class_name, class_index in class_to_idx.items():
    print(f"{class_name}: {class_index}")


In [None]:
# Balanced per-class weights computed from the full training metadata:
# the integer class indices and the integer-encoded label of every row.
class_indices = np.array(list(class_to_idx.values()))
train_labels = train_meta['class'].map(class_to_idx).values

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=class_indices,
    y=train_labels,
)

# Dictionary form {position in class_weights: weight}.
class_weight_dict = dict(enumerate(class_weights))
print("class weights:", class_weight_dict)

In [None]:
def audio_npz_generator(meta_df, data_dir, class_map, augment=False):
    """Yield ``(mel, label)`` pairs for every loadable row of ``meta_df``.

    For each row, the archive ``data_dir / row['filename']`` is opened, its
    ``'mel'`` array is cast to float32 and cropped to the first 128 columns,
    and a trailing channel axis is appended.

    Args:
        meta_df: DataFrame with at least the columns ``'filename'`` and
            ``'class'``.
        data_dir: ``pathlib.Path`` of the folder containing the sample files.
        class_map: Mapping from the values of the ``'class'`` column to
            integer labels.
        augment: Accepted for interface compatibility; currently unused, no
            augmentation is applied.

    Yields:
        Tuple ``(mel, label)`` where ``mel`` is a float32 array of shape
        ``(128, 128, 1)`` and ``label`` is ``class_map[row['class']]``.

    Notes:
        Samples that cannot be loaded, whose class is missing from
        ``class_map``, or whose cropped spectrogram is not ``(128, 128)`` are
        reported on stdout and skipped instead of aborting the iteration.
    """
    expected_shape = (128, 128)
    for _, row in meta_df.iterrows():
        path = data_dir / row['filename']
        try:
            # The context manager closes the archive handle as soon as the
            # array has been copied out, instead of relying on garbage
            # collection to release it.
            with np.load(path) as archive:
                mel = archive['mel'].astype(np.float32)
            mel = mel[:, :128]
            # Cropping cannot fix a sample that is too short (or has the
            # wrong number of rows); yielding it would break consumers that
            # declare a fixed (128, 128, 1) element shape.
            if mel.shape != expected_shape:
                raise ValueError(
                    f"unexpected mel shape {mel.shape}, expected {expected_shape}"
                )
            label = class_map[row['class']]
        except Exception as e:
            print(f"Erro ao carregar {path}: {e}")
            continue
        # Yield outside the try block so only loading problems are handled
        # by the skip-and-report path above.
        yield mel[..., np.newaxis], label


# Shape of one model input: 128 x 128 spectrogram with a single channel.
input_shape = (128, 128, 1)

In [None]:
# Sizes for the optional sub-sampling of each split. They are not applied
# right now because the .sample(...) calls below are disabled; a larger
# alternative that was tried is 140000 / 6000.
TRAIN_SAMPLE_SIZE = 1400
VAL_SAMPLE_SIZE = 600

BATCH_SIZE = 128
RANDOM_STATE = 42

# Sampling disabled: the full metadata tables are used as-is.
train_meta_sampled = train_meta
val_meta_sampled = test_meta

# Balanced class weights computed from the (sub)set actually used for training.
subset_class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array(list(class_to_idx.values())),
    y=train_meta_sampled['class'].map(class_to_idx).values,
)
subset_class_weight_dict = dict(enumerate(subset_class_weights))

# Element signature shared by both datasets: (spectrogram, integer label).
sample_signature = (
    tf.TensorSpec(shape=input_shape, dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int64),
)


def _iter_train_samples():
    """Create a fresh generator over the training rows (called by tf.data)."""
    return audio_npz_generator(train_meta_sampled, TRAINING_DATASET_TRAIN_DATA, class_to_idx, augment=True)


def _iter_val_samples():
    """Create a fresh generator over the validation rows (called by tf.data)."""
    return audio_npz_generator(val_meta_sampled, TRAINING_DATASET_TEST_DATA, class_to_idx)


# Training dataset: shuffled with a 2048-element buffer, batched, prefetched.
train_ds = tf.data.Dataset.from_generator(_iter_train_samples, output_signature=sample_signature)
train_ds = train_ds.shuffle(2048).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Validation dataset: same pipeline without shuffling.
val_ds = tf.data.Dataset.from_generator(_iter_val_samples, output_signature=sample_signature)
val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
# Target number of rows per class after resampling (tune as needed).
SAMPLES_PER_CLASS = 1000

# Balance the classes: every class is resampled WITH replacement to exactly
# SAMPLES_PER_CLASS rows, whatever its original size.
# pd.concat over the groupby leaves the rows grouped by class, and the 2048
# element shuffle buffer used below is smaller than the table once there are
# more than two classes. Shuffling the rows here keeps the first batches from
# being drawn from only a couple of classes.
balanced_train_meta = (
    pd.concat([
        resample(
            group,
            replace=True,
            n_samples=SAMPLES_PER_CLASS,
            random_state=RANDOM_STATE,
        )
        for _, group in train_meta_sampled.groupby('class')
    ])
    .sample(frac=1, random_state=RANDOM_STATE)
    .reset_index(drop=True)
)

# Check the resulting distribution.
print("📊 Distribuição balanceada:")
print(balanced_train_meta['class'].value_counts().sort_index())

# A bare `.head()` in the middle of a cell displays nothing (only a cell's
# last expression is rendered), so print the preview explicitly.
print(balanced_train_meta.head())

# Same pipeline as the regular training dataset, fed by the balanced table.
balanced_train_ds = tf.data.Dataset.from_generator(
    lambda: audio_npz_generator(balanced_train_meta, TRAINING_DATASET_TRAIN_DATA, class_to_idx, augment=True),
    output_signature=(
        tf.TensorSpec(shape=input_shape, dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int64)
    )
).shuffle(2048).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
# One output unit per class known to the label mapping.
num_classes = len(class_to_idx)

# Small CNN: three Conv -> BatchNorm -> MaxPool stages (32, 64, 128 filters),
# global average pooling, then a dropout-regularised dense head with softmax.
model = models.Sequential([
    # Explicit Input layer: passing `input_shape=` to the first layer of a
    # Sequential model is deprecated in Keras 3 and emits a warning.
    layers.Input(shape=input_shape),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(),

    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.4),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')
])

# Labels are integer class indices, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=[
        'accuracy',
        # Disabled extra metrics (need tensorflow_addons, which is not imported):
        # tfa.metrics.F1Score(num_classes=4, average='macro', name='f1_macro'),
        # tfa.metrics.Precision(name='precision'),
        # tfa.metrics.Recall(name='recall')
    ]
)

model.summary()


In [None]:
import math

# Ask TensorFlow / absl for less verbose logs during training.
# NOTE(review): TensorFlow is already imported at this point; confirm that
# changing these variables here still has an effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['ABSL_LOG_THRESHOLD'] = '3'

EPOCHS = 2
# Dataset used for training and the metadata table it was built from. Keep
# the two in sync (e.g. balanced_train_ds with balanced_train_meta).
dataset = train_ds
metadata = train_meta

# Informational only: number of batches one pass over `metadata` should yield.
steps_per_epoch = math.ceil(len(metadata) / BATCH_SIZE)

print(f"steps_per_epoch: {steps_per_epoch}")

# Balanced class weights for the table that actually feeds `dataset`.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array(list(class_to_idx.values())),
    y=metadata['class'].map(class_to_idx).values
)
class_weight_dict = {i: w for i, w in enumerate(class_weights)}

# `steps_per_epoch` is deliberately NOT passed to fit(): `dataset` is finite
# and not repeated, so with an explicit step count Keras keeps a single
# iterator across epochs, which is exhausted after the first epoch and cuts
# the remaining ones short ("input ran out of data"). Without it, each epoch
# runs until the dataset is exhausted and the next epoch starts a new pass.
# The weights passed are the ones computed above from `metadata`, so they
# always match the selected dataset.
history = model.fit(
    dataset,
    validation_data=val_ds,
    epochs=EPOCHS,
    class_weight=class_weight_dict
)



## Validação

In [None]:

# Per-epoch metric values recorded by fit(), keyed by metric name.
history_dict = history.history

# One panel per metric: (train key, validation key, title, y-axis label).
panels = (
    ('loss', 'val_loss', 'Loss', 'Loss'),
    ('accuracy', 'val_accuracy', 'Accuracy', 'Acurácia'),
)

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, (train_key, val_key, title, y_label) in zip(axes, panels):
    # Training and validation curves share the same axes for comparison.
    ax.plot(history_dict[train_key], label='Treino')
    ax.plot(history_dict[val_key], label='Validação')
    ax.set_title(title)
    ax.set_xlabel('Época')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Class names and their integer indices, in mapping order. Passing them
# explicitly keeps the report rows and the confusion-matrix axes aligned with
# the names even if some class never shows up in the collected labels or
# predictions (e.g. when samples were skipped by the generator).
class_names = list(class_to_idx.keys())
class_indices = list(class_to_idx.values())

y_true = []
y_pred = []

# Collect true labels and arg-max predictions batch by batch, so both lists
# come from the same pass over the validation dataset.
for batch_x, batch_y in val_ds:
    # predict_on_batch runs a single forward pass on the given batch, which
    # avoids the per-call setup of predict() inside a Python loop.
    probs = model.predict_on_batch(batch_x)
    y_true.extend(batch_y.numpy())
    y_pred.extend(np.argmax(probs, axis=1))

# Classification report
print(classification_report(y_true, y_pred, labels=class_indices, target_names=class_names))


# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=class_indices)
print('confusion')
print(cm)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(xticks_rotation=45, cmap='Blues')
plt.tight_layout()
plt.show()