In [16]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.decomposition import PCA
from sklearn.metrics import precision_score, recall_score, f1_score, hamming_loss
import itertools

# --- Simulate the dataset ---
# Each sample is a concatenated embedding (video=1024 + audio=128), i.e. 1152
# features, paired with a multi-hot target over 4800 classes.
num_samples, num_features, num_classes = 1000, 1152, 4800

# Synthetic inputs: uniform values in [0, 1), stored as float32.
X = np.random.random_sample((num_samples, num_features)).astype(np.float32)

# Synthetic multilabel targets: every entry is 0 or 1, stored as float32.
Y = np.random.randint(2, size=(num_samples, num_classes)).astype(np.float32)

# --- Train/validation split ---
# The first 80% of the rows are used for training, the rest for validation.
split = int(0.8 * num_samples)
X_train, Y_train = X[:split], Y[:split]
X_val, Y_val = X[split:], Y[split:]

# --- Funções métricas multilabel ---
def precision_at_k(y_true, y_pred, k=20):
    """Compute the batch-averaged precision@k for multilabel predictions.

    For every sample, the ``k`` highest-scoring classes of ``y_pred`` are
    selected and the fraction of them that are positive in ``y_true`` is
    computed; the per-sample fractions are then averaged over the batch.

    Args:
        y_true: Array or tensor of shape (batch, classes); entries greater
            than zero mark the positive labels.
        y_pred: Tensor of shape (batch, classes) with per-class scores.
        k: Number of top-scoring classes considered per sample.

    Returns:
        Scalar float64 tensor holding the mean precision@k.
    """
    top_k_preds = tf.math.top_k(y_pred, k=k).indices

    # Relevance mask using the "> 0" criterion for a positive label.
    is_relevant = tf.convert_to_tensor(y_true) > 0

    # Look up, row by row, whether each of the top-k classes is relevant.
    # One batched gather replaces a per-sample Python loop of set
    # intersections and does not need a statically known batch size.
    hits = tf.gather(is_relevant, top_k_preds, batch_dims=1)

    # top_k indices are distinct within a row, so the number of hits equals
    # the size of the intersection between true labels and top-k labels.
    hits_per_sample = tf.reduce_sum(tf.cast(hits, tf.float64), axis=1)
    return tf.reduce_mean(hits_per_sample / k)

def multilabel_metrics(y_true, y_pred, threshold=0.5):
    """Compute sample-averaged multilabel metrics at a fixed threshold.

    Args:
        y_true: Multi-hot ground-truth labels, as an eager tensor or a NumPy
            array.
        y_pred: Per-class scores with the same layout as ``y_true``, as an
            eager tensor or a NumPy array.
        threshold: Scores greater than or equal to this value are treated
            as positive predictions.

    Returns:
        Tuple ``(precision, recall, f1, hamming)``. Precision, recall and F1
        use ``average='samples'`` with ``zero_division=0``; ``hamming`` is
        the Hamming loss.
    """
    # np.asarray handles eager tensors and NumPy arrays alike, so callers
    # are not forced to wrap their arrays in tensors first.
    y_pred_bin = (np.asarray(y_pred) >= threshold).astype(int)
    y_true_np = np.asarray(y_true).astype(int)

    # Shared scoring options for the three sample-averaged metrics.
    score_options = {'average': 'samples', 'zero_division': 0}
    precision = precision_score(y_true_np, y_pred_bin, **score_options)
    recall = recall_score(y_true_np, y_pred_bin, **score_options)
    f1 = f1_score(y_true_np, y_pred_bin, **score_options)
    hamming = hamming_loss(y_true_np, y_pred_bin)
    return precision, recall, f1, hamming

# --- 1. Modelo Base ---
def build_base_model(input_dim, output_dim):
    """Build and compile the baseline classifier.

    The network is a single 512-unit ReLU hidden layer followed by a sigmoid
    output layer, compiled with Adam and binary cross-entropy.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output units (one per class).

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = models.Sequential()
    model.add(layers.Input(shape=(input_dim,)))
    model.add(layers.Dense(512, activation='relu'))
    # Independent sigmoid per class: each label is its own binary decision.
    model.add(layers.Dense(output_dim, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[])
    return model

# --- 2. Modelo Base + variações ---
def apply_pca(X_train, X_val, n_components=256, random_state=None):
    """Project training and validation features onto PCA components.

    The PCA is fitted on the training features only and the fitted
    projection is then applied to the validation features.

    Args:
        X_train: Training feature matrix used to fit the PCA.
        X_val: Validation feature matrix, transformed with the fitted PCA.
        n_components: Number of principal components to keep.
        random_state: Optional seed forwarded to ``PCA`` so the projection
            can be reproduced when a randomized solver is used. The default
            ``None`` leaves the PCA unseeded.

    Returns:
        Tuple ``(X_train_pca, X_val_pca)`` with the projected matrices.
    """
    pca = PCA(n_components=n_components, random_state=random_state)
    X_train_pca = pca.fit_transform(X_train)
    # Only transform here: refitting on validation data would leak it.
    X_val_pca = pca.transform(X_val)
    return X_train_pca, X_val_pca

# --- 3. Modelo Base + Blocos ---
def build_model_with_blocks(input_dim, output_dim):
    """Build and compile a classifier made of Dense/BatchNorm/ReLU blocks.

    Two blocks are stacked (1024 and 512 units); the first one is followed
    by dropout with rate 0.3. The output layer is a sigmoid layer with
    ``output_dim`` units. The model is compiled with Adam and binary
    cross-entropy.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output units (one per class).

    Returns:
        The compiled Keras functional ``Model``.
    """
    # (units, dropout rate or None) for each Dense -> BatchNorm -> ReLU block.
    block_specs = ((1024, 0.3), (512, None))

    inputs = layers.Input(shape=(input_dim,))
    hidden = inputs
    for units, dropout_rate in block_specs:
        hidden = layers.Dense(units)(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Activation('relu')(hidden)
        if dropout_rate is not None:
            hidden = layers.Dropout(dropout_rate)(hidden)
    outputs = layers.Dense(output_dim, activation='sigmoid')(hidden)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[])
    return model

# --- 4. Seu Modelo (exemplo com mais camadas e tuning) ---
def build_custom_model(input_dim, output_dim, lr=1e-4):
    """Build and compile the deeper custom classifier.

    The stack is Dense(2048, ReLU) -> Dropout(0.4) -> Dense(1024, ReLU) ->
    BatchNormalization -> Dropout(0.3) -> Dense(512, ReLU) -> sigmoid output.
    The model is compiled with Adam at the given learning rate and binary
    cross-entropy.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output units (one per class).
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras functional ``Model``.
    """
    # Layers are listed in the order they are applied to the input.
    stack = (
        layers.Dense(2048, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(1024, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(512, activation='relu'),
        layers.Dense(output_dim, activation='sigmoid'),
    )

    inputs = layers.Input(shape=(input_dim,))
    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)

    model = models.Model(inputs=inputs, outputs=outputs)
    adam = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=[])
    return model

# --- Callbacks para métricas customizadas ---
class MetricsCallback(callbacks.Callback):
    """Evaluate multilabel metrics on held-out data after every epoch.

    At the end of each epoch the model predicts on the validation inputs and
    Precision@20, sample-averaged precision/recall/F1 and Hamming loss are
    computed. The values are printed and also written into the epoch
    ``logs`` so that the training history and other callbacks can use them.

    Args:
        validation_data: Tuple ``(X_val, Y_val)`` of validation inputs and
            multi-hot validation labels.
    """

    def __init__(self, validation_data):
        super().__init__()
        self.X_val, self.Y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Compute, record and print the validation metrics for this epoch.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Mutable metrics dict for the epoch; when provided, the
                computed metrics are added to it under ``val_*`` keys.
        """
        # Convert predictions and labels once and reuse them for all metrics.
        y_pred = tf.convert_to_tensor(self.model.predict(self.X_val, verbose=0))
        y_true = tf.convert_to_tensor(self.Y_val)

        p_at_20 = precision_at_k(y_true, y_pred, k=20).numpy()
        precision, recall, f1, hamming = multilabel_metrics(y_true, y_pred)

        # Expose the metrics through the epoch logs instead of only printing.
        if logs is not None:
            logs.update({
                'val_precision_at_20': float(p_at_20),
                'val_precision': float(precision),
                'val_recall': float(recall),
                'val_f1': float(f1),
                'val_hamming_loss': float(hamming),
            })

        # The leading newline keeps this report off the progress-bar line,
        # which is still unterminated when this hook runs.
        print(f"\nEpoch {epoch+1} - Precision@20: {p_at_20:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}, Hamming Loss: {hamming:.4f}")

# --- Train the base model ---
# Baseline network on the raw features; validation metrics are reported by
# MetricsCallback at the end of every epoch.
print("Treinando Modelo Base")
model_base = build_base_model(input_dim=num_features, output_dim=num_classes)
model_base.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=5,
    callbacks=[MetricsCallback(validation_data=(X_val, Y_val))],
)

# --- Train the base model on PCA-reduced features (variation) ---
print("\nTreinando Modelo Base + PCA")
X_train_pca, X_val_pca = apply_pca(X_train, X_val, n_components=256)
# Size the input layer from the projected data so the model cannot drift out
# of sync with the number of PCA components chosen above.
model_base_pca = build_base_model(X_train_pca.shape[1], num_classes)
model_base_pca.fit(
    X_train_pca, Y_train,
    epochs=5,
    batch_size=32,
    # Validation metrics must be computed on the projected validation set.
    callbacks=[MetricsCallback((X_val_pca, Y_val))]
)

# --- Train the block-based model ---
# Dense/BatchNorm/ReLU blocks on the raw features, same schedule as the
# baseline run (5 epochs, batch size 32).
print("\nTreinando Modelo Base + Blocos")
model_blocks = build_model_with_blocks(input_dim=num_features, output_dim=num_classes)
model_blocks.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=5,
    callbacks=[MetricsCallback(validation_data=(X_val, Y_val))],
)

# --- Train the custom model ---
# Deeper network trained with an explicit learning rate of 5e-5. The fit call
# is left as the final expression of the cell.
print("\nTreinando Seu Modelo")
model_custom = build_custom_model(input_dim=num_features, output_dim=num_classes, lr=5e-5)
model_custom.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=5,
    callbacks=[MetricsCallback(validation_data=(X_val, Y_val))],
)


Treinando Modelo Base
Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - loss: 0.6943Epoch 1 - Precision@20: 0.5032, Precision: 0.4993, Recall: 0.4973, F1: 0.4983, Hamming Loss: 0.5001
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 170ms/step - loss: 0.6942
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.6930Epoch 2 - Precision@20: 0.5165, Precision: 0.4984, Recall: 0.4953, F1: 0.4968, Hamming Loss: 0.5010
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - loss: 0.6930
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.6929Epoch 3 - Precision@20: 0.5005, Precision: 0.4991, Recall: 0.4969, F1: 0.4980, Hamming Loss: 0.5004
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 89ms/step - loss: 0.6929
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 0.6927Epoch 4 - Precisio

<keras.src.callbacks.history.History at 0x7b274dfefd10>