In [7]:
import tensorflow as tf
import numpy as np

# --- 0. Data and Hyperparameter Configuration ---
# The data is deliberately kept on a "raw" (unscaled) range so that
# normalization is actually needed. The model's goal is to learn Y = 2 * X + 10.
N_SAMPLES = 1000

# Training hyperparameters.
EPOCHS = 50
BATCH_SIZE = 32
LEARNING_RATE = 0.01

# Features run from 0 to N_SAMPLES - 1; targets follow the linear rule above.
RAW_X = np.arange(N_SAMPLES, dtype=np.float32)
RAW_Y = RAW_X * 2.0 + 10.0

# Show the raw ranges before any normalization is applied.
print(f"Dados Brutos (X): Min={RAW_X.min()}, Max={RAW_X.max()}")
print(f"Dados Brutos (Y): Min={RAW_Y.min()}, Max={RAW_Y.max()}")
print("-" * 50)


# --- 1. BUILDING THE DATA PIPELINE (tf.data) ---
# tf.data prepares upcoming batches while the current one is being consumed,
# so the training step does not have to wait for its input.
def build_data_pipeline(features, labels, batch_size):
    """Return a shuffled, batched and prefetched ``tf.data.Dataset``.

    Args:
        features: Sized collection of input samples; its length is used as
            the shuffle buffer size.
        labels: Targets aligned with ``features`` sample by sample.
        batch_size: Number of samples grouped into each batch.

    Returns:
        A dataset yielding ``(features_batch, labels_batch)`` pairs.
    """
    return (
        # One dataset element per (feature, label) sample.
        tf.data.Dataset.from_tensor_slices((features, labels))
        # A buffer as large as the dataset gives a full shuffle.
        .shuffle(buffer_size=len(features))
        # Group individual samples into batches.
        .batch(batch_size)
        # Let tf.data choose how many batches to prepare ahead of time.
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )


# --- 2. PRE-PROCESSING LAYER (Normalization Layer) ---
# Normalization layers are stateful: they must learn the mean and variance of
# the data BEFORE training starts. One layer is kept for the inputs and one
# for the targets.
normalizer_x = tf.keras.layers.Normalization(axis=None)
normalizer_y = tf.keras.layers.Normalization(axis=None)


# .adapt() fills in that state: it scans the given array and stores its mean
# and variance, so that later calls rescale data consistently (centered on 0
# with unit spread).
print("Calculando Média e Variância com .adapt()...")
normalizer_x.adapt(RAW_X)
normalizer_y.adapt(RAW_Y)
print(f"Estado Interno (X): Média (mean) calculada: {normalizer_x.mean.numpy().item():.4f}")
print(f"Estado Interno (Y): Média (mean) calculada: {normalizer_y.mean.numpy().item():.4f}")
print("-" * 50)

# The targets are normalized once, up front; the inputs stay raw in the
# pipeline and are normalized inside the training step.
normalized_y = normalizer_y(RAW_Y)
train_ds = build_data_pipeline(RAW_X, normalized_y, BATCH_SIZE)


# --- 3. THE MODEL (A Single Simple Dense Layer) ---
# Trainable parameters of the linear model, both starting at zero.
w0 = tf.Variable(initial_value=0.0, name="slope")  # Weight
w1 = tf.Variable(initial_value=0.0, name="bias")   # Bias

# Plain SGD optimizer that applies the gradient updates to the parameters.
optimizer = tf.optimizers.SGD(learning_rate=LEARNING_RATE)

# Loss function
def compute_loss(y_true, y_pred):
    """Return the mean squared error between ``y_pred`` and ``y_true``."""
    squared_error = tf.square(y_pred - y_true)
    return tf.reduce_mean(squared_error)

# Prediction function (forward pass)
def predict(X_normalized):
    """Apply the linear model ``w0 * X_normalized + w1``.

    Reads the module-level variables ``w0`` (slope) and ``w1`` (bias). Since
    training is done against normalized targets, the result is a prediction
    in the normalized Y space.
    """
    scaled = w0 * X_normalized
    return scaled + w1

# --- 4. THE CUSTOM TRAINING LOOP (Gradient Management) ---

def train_step(X_batch_raw, Y_batch_normalized):
    """Run one optimization step on a single batch and return its loss.

    Args:
        X_batch_raw: Batch of inputs on the raw scale; normalized here with
            the already-adapted ``normalizer_x``.
        Y_batch_normalized: Batch of targets that are already normalized.

    Returns:
        The mean squared error of this batch, computed before the update.
    """
    trainable = [w0, w1]

    # Step 1: bring the inputs onto the normalized scale.
    inputs = normalizer_x(X_batch_raw)

    # Step 2: record the forward pass and the loss so they can be
    # differentiated with respect to the model variables.
    with tf.GradientTape() as tape:
        predictions = predict(inputs)
        batch_loss = compute_loss(Y_batch_normalized, predictions)

    # Step 3: derivatives of the loss with respect to w0 and w1.
    grads = tape.gradient(batch_loss, trainable)

    # Step 4: let the optimizer move each variable against its gradient.
    optimizer.apply_gradients(zip(grads, trainable))

    return batch_loss

# --- 5. Running the Training (Epochs) ---
print("Iniciando Custom Training Loop...")
step_count = 0  # Total optimization steps taken across all epochs.
for epoch in range(EPOCHS):
    for X_batch_raw, Y_batch_normalized in train_ds:
        loss = train_step(X_batch_raw, Y_batch_normalized)
        step_count += 1

        # Report progress only on every 50th step.
        if step_count % 50 != 0:
            continue
        print(f"Epoch {epoch+1:02d} | Step {step_count:04d} | Loss: {loss.numpy():.5f} | w0: {w0.numpy():.4f} | w1: {w1.numpy():.4f}")

print("-" * 50)

# --- 6. Final Evaluation ---
# Predictions start from RAW_X, which must pass through the same normalization
# layer used during training BEFORE it reaches the model.
X_normalized_final = normalizer_x(RAW_X)
Y_pred_normalized_final = predict(X_normalized_final)

# Standard deviations learned by .adapt(), computed once and reused below.
std_x = tf.sqrt(normalizer_x.variance)
std_y = tf.sqrt(normalizer_y.variance)

# Undo the Y normalization (y = y_norm * std_y + mean_y) so the prediction can
# be compared with RAW_Y on the original scale.
Y_pred_final = Y_pred_normalized_final * std_y + normalizer_y.mean

final_loss = compute_loss(RAW_Y, Y_pred_final).numpy()

# Map the raw-scale target Y = 2 * X + 10 into the normalized space:
#   Y_norm = (2 * std_x / std_y) * X_norm + (10 + 2 * mean_x - mean_y) / std_y
# Because Y is an exact affine function of X, std_y = 2 * std_x and
# mean_y = 2 * mean_x + 10, so the expected parameters are w0 = 1 and w1 = 0.
expected_w0 = 2.0 * (std_x / std_y)
expected_w1 = (10.0 + 2.0 * normalizer_x.mean - normalizer_y.mean) / std_y


# The banner previously claimed w0=2 in the normalized scale; the correct
# normalized-space target is w0=1 (see the derivation above).
print("--- Resultados Finais (Target w0=2, w1=10 in raw scale, w0=1, w1=0 in normalized scale) ---")
print(f"Perda Final (MSE) on RAW Scale: {final_loss:.6f}")
print(f"w0 (Peso aprendido no espaço normalizado): {w0.numpy():.4f}")
print(f"w1 (Bias aprendido no espaço normalizado): {w1.numpy():.4f}")
print(f"Expected w0 in normalized space: {expected_w0.numpy().item():.4f}")
print(f"Expected w1 in normalized space: {expected_w1.numpy().item():.4f}")


# Sanity checks: the model should have learned Y_norm = 1 * X_norm + 0, which
# corresponds to Y = 2 * X + 10 on the raw scale.
assert final_loss < 10.0, "raw-scale MSE is above the tolerance"
assert abs(w0.numpy() - expected_w0.numpy()) < 0.1, "w0 did not converge (ideal normalized weight is 1)"
assert abs(w1.numpy() - expected_w1.numpy()) < 0.1, "w1 did not converge (ideal normalized bias is 0)"
print("\nVerificação de convergência BEM-SUCEDIDA!")

Dados Brutos (X): Min=0.0, Max=999.0
Dados Brutos (Y): Min=10.0, Max=2008.0
--------------------------------------------------
Calculando Média e Variância com .adapt()...
Estado Interno (X): Média (mean) calculada: 499.5000
Estado Interno (Y): Média (mean) calculada: 1009.0000
--------------------------------------------------
Iniciando Custom Training Loop...
Epoch 02 | Step 0050 | Loss: 0.13552 | w0: 0.6414 | w1: -0.0023
Epoch 04 | Step 0100 | Loss: 0.02011 | w0: 0.8692 | w1: 0.0014
Epoch 05 | Step 0150 | Loss: 0.00240 | w0: 0.9519 | w1: 0.0007
Epoch 07 | Step 0200 | Loss: 0.00024 | w0: 0.9824 | w1: 0.0003
Epoch 08 | Step 0250 | Loss: 0.00005 | w0: 0.9937 | w1: 0.0001
Epoch 10 | Step 0300 | Loss: 0.00001 | w0: 0.9977 | w1: -0.0000
Epoch 11 | Step 0350 | Loss: 0.00000 | w0: 0.9992 | w1: 0.0000
Epoch 13 | Step 0400 | Loss: 0.00000 | w0: 0.9997 | w1: 0.0000
Epoch 15 | Step 0450 | Loss: 0.00000 | w0: 0.9999 | w1: -0.0000
Epoch 16 | Step 0500 | Loss: 0.00000 | w0: 1.0000 | w1: 0.0000
Epo