# Entrenamiento baseline
Modelo sencillo (ResNet18) que predice Dry_Clover_g, Dry_Green_g y Dry_Dead_g a partir de cada foto.

In [None]:
import os
import sys
import random
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Detectar si estamos en Google Colab
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    project_path = '/content/drive/MyDrive/image2biomass'
    if os.path.exists(project_path):
        os.chdir(project_path)
        print(f"Directorio de trabajo cambiado a: {os.getcwd()}")
    else:
        print(f"Advertencia: No se encontró el directorio {project_path}")
    !pip install mlflow
else:
    sys.path.append('../')

from utils.paths import get_data_path

import mlflow
import mlflow.pytorch
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from src.utils.seed import set_seed
from src.utils.config import TrainingConfig
from src.utils.metrics import DEFAULT_TARGET_WEIGHTS, weighted_r2_score
from src.data.dataloader import make_dataloaders
from src.models.resnet import create_resnet
from src.training.trainer import Trainer
from src.inference.predictor import Predictor

# Global experiment setup: fixed seed, training configuration, compute device.
SEED = 42
cfg = TrainingConfig()
set_seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

In [None]:
# Configure MLflow through environment variables, then select the experiment
# under which runs are recorded.
# NOTE(review): per the MLflow docs, MLFLOW_TRACKING_INSECURE_TLS="true" makes
# the client skip TLS certificate verification. The original author marked it
# as important for the ngrok tunnel — confirm it is still required.
os.environ.update({
    "MLFLOW_TRACKING_URI": "https://abcd-12-34-56.ngrok-free.app",
    "MLFLOW_TRACKING_INSECURE_TLS": "true",
})
mlflow.set_experiment("biomass_prediction")
print(f"MLflow tracking URI: {mlflow.get_tracking_uri()}")

In [None]:
# Load the train/test tables from the project data directory.
base_path = Path(get_data_path())
train_df = pd.read_csv(base_path / 'train.csv')
test_df = pd.read_csv(base_path / 'test.csv')

targets = ['Dry_Clover_g', 'Dry_Green_g', 'Dry_Dead_g']

# Reshape from long format (one row per image/target pair) to wide format
# (one row per image, one column per target name).
pivot = pd.pivot_table(
    train_df,
    values='target',
    index='image_path',
    columns='target_name',
)
pivot = pivot.reset_index()

# Keep only the image path plus the modelled targets, and drop any image
# that is missing at least one of them.
wanted_columns = ['image_path', *targets]
pivot = pivot[wanted_columns].dropna().reset_index(drop=True)
print(f"Imagenes disponibles: {len(pivot)}")
pivot.head()

In [None]:
# Simple 80/20 random split at image level.
# The permutation is drawn from a dedicated generator seeded with SEED instead
# of NumPy's global random state, so re-running this cell (or running cells out
# of order) always produces the same train/validation partition.
rng = np.random.default_rng(SEED)
perm = rng.permutation(len(pivot))
split = int(len(pivot) * 0.8)
train_meta = pivot.iloc[perm[:split]].reset_index(drop=True)
val_meta = pivot.iloc[perm[split:]].reset_index(drop=True)

# Build the loaders; `val_tfms` is kept because it is reused later as the
# transform for test-time prediction.
train_loader, val_loader, val_tfms = make_dataloaders(
    train_meta,
    val_meta,
    targets=targets,
    images_root=base_path,
    img_size=cfg.img_size,
    batch_size=cfg.batch_size,
    num_workers=cfg.num_workers,
)
len(train_meta), len(val_meta)

In [None]:
# Full baseline experiment inside a single MLflow run: log hyperparameters,
# train, reload the checkpoint, evaluate on validation, plot, and write the
# submission file.
# NOTE(review): the run name says "frozen", but whether the backbone is frozen
# is decided by cfg.freeze_backbone — confirm the two agree.
with mlflow.start_run(run_name="resnet18_baseline_frozen"):
    # Log hyperparameters
    mlflow.log_params({
        "model": "ResNet18",
        "img_size": cfg.img_size,
        "batch_size": cfg.batch_size,
        "lr": cfg.lr,
        "epochs": cfg.epochs,
        "early_stopping_patience": cfg.early_stopping_patience,
        "freeze_backbone": cfg.freeze_backbone,
        "optimizer": "Adam",
        "loss": "MSE",
        "seed": SEED,
        "num_train_samples": len(train_meta),
        "num_val_samples": len(val_meta)
    })

    # Build the model with one output per target; the backbone is frozen
    # according to cfg.freeze_backbone.
    model = create_resnet(len(targets), freeze_backbone=cfg.freeze_backbone).to(device)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)

    # Log how many parameters are actually trainable
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    trainable_ratio = trainable_params / total_params
    mlflow.log_params({
        "trainable_params": trainable_params,
        "total_params": total_params,
        "trainable_ratio": f"{trainable_ratio:.4f}"
    })
    print(f"Parámetros entrenables: {trainable_params:,} / {total_params:,} ({trainable_ratio:.2%})")

    # Training
    trainer = Trainer(model, criterion, optimizer, device)
    history = trainer.fit(
        train_loader,
        val_loader,
        epochs=cfg.epochs,
        checkpoint_path=cfg.checkpoint_path,
        early_stopping_patience=cfg.early_stopping_patience
    )

    # Log per-epoch metrics; each history record is read by the keys
    # "epoch", "train_loss", "val_loss" and "best_val".
    for record in history:
        mlflow.log_metrics({
            "train_loss": record["train_loss"],
            "val_loss": record["val_loss"],
            "best_val_loss": record["best_val"],
        }, step=record["epoch"])

    # Reload the weights stored at the checkpoint path (presumably the best
    # epoch saved by trainer.fit — verify) and switch to inference mode.
    model.load_state_dict(torch.load(cfg.checkpoint_path, map_location=device))
    model.eval()

    # Log the model to MLflow
    mlflow.pytorch.log_model(model, "model")

    # Training curves
    history_df = pd.DataFrame(history)
    plt.style.use("seaborn-v0_8-darkgrid")
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(history_df["epoch"], history_df["train_loss"], marker="o", label="Train loss")
    ax.plot(history_df["epoch"], history_df["val_loss"], marker="o", label="Val loss")
    ax.set_xlabel("Época")
    ax.set_ylabel("MSE")
    ax.set_title("Evolución de la pérdida")
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()

    # Save the figure to MLflow
    mlflow.log_figure(fig, "training_loss_curve.png")
    plt.show()

    # Collect validation predictions in flattened form, with a parallel array
    # holding the target name of every flattened value.
    y_true, y_pred, name_labels = [], [], []
    target_names_arr = np.array(targets)  # loop-invariant, built once

    with torch.no_grad():
        for images, targets_batch in val_loader:
            images = images.to(device)
            preds = model(images).cpu().numpy()
            targets_np = targets_batch.numpy()
            batch_size = targets_np.shape[0]
            # assumes each batch is (batch, len(targets)) so the flattened
            # values line up with the tiled target names — TODO confirm
            y_true.append(targets_np.reshape(-1))
            y_pred.append(preds.reshape(-1))
            name_labels.append(np.tile(target_names_arr, batch_size))

    y_true_flat = np.concatenate(y_true)
    y_pred_flat = np.concatenate(y_pred)
    name_labels_flat = np.concatenate(name_labels)

    # Global metrics (all targets pooled together)
    val_weighted_r2 = weighted_r2_score(
        y_true=y_true_flat,
        y_pred=y_pred_flat,
        target_names=name_labels_flat,
        target_weights=DEFAULT_TARGET_WEIGHTS,
    )

    global_mae = mean_absolute_error(y_true_flat, y_pred_flat)
    global_rmse = np.sqrt(mean_squared_error(y_true_flat, y_pred_flat))
    global_r2 = r2_score(y_true_flat, y_pred_flat)

    print("\n=== Métricas globales de validación ===")
    print(f"R² ponderado: {val_weighted_r2:.4f}")
    print(f"MAE: {global_mae:.4f}")
    print(f"RMSE: {global_rmse:.4f}")
    print(f"R²: {global_r2:.4f}")

    # Log global metrics
    mlflow.log_metrics({
        "val_weighted_r2": val_weighted_r2,
        "val_mae": global_mae,
        "val_rmse": global_rmse,
        "val_r2": global_r2
    })

    # Per-target metrics. The slices and R² of each target are stored so the
    # scatter plots below can reuse them instead of recomputing them.
    per_target = {}
    print("\n=== Métricas por target ===")
    for target_name in targets:
        mask = name_labels_flat == target_name
        y_true_target = y_true_flat[mask]
        y_pred_target = y_pred_flat[mask]

        mae = mean_absolute_error(y_true_target, y_pred_target)
        rmse = np.sqrt(mean_squared_error(y_true_target, y_pred_target))
        r2 = r2_score(y_true_target, y_pred_target)

        print(f"{target_name}: MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}")

        # Log per-target metrics
        mlflow.log_metrics({
            f"{target_name}_mae": mae,
            f"{target_name}_rmse": rmse,
            f"{target_name}_r2": r2
        })
        per_target[target_name] = (y_true_target, y_pred_target, r2)

    # Scatter plots of prediction vs. ground truth, one panel per target.
    # squeeze=False always returns a 2-D axes array, so a single target needs
    # no special case.
    fig, axes = plt.subplots(
        1, len(targets), figsize=(4 * len(targets), 4),
        sharex=False, sharey=False, squeeze=False,
    )
    for ax, target_name in zip(axes[0], targets):
        y_true_target, y_pred_target, r2 = per_target[target_name]

        ax.scatter(y_true_target, y_pred_target, alpha=0.5, s=14, label="Pred vs GT")
        # Diagonal spanning the joint range of truth and prediction
        min_val = min(y_true_target.min(), y_pred_target.min())
        max_val = max(y_true_target.max(), y_pred_target.max())
        ax.plot([min_val, max_val], [min_val, max_val], color="tab:red", linewidth=1, label="Ideal")

        ax.set_title(f"{target_name}\nR²={r2:.3f}")
        ax.set_xlabel("Ground truth")
        ax.set_ylabel("Predicción")
        ax.legend()

    fig.suptitle("Dispersión predicciones vs verdad en validación", y=1.02, fontsize=12)
    fig.tight_layout()

    # Save the figure to MLflow
    mlflow.log_figure(fig, "validation_scatter_plots.png")
    plt.show()

    # Test-set predictions and submission file
    predictor = Predictor(model, device)
    submission = predictor.predict(
        test_df,
        targets,
        images_root=base_path,
        transform=val_tfms,
        batch_size=cfg.batch_size,
        num_workers=cfg.num_workers,
    )
    # Build the output path once and reuse it for writing, logging and printing.
    submission_path = cfg.model_dir / 'submission_baseline.csv'
    os.makedirs(cfg.model_dir, exist_ok=True)
    submission.to_csv(submission_path, index=False)

    # Log the submission file as an artifact
    mlflow.log_artifact(str(submission_path), "submission")

    print("\n=== Experimento completado ===")
    print(f"Run ID: {mlflow.active_run().info.run_id}")
    print(f"Submission guardado en: {submission_path}")

submission.head()