In [1]:
from arcgis.learn import FasterRCNN, prepare_data
import torch
import os

# ---- Training settings ----
epochsNum = 10  # Number of training epochs
batchNum = 4    # Batch size
chipSize = 512  # Recommended chip size for object detection (not too large)
numWorkers = 4  # CPU workers for data loading

# ---- Paths ----
training_data_path = r"C:\Users\ss2596\Documents\njoko training\Ag_Object512(truefalse)"
model_output_path = r"C:\Users\ss2596\Documents\Njoko_model\Ag_Object512(truefalse)"

# ---- Prepare object detection data ----
print("📦 Preparing object detection data...", flush=True)
data = prepare_data(
    path=training_data_path,
    batch_size=batchNum,
    chip_size=chipSize,
    num_workers=numWorkers,
    dataset_type="ObjectDetection"  # Crucial for object detection
)

# ---- Initialize FasterRCNN model ----
print("🧠 Initializing FasterRCNN model...", flush=True)
model = FasterRCNN(data)


# ---- Create output folder if needed ----
os.makedirs(model_output_path, exist_ok=True)

# ---- Detect and display GPU device ----
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(torch.cuda.current_device())
    print(f"⚙️ Using GPU: {gpu_name}", flush=True)
else:
    print("⚙️ Using CPU", flush=True)


📦 Preparing object detection data...


Please check your dataset. 29131 images dont have the corresponding label files.


🧠 Initializing FasterRCNN model...
⚙️ Using GPU: NVIDIA GeForce RTX 3060


In [2]:
import os
import time
import csv


# Define path for saving training metrics
metrics_file = os.path.join(model_output_path, "training_metrics.csv")

# Write header to metrics CSV if it doesn't already exist
if not os.path.exists(metrics_file):
    with open(metrics_file, mode='w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["epoch", "train_loss", "valid_loss", "accuracy", "dice", "duration_mins"])

# ---- Early stopping setup ----
best_loss = float('inf')     # Lowest validation loss seen so far
patience = 3                 # Stop training if no improvement after this many epochs
no_improve_epochs = 0        # Counter for consecutive non-improving epochs

# ---- Begin training loop ----
for epoch in range(epochsNum):
    print(f"\n🔁 Starting epoch {epoch + 1}/{epochsNum}...", flush=True)
    start_time = time.time()

    # Train for one epoch
    model.fit(1)

    # ---- Collect training and validation metrics ----
    learner = model.learn
    train_loss = learner.recorder.losses[-1].item()               # Last training loss
    valid_loss, *metrics = learner.validate()                     # Validation loss and metrics
    accuracy = float(metrics[0]) if len(metrics) > 0 else None    # Extract accuracy if available
    dice = float(metrics[1]) if len(metrics) > 1 else None        # Extract dice coefficient if available
    duration = round((time.time() - start_time) / 60, 2)          # Duration in minutes

    # ---- Append metrics to CSV ----
    with open(metrics_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            epoch + 1,
            round(train_loss, 4),
            round(valid_loss, 4),
            round(accuracy, 4) if accuracy is not None else None,
            round(dice, 4) if dice is not None else None,
            duration
        ])

    # ---- Save checkpoint for this epoch ----
    checkpoint_path = os.path.join(model_output_path, f"checkpoint_epoch_{epoch + 1}.dlpk")
    model.save(checkpoint_path, framework='PyTorch')
    print(f"💾 Checkpoint saved: {checkpoint_path}", flush=True)


    # ---- Early stopping check ----
    if valid_loss < best_loss:
        best_loss = valid_loss             # New best model
        no_improve_epochs = 0              # Reset counter
    else:
        no_improve_epochs += 1             # Increment counter
        print(f"📉 No improvement. {no_improve_epochs} consecutive epochs without improvement.")

    # Trigger early stop if patience limit is reached
    if no_improve_epochs >= patience:
        print(f"🛑 Early stopping triggered after {patience} epochs without improvement.")
        break

# ---- Save final model after training completes or early stopping ----
final_model_path = os.path.join(model_output_path, "final_model.dlpk")
model.save(final_model_path, framework='PyTorch')
print(f"\n🎯 Final model saved: {final_model_path}", flush=True)

print("\n✅ Training complete.")


epoch,train_loss,valid_loss,average_precision,time
0,0.075554,0.084317,0.911746,11:51


Computing model metrics...
💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512(truefalse)\checkpoint_epoch_6.dlpk
📉 No improvement. 3 consecutive epochs without improvement.
🛑 Early stopping triggered after 3 epochs without improvement.

🎯 Final model saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512(truefalse)\final_model.dlpk

✅ Training complete.
