In [1]:
import os
import time
import csv
from arcgis.learn import prepare_data, MaskRCNN

# --- Paths ---
training_data_path = r"C:\Users\ss2596\Documents\njoko training\Ag_Object512RCNN(TF)"
model_output_path = r"C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf"
model_name = "maskrcnn_model"
epochs = 30
learning_rate = 0.001
batch_size = 4
patience = 5  # Stop training if val loss doesn't improve after 5 epochs

# Create output directory if it doesn't exist
os.makedirs(model_output_path, exist_ok=True)

# --- Prepare Data ---
print("📦 Preparing data...")
data = prepare_data(
    path=training_data_path,
    dataset_type='RCNN_Masks',
    batch_size=batch_size
)

# --- Initialize Model ---
print("🧠 Initializing model...")
model = MaskRCNN(data=data, backbone='resnet50')

# --- Set up metrics logging ---
metrics_file = os.path.join(model_output_path, "training_metrics.csv")
if not os.path.exists(metrics_file):
    with open(metrics_file, mode='w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["epoch", "train_loss", "val_loss", "duration_minutes"])

# --- Training with Early Stopping ---
print(f"🚀 Starting training for up to {epochs} epochs (early stopping enabled)\n")

best_val_loss = float('inf')
no_improve_epochs = 0

for epoch in range(epochs):
    print(f"🔁 Epoch {epoch + 1}/{epochs}")
    start_time = time.time()

    # Train for one epoch
    model.fit(epochs=1, lr=learning_rate)

    # Get losses
    train_loss = model.learn.recorder.losses[-1].item()
    val_loss, *_ = model.learn.validate()

    duration = round((time.time() - start_time) / 60, 2)
    print(f"📉 Train loss: {train_loss:.4f}, Val loss: {val_loss:.4f}, Time: {duration} min")

    # Save metrics
    with open(metrics_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([epoch + 1, round(train_loss, 4), round(val_loss, 4), duration])

    # Save model checkpoint
    checkpoint_path = os.path.join(model_output_path, f"checkpoint_epoch_{epoch + 1}.dlpk")
    model.save(checkpoint_path, framework='PyTorch')
    print(f"💾 Checkpoint saved: {checkpoint_path}\n")

    # Early stopping check
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve_epochs = 0
    else:
        no_improve_epochs += 1
        print(f"⚠️ No improvement for {no_improve_epochs} epoch(s)")

        if no_improve_epochs >= patience:
            print(f"🛑 Early stopping triggered at epoch {epoch + 1}")
            break

# --- Final Save ---
final_model_path = os.path.join(model_output_path, model_name)
model.save(final_model_path, framework='PyTorch')
print(f"✅ Final model saved to: {final_model_path}")


📦 Preparing data...


Please check your dataset. 8082 images dont have the corresponding label files.


🧠 Initializing model...
🚀 Starting training for up to 30 epochs (early stopping enabled)

🔁 Epoch 1/30


epoch,train_loss,valid_loss,average_precision,time
0,2.346886,2.470526,0.026398,12:27


📉 Train loss: 2.3469, Val loss: 2.4682, Time: 14.38 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_1.dlpk

🔁 Epoch 2/30


epoch,train_loss,valid_loss,average_precision,time
0,2.220289,2.408924,0.046208,12:27


📉 Train loss: 2.2203, Val loss: 2.4218, Time: 14.34 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_2.dlpk

🔁 Epoch 3/30


epoch,train_loss,valid_loss,average_precision,time
0,2.208673,2.419629,0.055921,12:33


📉 Train loss: 2.2087, Val loss: 2.4199, Time: 14.46 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_3.dlpk

🔁 Epoch 4/30


epoch,train_loss,valid_loss,average_precision,time
0,2.142982,2.398208,0.072865,12:42


📉 Train loss: 2.1430, Val loss: 2.3938, Time: 14.63 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_4.dlpk

🔁 Epoch 5/30


epoch,train_loss,valid_loss,average_precision,time
0,2.227413,2.3743,0.06438,12:35


📉 Train loss: 2.2274, Val loss: 2.3710, Time: 14.53 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_5.dlpk

🔁 Epoch 6/30


epoch,train_loss,valid_loss,average_precision,time
0,2.12474,2.370318,0.059103,12:14


📉 Train loss: 2.1247, Val loss: 2.3672, Time: 14.14 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_6.dlpk

🔁 Epoch 7/30


epoch,train_loss,valid_loss,average_precision,time
0,2.190922,2.38454,0.069831,12:12


📉 Train loss: 2.1909, Val loss: 2.3929, Time: 14.05 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_7.dlpk

⚠️ No improvement for 1 epoch(s)
🔁 Epoch 8/30


epoch,train_loss,valid_loss,average_precision,time
0,2.137218,2.335241,0.057695,12:56


📉 Train loss: 2.1372, Val loss: 2.3332, Time: 14.82 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_8.dlpk

🔁 Epoch 9/30


epoch,train_loss,valid_loss,average_precision,time
0,2.224217,2.370269,0.059767,12:27


📉 Train loss: 2.2242, Val loss: 2.3605, Time: 14.35 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_9.dlpk

⚠️ No improvement for 1 epoch(s)
🔁 Epoch 10/30


epoch,train_loss,valid_loss,average_precision,time
0,2.196193,2.377312,0.069929,12:33


📉 Train loss: 2.1962, Val loss: 2.3754, Time: 14.47 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_10.dlpk

⚠️ No improvement for 2 epoch(s)
🔁 Epoch 11/30


epoch,train_loss,valid_loss,average_precision,time
0,2.138847,2.374719,0.075057,12:17


📉 Train loss: 2.1388, Val loss: 2.3836, Time: 14.15 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_11.dlpk

⚠️ No improvement for 3 epoch(s)
🔁 Epoch 12/30


epoch,train_loss,valid_loss,average_precision,time
0,2.157749,2.352632,0.06512,12:42


📉 Train loss: 2.1577, Val loss: 2.3587, Time: 14.58 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_12.dlpk

⚠️ No improvement for 4 epoch(s)
🔁 Epoch 13/30


epoch,train_loss,valid_loss,average_precision,time
0,2.049789,2.377697,0.075841,12:28


📉 Train loss: 2.0498, Val loss: 2.3726, Time: 14.33 min
Computing model metrics...


💾 Checkpoint saved: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\checkpoint_epoch_13.dlpk

⚠️ No improvement for 5 epoch(s)
🛑 Early stopping triggered at epoch 13
✅ Final model saved to: C:\Users\ss2596\Documents\Njoko_model\Ag_Object512rcnn_tf\maskrcnn_model
