# Mask R-CNN - Train on particles dataset


This notebook shows how to train Mask RCNN sequentially so that one can train starting with synthetic data, then retrain models on manual segmentations for better performance. 

Note that this work is adapted from, and extends, the "train_shapes" sample from matterport's Mask RCNN release.


## Initialization and configs


In [1]:
#TODO - look at creating some sort of __init__ file, 
#       so some of these imports dont have to be shown every time

import os
import warnings
import sys
# Select which GPU is visible to this process (e.g., "4" for GPU 4).
# Both environment variables are set before TensorFlow is imported below,
# so that they are already in place when TensorFlow initializes.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Suppress WARNING and INFO logs, shows only ERRORs
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"   

# Suppress Python warnings whose message mentions Model.state_updates

warnings.filterwarnings("ignore", message=".*Model.state_updates.*")


import tensorflow as tf
# Uncomment for debugging/TF build
# print("TensorFlow version:", tf.__version__)
# print("Built with CUDA:", tf.test.is_built_with_cuda())
# print("GPU Available:", tf.config.list_physical_devices('GPU'))

#==============================================================
# CORE Libraries
#==============================================================
#TODO - clean up unused imports that now live in other .py files, or move them into a single __init__.py file
import random, math, re, time, csv, tempfile
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from IPython.display import clear_output

#==============================================================
#  PROJECT DIRECTORIES
#==============================================================

# Project root, resolved relative to the notebook's working directory
ROOT_DIR = os.path.abspath("../SAGE")
print(f"ROOT DIR: {ROOT_DIR}")

# Sub-directories of the project root
# Models already trained (by us) are stored here
PRETRAIN_DIR = os.path.join(ROOT_DIR, "pretrained_models")
# Training logs and model checkpoints
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Result artifacts (images, etc.)
Results_DIR = os.path.join(ROOT_DIR, "Results")
# TODO: remove for final version?
# Log/model storage on local disk, two levels above the project root
DATA_DIR = os.path.abspath(
    os.path.join(ROOT_DIR, os.pardir, os.pardir, "Data", "logs")
)

print(f"PRETRAINED MODELS DIRECTORY: {PRETRAIN_DIR}")
print(f"MODEL DIRECTORY: {MODEL_DIR}")
print(f"Results DIR: {Results_DIR}")
print(f"DATA DIR (local disk log saving): {DATA_DIR}")

#==============================================================
#  MASK RCNN SETUP
#==============================================================

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
from mrcnn.utils import print_verbose

# Report which copy of the mrcnn package was actually imported
mrcnn_dir = os.path.dirname(modellib.__file__)
model_file_path = os.path.join(mrcnn_dir, "model.py")
print(f"mrcnn directory: {mrcnn_dir}")
print(f"Path to model.py: {model_file_path}")


# COCO weights are expected in the project root; they are downloaded
# only when the file is not already there
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
coco_weights_missing = not os.path.exists(COCO_MODEL_PATH)
if coco_weights_missing:
    utils.download_trained_weights(COCO_MODEL_PATH)

print(f"COCO MODEL PATH: {COCO_MODEL_PATH}")

#import callbacks
from mrcnn.callbacks import MeanAveragePrecisionCallback
from mrcnn.callbacks import TrainingLogger

# Print out some useful info about the environment --- IGNORE ---
#print("sys.prefix:",sys.prefix)
#print("sys.executable:",sys.executable)
#print("sys.path:", sys.path)
%matplotlib inline 


#import tensorboard stuff
%load_ext tensorboard




2025-09-10 11:52:42.617210: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-09-10 11:52:42.617262: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-09-10 11:52:42.619557: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


ROOT DIR: /home/timd/Desktop/githubrepo/SAGE
PRETRAINED MODELS DIRECTORY: /home/timd/Desktop/githubrepo/SAGE/pretrained_models
MODEL DIRECTORY: /home/timd/Desktop/githubrepo/SAGE/logs
Results DIR: /home/timd/Desktop/githubrepo/SAGE/Results
DATA DIR (local disk log saving): /home/timd/Desktop/Data/logs
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
mrcnn directory: /home/timd/Desktop/githubrepo/SAGE/mrcnn
Path to model.py: /home/timd/Desktop/githubrepo/SAGE/mrcnn/model.py
Downloading pretrained model to /home/timd/Desktop/githubrepo/SAGE/mask_rcnn_coco.h5 ...
... done downloading pretrained model!
COCO MODEL PATH: /home/timd/Desktop/githubrepo/SAGE/mask_rcnn_coco.h5


In [2]:
#TODO - decide whether the default SAGE config should live in its own .py module
class SAGEConfig(Config):
    """Training configuration for the SAGE particle-segmentation models.

    Derives from the base Config class and overrides only the values set
    below; every other setting keeps the base-class default.
    """
    # Give the configuration a recognizable name
    NAME = "SAGE"

    # Train on 1 GPU with 2 images per GPU, i.e. a batch size of 2
    # (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1 # background + particle; use 1 + 1 + 1 to add a cluster class

    # Input image resizing
    # Generally, use the "square" resizing mode for training and predicting
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
    #         IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 1024
    IMAGE_MAX_DIM = 1024
    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0
    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3 # images are grayscale (8-bit), so this may need to change to 1
    
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) # may need to change to a single value for grayscale

    # Anchor side lengths in pixels.
    RPN_ANCHOR_SCALES = (32, 64, 128, 256,512) # alternative: (16,32,64,128,256)
                # option: switch to the smaller values to recognize smaller particles as well

    # Number of ROIs per image used when training; raised from 128 to 256
    # (per the author's note).
    TRAIN_ROIS_PER_IMAGE = 256
    # Maximum number of detections per image
    DETECTION_MAX_INSTANCES = 200 # increased from 100 to 200
    # Initial value only: the dataset-selection cells later in this notebook
    # overwrite config.STEPS_PER_EPOCH from dataset size / batch size.
    STEPS_PER_EPOCH =76 # 76 for 200 #20 for 60 #188 for 750
                    
    # Minimum confidence for a detection (this is a score cut-off, not the
    # non-maximum-suppression threshold)
    DETECTION_MIN_CONFIDENCE = 0.7
    # Initial value only: also overwritten later in the notebook.
    VALIDATION_STEPS = 25 # number of validation images / batch size
    
    # Early-stopping settings.
    # NOTE(review): nothing else in this notebook reads these; presumably they
    # are consumed by the project's modified mrcnn training code - confirm.
    EARLY_STOPPING_MONITOR = 'val_loss'
    EARLY_STOPPING_PATIENCE = 10 # number of epochs with no improvement required to stop
    ES_RESTORE_BEST_WEIGHTS =True
    ES_MODE= "min"
    ES_VERBOSE = 0
    
# Single shared config instance used by the training cells below
config = SAGEConfig()


# Load Datasets:

Here, load the image sets that will be used for training and validation. Multiple can be loaded to facilitate easy access and transfer learning. We load them using an extended dataset class called SAGE_Dataset.

We define the datasets to load in a list of tuples, where each tuple contains:
1. The dataset name 
2. A boolean `use_results` indicating whether a results directory should be created

In this example, we will load the following datasets:
* `NS40_train`, `NS40_val`: synthetically generated TEM images for training the synthetic model (SAGE<sub>0</sub>).
* `D1e1_train`, `D1e1_val`: real TEM images annotated manually, for training a second model (SAGE<sub>1</sub>) that fine-tunes SAGE<sub>0</sub>.
* `D2e1_train`, `D2e1_val`: the same real TEM images, but annotated by different analysts. These datasets are used to train a third model (SAGE<sub>2</sub>) so that it can generalize better to human variation in manual annotation.

Since these datasets are used in training only, we set `use_results=False` so that no results directories are created.

In [None]:
# (dataset_name, use_results) pairs; comment entries in or out to choose
# which datasets get loaded
datasets_to_load = [
    # dataset for synthetic model training
    #('NS40_train', False),
    #('NS40_val', False),
    # datasets for fine tuning with real TEM images
    ('D1e1_train', False),
    ('D1e1_val', False),
    # datasets for fine tuning with real TEM images annotated by different analysts
    #('D2e1_train', False),
    #('D2e1_val', False),
]

# Loaded dataset objects, keyed by dataset name
datasets = {}

# Progress-bar settings shared by every iteration; only `initial` changes
progress_options = dict(
    total=len(datasets_to_load),
    desc="Loading datasets",
    dynamic_ncols=True,
    position=0,
    leave=True,
    bar_format="{l_bar}{bar} {n_fmt}/{total_fmt}",
)

for loaded_so_far, (name, use_results) in enumerate(datasets_to_load):
    # The cell output (including the previous bar) is cleared each time, so a
    # new bar is created already advanced to the number of datasets loaded
    clear_output(wait=True)
    pbar_datasets = tqdm(initial=loaded_so_far, **progress_options)
    datasets[name] = utils.load_and_register_dataset(
        name, ROOT_DIR, Results_DIR, create_dirs=use_results
    )
    pbar_datasets.update(1)
    


Next, we can double-check that masks are correctly assigned for each dataset. For each dataset, the cell below loads the first image, retrieves its masks and class IDs, and displays them using `visualize.display_top_masks`. 

If there are multiple classes (separated by folders in each dataset's directory), the masks for each class will show as well. This helps catch any masks assigned to the wrong image or class.

In [None]:
# Show the first image of every loaded dataset together with its masks
for name, dataset in datasets.items():
    print(f"\n--- Dataset: {name} ---")

    if len(dataset.image_ids) > 0:
        image_id = dataset.image_ids[0]
        print(f"Image ID:{image_id}")

        # Load the image and the masks/class ids registered for it
        image = dataset.load_image(image_id)
        mask, class_ids = dataset.load_mask(image_id)

        print(f"Mask shape for Image ID {image_id}: {mask.shape}")
        visualize.display_top_masks(image, mask, class_ids, dataset.class_names)
    else:
        # Nothing to display for an empty dataset
        print("No images loaded")


# Training Procedure

To train a model we follow the following steps:
* 1. Model Initialization
* 2. Select Datasets
* 3. Define custom callbacks
* 4. Define training schedule and train

## 1. Model Initialization 

Now we will create and train our first model, trained on the synthetic images (`NS40_train`, `NS40_val`). 

First, we create a Mask R-CNN model in training mode and specify which weights to start with.


For our SAGE<sub>0</sub> model, we will choose COCO weights (`init_with=coco`) as starting weights. 

If we wanted to use a previously trained model as a starting point (like for SAGE<sub>1</sub> or SAGE<sub>2</sub>), we can load either the last model trained (`init_with=last`), or specify a path (`init_with=manual`, `manual_path=path/to/model.h5`). 

In [None]:
#TODO-remove DATA_DIR for actual push
DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, "../../Data/logs"))
print("DATA DIRECTORY:", DATA_DIR)

# Create model in training mode; logs and checkpoints go under model_dir
model = modellib.MaskRCNN(mode="training", 
                          config=config,
                          model_dir=DATA_DIR #switch to MODEL_DIR for push
                          )

# Which weights to start with?
init_with = "manual"  # imagenet, coco, last, or manual 

# Path used only when init_with == "manual".
# The pretrained directory holds the models provided with this work.
manual_path = os.path.join(PRETRAIN_DIR, "SAGE_0/SAGE_0.h5")
# or manual_path = os.path.join(MODEL_DIR, "model_folder/model_epoch.h5")

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "manual":
    # Load the weights from the specified path
    model.load_weights(manual_path, by_name=True)
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)
else:
    # Without this branch a typo in init_with would silently leave the model
    # with no weights loaded at all
    raise ValueError(
        f"Unknown init_with={init_with!r}; "
        "expected one of 'imagenet', 'coco', 'last', 'manual'."
    )

Before training our model, we need to define the training/validation datasets, any custom callbacks, and the phases (stages) we want to train our model with.


## 2. Select Datasets:

First, we will choose the datasets for training and validation from the sets loaded previously, using `datasets.get('Dataset_name', None)`. If needed, we can check which sets are loaded and available for use using `print_loaded_datasets(datasets)`.

We can also adjust the number of training and validation steps per epoch (`STEPS_PER_EPOCH` and `VALIDATION_STEPS`) based on dataset size and batch size if desired.


In [None]:
# List what is currently held in `datasets` so the names selected in the next cell can be checked
utils.print_loaded_datasets(datasets)

In [None]:
# -----------Define datasets---------------------------

# Names must match entries that were loaded in the "Load Datasets" cell
train_dataset_name = 'D1e1_train'
val_dataset_name = 'D1e1_val'

dataset_train = datasets.get(train_dataset_name, None)
dataset_val = datasets.get(val_dataset_name, None)

# Fail with a clear message instead of an AttributeError on None further down
for selected_name, selected_dataset in ((train_dataset_name, dataset_train),
                                        (val_dataset_name, dataset_val)):
    if selected_dataset is None:
        raise KeyError(
            f"Dataset '{selected_name}' is not loaded. Loaded datasets: "
            f"{sorted(datasets)}. Add it to `datasets_to_load` and re-run the loading cell."
        )
    if len(selected_dataset.image_ids) == 0:
        # An empty dataset would give 0 steps per epoch below
        raise ValueError(f"Dataset '{selected_name}' contains no images.")

# Adjust step sizes so that one epoch covers each dataset once
# (number of images / batch size, rounded up)
config.STEPS_PER_EPOCH = math.ceil(len(dataset_train.image_ids) / config.BATCH_SIZE)
config.VALIDATION_STEPS = math.ceil(len(dataset_val.image_ids) / config.BATCH_SIZE)

config.display()


## 3. Define custom callbacks

Next, we set up the custom callbacks, either by writing a new subclass of `tf.keras.callbacks.Callback` or by using the custom callbacks defined in `mrcnn/callbacks.py`. We use two custom callbacks:

`1. MeanAveragePrecisionCallback()`
This calculates mAP (both VOC mAP @0.5 IoU and COCO style mAP) of an epoch against the validation dataset. 

Key inputs:
* `train_model`: model being trained
* `inference_model`: instance of model in inference mode (must create instance with batch size of 1)
* `dataset`: dataset to run mAP calculations on (validation dataset) 
* `dataset_limit`: applies a maximum count of images to calculate mAP on (will use a random subset if dataset has more than maximum count)
* `calculate_map_at_every_X_epoch`: controls how often mAP is calculated (default of 5)

`2. TrainingLogger()`

Logs training configuration, tracks statistics for each epoch and stage, and stores losses and mAP values

Key inputs:
* `train_model`: model being trained
* `log_dir`: location to save log files (usually `model.log_dir`)
* `dataset_train`, `dataset_val`: datasets used for training and validation
* `init_with`: passes the starting weights used to initialize model
* `metric_log_cats`: what metrics or loss you want logged to csv file (`mAP`, `loss`, `general`, `all`). 

The logging categories for saving to csv (`metric_log_cats`) include the following metrics:
* `mAP`: val_AP50 and val_mAP_coco
* `loss`: loss and val_loss
* `general`: loss, val_loss, val_AP50, and val_mAP_coco
* `all`: all losses and mAP values 
    



In [None]:
#------------Custom Callbacks-----------------------

# Inference-mode model used by the mAP callback.
# The config derives from SAGEConfig (not the bare Config) so that inference
# uses the same image sizing, anchors, detection limits, name and class count
# as training; only the batch size is forced to a single image.
class _InfConfig(SAGEConfig):
    """Inference variant of SAGEConfig with a batch size of 1."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

model_inference = modellib.MaskRCNN(mode="inference", config=_InfConfig(),
                                    model_dir=MODEL_DIR)


# Define mAP callback (full class in mrcnn/callbacks.py)
# ----- "calculate_map_at_every_X_epoch" controls how often mAP is calculated
# NOTE(review): an earlier comment here mentioned "after epoch 3"; confirm in
# mrcnn/callbacks.py whether any warm-up period applies.
mean_average_precision_callback = MeanAveragePrecisionCallback(
    model, model_inference, dataset_val,
    calculate_map_at_every_X_epoch=1,
    dataset_limit=50,  # option to limit dataset size if validation set is quite large
    verbose=1)

# Logs the training configuration and the key metrics of each epoch/stage
training_logger = TrainingLogger(
    model, model.log_dir,
    dataset_train, dataset_val,
    init_with=init_with,  # initial weights choice, set in the model initialization cell
    manual_weights_path=manual_path if init_with == "manual" else None,
    metric_log_cats="general",
    verbose=1)

## 4. Define training schedule and train
We define the training schedule as a list of stages. Each stage specifies:
* `name`: which layers to train (e.g. `heads` or `all`) -> passes to `layers` argument in `train()`.
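* `epochs`: the maximum number of epochs to run in that stage -> passes to `epochs` argument in `train()`. 
* `learning_rate`: the learning rate used in that stage -> passes to `learning_rate` argument in `train()`.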

In this example, SAGE uses two stages:


* 1. Heads only -> Freeze the backbone and train only the randomly initialized head layers (i.e., the ones not initialized with COCO or other pre-trained weights). This is done by passing `layers="heads"` to the `train()` function from the training stage list. 



* 2. All layers -> Fine-tune the entire network, including the backbone, by passing `layers="all"` from training stage list. 


A loop then iterates over each stage defined in the training schedule, training the model stage by stage according to the layers and epochs defined in the list. 




In [None]:

# Training schedule: one dict per stage. "name" is passed to train() as
# `layers`, "epochs" is the maximum number of epochs for that stage, and
# "learning_rate" is the learning rate used in that stage.
train_schedule = [
    {"name": "heads", "epochs": 100, "learning_rate": config.LEARNING_RATE},
    {"name": "all", "epochs": 100, "learning_rate": config.LEARNING_RATE/10}
]

total_epochs = sum(stage["epochs"] for stage in train_schedule)
cumulative_epochs = 0

# Epoch the model already counts as completed. In upstream Mask R-CNN,
# train(epochs=...) is the absolute epoch to stop at, and load_weights() on a
# timestamped checkpoint (e.g. init_with="last") restores model.epoch.
# Offsetting by it keeps every stage at its scheduled length; when the model
# starts at epoch 0 (or has no `epoch` attribute) nothing changes.
start_epoch = getattr(model, "epoch", 0)

# Loop through the stages defined in train_schedule
for stage in train_schedule:
    stage_name = stage["name"]
    stage_epochs = stage["epochs"]

    # Tell the logger which stage is running and how long it is scheduled for
    training_logger.stage_name = stage_name
    training_logger.scheduled_epochs = stage_epochs
    
    # Start training; `epochs` is a running total across stages
    model.train(dataset_train, dataset_val,
                learning_rate=stage["learning_rate"],
                epochs=start_epoch + cumulative_epochs + stage_epochs,
                layers=stage_name,
                custom_callbacks=[mean_average_precision_callback, training_logger])
    
    cumulative_epochs += stage_epochs


# Fine-tuning on Manual Images 

After the SAGE<sub>0</sub> is trained, we can use the final weights of that model to initialize a new model that will train on manually annotated real TEM images to fine-tune performance. 

This follows the same training process as before, just with different starting weights and datasets. 

By starting with the weights of the last model (`init_with=last`) and training on `D1e1_train` and `D1e1_val`, we create SAGE<sub>1</sub>.

This process is then repeated using `D2e1_train` and `D2e1_val` to create our final model, SAGE<sub>2</sub>

In [None]:
#--------- Create NEW model in training mode ------------
# model_dir must be the directory the previous model was saved to; otherwise
# init_with="last" (find_last) cannot locate it. The model initialization
# cell above uses DATA_DIR, so the same is used here (switch both to
# MODEL_DIR for push).
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=DATA_DIR)

#--------Load starting weights (from either last model or a defined path)
init_with = "last"  # last or manual
# When init_with == "manual", `manual_path` from the model initialization
# cell is used; reassign it here to start from a different checkpoint.

if init_with == "manual":
    # Load the weights from the specified path
    model.load_weights(manual_path, by_name=True)
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)
else:
    # Without this branch a typo would silently leave the model uninitialized
    raise ValueError(
        f"Unknown init_with={init_with!r}; expected 'last' or 'manual'."
    )

#--------Assign new train/val datasets---------
train_dataset_name = 'D1e1_train'
val_dataset_name = 'D1e1_val'

dataset_train = datasets.get(train_dataset_name, None)
dataset_val = datasets.get(val_dataset_name, None)

# Fail with a clear message instead of an AttributeError on None further down
for selected_name, selected_dataset in ((train_dataset_name, dataset_train),
                                        (val_dataset_name, dataset_val)):
    if selected_dataset is None:
        raise KeyError(
            f"Dataset '{selected_name}' is not loaded. Loaded datasets: "
            f"{sorted(datasets)}. Add it to `datasets_to_load` and re-run the loading cell."
        )
    if len(selected_dataset.image_ids) == 0:
        # An empty dataset would give 0 steps per epoch below
        raise ValueError(f"Dataset '{selected_name}' contains no images.")

# Adjust step sizes so that one epoch covers each dataset once
config.STEPS_PER_EPOCH = math.ceil(len(dataset_train.image_ids) / config.BATCH_SIZE)
config.VALIDATION_STEPS = math.ceil(len(dataset_val.image_ids) / config.BATCH_SIZE)

#------------Redefine Custom Callbacks-----------------------

# Inference-mode model used by the mAP callback.
# The config derives from SAGEConfig (not the bare Config) so that inference
# uses the same image sizing, anchors, detection limits, name and class count
# as training; only the batch size is forced to a single image.
class _InfConfig(SAGEConfig):
    """Inference variant of SAGEConfig with a batch size of 1."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

model_inference = modellib.MaskRCNN(mode="inference", config=_InfConfig(),
                                    model_dir=MODEL_DIR)


# Define mAP callback (full class in mrcnn/callbacks.py)
# ----- "calculate_map_at_every_X_epoch" controls how often mAP is calculated
mean_average_precision_callback = MeanAveragePrecisionCallback(
    model, model_inference, dataset_val,
    calculate_map_at_every_X_epoch=1,
    dataset_limit=50,  # option to limit dataset size if validation set is quite large
    verbose=1)

# Logs the training configuration and the key metrics of each epoch/stage
training_logger = TrainingLogger(
    model, model.log_dir,
    dataset_train, dataset_val,
    init_with=init_with,  # initial weights choice, set above
    manual_weights_path=manual_path if init_with == "manual" else None,
    metric_log_cats="general",
    verbose=1)

#---Define new training schedule and train
train_schedule = [
    {"name": "heads", "epochs": 100, "learning_rate": config.LEARNING_RATE},
    {"name": "all", "epochs": 100, "learning_rate": config.LEARNING_RATE/10}
]

total_epochs = sum(stage["epochs"] for stage in train_schedule)
cumulative_epochs = 0

# Epoch the model already counts as completed. In upstream Mask R-CNN,
# train(epochs=...) is the absolute epoch to stop at, and load_weights() on a
# timestamped checkpoint (as returned by find_last) restores model.epoch.
# Offsetting by it keeps every stage at its scheduled length; when the model
# starts at epoch 0 (or has no `epoch` attribute) nothing changes.
start_epoch = getattr(model, "epoch", 0)

# Loop through the stages defined in train_schedule
for stage in train_schedule:
    stage_name = stage["name"]
    stage_epochs = stage["epochs"]

    # Tell the logger which stage is running and how long it is scheduled for
    training_logger.stage_name = stage_name
    training_logger.scheduled_epochs = stage_epochs
    
    # Start training; `epochs` is a running total across stages
    model.train(dataset_train, dataset_val,
                learning_rate=stage["learning_rate"],
                epochs=start_epoch + cumulative_epochs + stage_epochs,
                layers=stage_name,
                custom_callbacks=[mean_average_precision_callback, training_logger])
    
    cumulative_epochs += stage_epochs

