In [None]:
cd /content/drive/MyDrive/Thesis_Organized

/content/drive/MyDrive/Thesis_Organized


In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import os
from tqdm import tqdm
# Remember the working directory the notebook was started from and display it.
main_dir = os.getcwd()
main_dir

'/content/drive/MyDrive/Thesis_Organized'

In [None]:
# Setups
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
torch:  2.2 ; cuda:  cu121
detectron2: 0.6


In [None]:
import detectron2
# from detectron2.utils.logger import setup_logger
# setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import os

In [None]:
# Bring the project configuration into the notebook namespace.
# NOTE(review): the wildcard import hides where names come from. Cells in this notebook use
# Detectron2_cfg, Data_cfg, Augmentation_cfg, MODELS_LIST, NUM_WORKERS, IMS_PER_BATCH,
# BATCH_SIZE_PER_IMAGE and NUM_CLASSES -- presumably all defined in detectron_conf; consider
# importing them explicitly once that is confirmed.
from Detectron2.detectron_conf import *
# Display the momentum and base learning rate stored in the project config.
Detectron2_cfg["Momentum"], Detectron2_cfg["base_lr"]

(0.9, 0.00016)

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.datasets import register_coco_instances

# Register the three COCO-format splits with Detectron2.
# Each entry is (dataset name, Data_cfg key holding the annotation json path, image sub-folder).
for _name, _labels_key, _subdir in (
    ("my_dataset_train", "Coco_labels_train_dir", "Train"),
    ("my_dataset_test", "Coco_labels_test_dir", "Test"),
    ("my_dataset_val", "Coco_labels_val_dir", "Val"),
):
    # Registering an already-registered name raises, which made this cell fail when re-run.
    # Drop any stale registration first so the cell is idempotent and picks up path changes.
    if _name in DatasetCatalog.list():
        DatasetCatalog.remove(_name)
    if _name in MetadataCatalog.list():
        MetadataCatalog.remove(_name)
    register_coco_instances(
        _name,
        {},
        Data_cfg[_labels_key],
        os.path.join(Data_cfg["cropped_Images_dir"], _subdir),
    )

In [None]:
from detectron2.engine.hooks import HookBase
from detectron2.evaluation import inference_context
from detectron2.utils.logger import log_every_n_seconds
from detectron2.data import DatasetMapper, build_detection_test_loader
import detectron2.utils.comm as comm
import torch
import time
import datetime
import logging
import numpy as np  # Ensure numpy is imported for mean calculation

class LossEvalHook(HookBase):
    """
    Training hook that periodically computes the loss on a held-out dataset.

    Every ``eval_period`` iterations (and on the final iteration) the hook runs the model
    over every batch of ``data_loader``, sums the loss terms the model returns for each
    batch, and stores the mean over all batches in the trainer's event storage under the
    key ``validation_loss``.

    Attributes:
        _model (torch.nn.Module): The model being trained and evaluated.
        _period (int): How often (in training iterations) to evaluate; values <= 0 disable
            the periodic evaluation (the final-iteration evaluation still runs).
        _data_loader (iterable): The data loader for the held-out dataset. It must support
            ``len()``.
    """

    def __init__(self, eval_period, model, data_loader):
        """
        Initializes the LossEvalHook.

        Args:
            eval_period (int): The number of training iterations between each evaluation.
            model (torch.nn.Module): The model that will be evaluated.
            data_loader (iterable): The data loader providing the held-out dataset.
        """
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader

    def _do_loss_eval(self):
        """
        Compute the loss on every batch of the held-out dataset and log the mean.

        Progress and an ETA are logged at most every 5 seconds. The mean loss is written
        to ``self.trainer.storage`` as ``validation_loss``; nothing is written when the
        data loader yields no batches.

        Returns:
            list[float]: The summed loss of each batch, in iteration order.
        """
        total = len(self._data_loader)  # Total number of batches
        num_warmup = min(5, total - 1)  # Batches excluded from the timing statistics

        start_time = time.perf_counter()
        total_compute_time = 0
        losses = []  # Summed loss of each batch
        for idx, inputs in enumerate(self._data_loader):
            # Restart the clocks once the warm-up batches are done.
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0

            # The forward pass must sit inside the timed region; otherwise the reported
            # per-batch compute time only measures the synchronize call.
            start_compute_time = time.perf_counter()
            loss_batch = self._get_loss(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()  # Wait for queued CUDA work before reading the clock
            total_compute_time += time.perf_counter() - start_compute_time
            losses.append(loss_batch)

            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            # Log progress and ETA after the warm-up period (or early if batches are very slow).
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    f"Loss on Validation  done {idx + 1}/{total}. {seconds_per_img:.4f} s / img. ETA={eta}",
                    n=5,
                )

        # An empty loader would otherwise log NaN (and emit a RuntimeWarning from np.mean).
        if losses:
            mean_loss = np.mean(losses)
            self.trainer.storage.put_scalar('validation_loss', mean_loss)
        comm.synchronize()  # Synchronize across all processes

        return losses

    def _get_loss(self, data):
        """
        Run the model on one batch and return the sum of the losses it reports.

        The forward pass runs under ``torch.no_grad()``: the result is only logged, never
        back-propagated, so building an autograd graph would just waste memory.

        Args:
            data: One batch as yielded by the data loader.

        Returns:
            float: The sum of all values in the dict returned by the model.
        """
        # Assumes the model is in training mode so that it returns a dict of losses
        # rather than predictions -- TODO confirm for non-standard meta-architectures.
        with torch.no_grad():
            metrics_dict = self._model(data)
        # Convert every entry to a plain Python float.
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }
        return sum(metrics_dict.values())

    def after_step(self):
        """Run the loss evaluation every ``_period`` iterations and on the last iteration."""
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()
        # NOTE(review): constant scalar written on every step; looks like leftover debugging.
        # Kept so the logged metrics stay unchanged -- confirm it can be removed.
        self.trainer.storage.put_scalars(timetest=12)

In [None]:
from detectron2.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader
from detectron2.engine import DefaultTrainer
from detectron2.data.transforms import RandomApply, RandomBrightness, RandomRotation, RandomFlip, RandomCrop, RandomContrast
from detectron2.config import CfgNode
from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
import torch

class CustomTrainer(DefaultTrainer):
    """
    DefaultTrainer variant with a periodic held-out loss evaluation, custom training
    augmentations and an AdamW optimizer.

    * ``build_hooks`` adds a ``LossEvalHook`` that evaluates the loss on
      ``cfg.DATASETS.TEST[0]`` during training.
    * ``build_train_loader`` applies the augmentations described by ``Augmentation_cfg``.
    * ``build_optimizer`` builds AdamW from the solver settings in ``cfg``.
    """

    def build_hooks(self):
        """
        Extend the default hooks with a ``LossEvalHook``.

        Returns:
            List[HookBase]: The default hooks with the loss-evaluation hook inserted just
            before the last one.
        """
        hooks = super().build_hooks()

        # Insert before the last default hook so that hook still runs after the loss has
        # been stored for the current iteration.
        hooks.insert(-1, LossEvalHook(
            eval_period=20,  # Evaluate the held-out loss every 20 training iterations.
            model=self.model,
            data_loader=build_detection_test_loader(
                self.cfg,
                self.cfg.DATASETS.TEST[0],  # First dataset listed in cfg.DATASETS.TEST.
                # is_train=True is presumably needed so the mapper keeps the annotations the
                # loss computation requires -- TODO confirm against the DatasetMapper docs.
                DatasetMapper(self.cfg, is_train=True)
            )
        ))

        return hooks

    @classmethod
    def build_train_loader(cls, cfg):
        """
        Build the training data loader with the project's augmentations.

        Each ``Augmentation_cfg`` entry is read as ``(*arguments, probability)``: the last
        element is the probability passed to ``RandomApply`` and the preceding elements
        parameterize the augmentation. ``RandomFlip`` uses its defaults and only reads
        element 0 as the probability.

        Args:
            cfg (CfgNode): Configuration node containing settings for data loading.

        Returns:
            The training data loader built by ``build_detection_train_loader`` with the
            augmenting mapper.
        """
        mapper = DatasetMapper(cfg, is_train=True, augmentations=[
            # Brightness adjustment.
            RandomApply(RandomBrightness(*Augmentation_cfg["RandomBrightness"][:-1]), Augmentation_cfg["RandomBrightness"][-1]),
            # Rotation; the angle argument receives every element except the probability.
            RandomApply(RandomRotation(angle=Augmentation_cfg["RandomRotation"][:-1]), Augmentation_cfg["RandomRotation"][-1]),
            # Flip with RandomFlip's default settings.
            RandomApply(RandomFlip(), Augmentation_cfg["RandomFlip"][0]),
            # Crop with a size given relative to the image.
            RandomApply(RandomCrop("relative", Augmentation_cfg["RandomCrop"][:-1]), Augmentation_cfg["RandomCrop"][-1]),
            # Contrast adjustment.
            RandomApply(RandomContrast(*Augmentation_cfg["RandomContrast"][:-1]), Augmentation_cfg["RandomContrast"][-1])
        ])

        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_optimizer(cls, cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer:
        """
        Build the AdamW optimizer for ``model`` from the solver settings in ``cfg``.

        Previously this method computed the parameter groups but never returned an
        optimizer, so the trainer received ``None``.

        Args:
            cfg (CfgNode): Configuration node containing optimizer settings.
            model (torch.nn.Module): The model for which the optimizer is being configured.

        Returns:
            torch.optim.Optimizer: An AdamW optimizer, wrapped with gradient clipping when
            ``cfg.SOLVER.CLIP_GRADIENTS.ENABLED`` is set.
        """
        # Per-parameter groups carrying the learning-rate / weight-decay overrides for
        # normalization layers and biases.
        params = get_default_optimizer_params(
            model,
            base_lr=cfg.SOLVER.BASE_LR,
            weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
            bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )
        # Passing the optimizer *class* works with both older and newer Detectron2 releases:
        # the helper returns the (possibly clipping-enabled) class, which is then instantiated.
        # Note that AdamW does not use cfg.SOLVER.MOMENTUM.
        optimizer_cls = maybe_add_gradient_clipping(cfg, torch.optim.AdamW)
        return optimizer_cls(
            params,
            lr=cfg.SOLVER.BASE_LR,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        )

In [None]:
from detectron2.engine import DefaultTrainer
# Create the configuration object that the cells below customize.
cfg = get_cfg()

In [None]:
# Display the configuration as created above.
# NOTE(review): the repr is very long; consider clearing this output before sharing the notebook.
cfg

CfgNode({'VERSION': 2, 'MODEL': CfgNode({'LOAD_PROPOSALS': False, 'MASK_ON': False, 'KEYPOINT_ON': False, 'DEVICE': 'cuda', 'META_ARCHITECTURE': 'GeneralizedRCNN', 'WEIGHTS': '', 'PIXEL_MEAN': [103.53, 116.28, 123.675], 'PIXEL_STD': [1.0, 1.0, 1.0], 'BACKBONE': CfgNode({'NAME': 'build_resnet_backbone', 'FREEZE_AT': 2}), 'FPN': CfgNode({'IN_FEATURES': [], 'OUT_CHANNELS': 256, 'NORM': '', 'FUSE_TYPE': 'sum'}), 'PROPOSAL_GENERATOR': CfgNode({'NAME': 'RPN', 'MIN_SIZE': 0}), 'ANCHOR_GENERATOR': CfgNode({'NAME': 'DefaultAnchorGenerator', 'SIZES': [[32, 64, 128, 256, 512]], 'ASPECT_RATIOS': [[0.5, 1.0, 2.0]], 'ANGLES': [[-90, 0, 90]], 'OFFSET': 0.0}), 'RPN': CfgNode({'HEAD_NAME': 'StandardRPNHead', 'IN_FEATURES': ['res4'], 'BOUNDARY_THRESH': -1, 'IOU_THRESHOLDS': [0.3, 0.7], 'IOU_LABELS': [0, -1, 1], 'BATCH_SIZE_PER_IMAGE': 256, 'POSITIVE_FRACTION': 0.5, 'BBOX_REG_LOSS_TYPE': 'smooth_l1', 'BBOX_REG_LOSS_WEIGHT': 1.0, 'BBOX_REG_WEIGHTS': (1.0, 1.0, 1.0, 1.0), 'SMOOTH_L1_BETA': 0.0, 'LOSS_WEIGH

In [None]:
# Load the model-zoo base configuration first; every project-specific override follows it
# so that values from the YAML file cannot silently replace them.
cfg.merge_from_file(model_zoo.get_config_file(MODELS_LIST[1]))

# Do not freeze any backbone stage. This is set after the merge and only touches FREEZE_AT,
# instead of replacing the whole BACKBONE node before the merge.
cfg.MODEL.BACKBONE.FREEZE_AT = 0
cfg.OUTPUT_DIR = "./Detectron2/Test_results/SGD0.99_001_00_3x101"
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_test",)
cfg.SOLVER.MOMENTUM = 0.99 #Detectron2_cfg["Momentum"]
cfg.SOLVER.CHECKPOINT_PERIOD = 200  # Save a checkpoint every 200 iterations
# cfg.TEST.EVAL_PERIOD = 20
cfg.DATALOADER.NUM_WORKERS = NUM_WORKERS
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODELS_LIST[1])  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = IMS_PER_BATCH  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.001 # Detectron2_cfg["base_lr"]  # pick a good LR
cfg.SOLVER.MAX_ITER = 1500
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = BATCH_SIZE_PER_IMAGE  # The "RoIHead batch size". 128 is faster, and good enough for this dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES  # only has one class (Track). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes; a few popular unofficial tutorials incorrectly use num_classes+1 here.

# Grayscale input: a single-channel image format with one mean / std value.
# If this raises an error, remove the FORMAT line and use three equal values instead,
# e.g. PIXEL_MEAN = [26.9, 26.9, 26.9].
cfg.MODEL.PIXEL_MEAN = [26.9]
cfg.MODEL.PIXEL_STD = [34.4]
cfg.INPUT.FORMAT = "L"

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# trainer = DefaultTrainer(cfg)
trainer = CustomTrainer(cfg)
# resume=True: continue from the last checkpoint in OUTPUT_DIR if there is one,
# otherwise start from cfg.MODEL.WEIGHTS.
trainer.resume_or_load(resume=True)
trainer.train()

CfgNode({'NAME': 'build_resnet_backbone', 'FREEZE_AT': 0})