
# **Trained_Detectron2_Monday_final**

## Installing libraries

In [1]:
# Install required libraries
!pip install pyyaml==5.1
!pip install torch torchvision
!apt-get install tesseract-ocr-all
!pip install pytesseract
!pip install pdf2image
!apt-get install -y poppler-utils
!pip install PyPDF2 pdf2image pytesseract
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/274.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
Reading package lists... Done
Building dependency tree... Do

## Import necessary libraries

In [5]:
import torch
import os
import cv2
import numpy as np
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.logger import setup_logger
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
import pytesseract
from pytesseract import Output
import zipfile
from pdf2image import convert_from_path  # Added this import for PDF conversion
from detectron2.data import transforms as T
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from google.colab import drive  # For Google Drive mounting

# Mount Google Drive to store checkpoints
drive.mount('/content/drive')

# Setup logger
setup_logger()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<Logger detectron2 (DEBUG)>

## Early stopping configuration

In [7]:
class EarlyStoppingTrainer(DefaultTrainer):
    def __init__(self, cfg, patience=5, min_delta=0.0):
        super().__init__(cfg)
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = float("inf")
        self.counter = 0

    def evaluate_and_early_stop(self):
        # Evaluate on the validation set
        evaluator = COCOEvaluator("my_dataset_val", cfg, False, output_dir="./output/")
        val_loader = build_detection_test_loader(self.cfg, "my_dataset_val")
        metrics = inference_on_dataset(self.model, val_loader, evaluator)

        # Get validation loss (you can choose a different metric if you prefer)
        validation_loss = metrics["bbox"].get("AP", None)


        # Early stopping check
        if self.best_loss - validation_loss > self.min_delta:
            self.best_loss = validation_loss
            self.counter = 0  # Reset the patience counter
        else:
            self.counter += 1

        # If validation loss hasn't improved for 'patience' evaluations, stop training
        if self.counter >= self.patience:
            print(f"Early stopping triggered. Stopping training after {self.counter} evaluations without improvement.")
            return True
        return False

    def train(self):
        super().train()
        for iteration in range(cfg.SOLVER.MAX_ITER):
            self.iteration = iteration
            should_stop = self.evaluate_and_early_stop()
            if should_stop:
                break

 ## Extract the dataset from the zip file using Python's zipfile module

In [8]:
zip_path = '/content/data.zip'  # Path to the ZIP file
extract_dir = '/content/train'  # Path where you want to extract the data

# Ensure the zip file exists
if os.path.exists(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('/content')

## Register your custom dataset in COCO format (Train and Validation)

In [9]:
register_coco_instances("my_dataset_train", {}, "/content/train/instances_Train.json", "/content/train")
register_coco_instances("my_dataset_val", {}, "/content/test/instances_Test.json", "/content/test")  # Assuming you have a separate test set

## Configure the model for training

In [10]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_val",)
cfg.DATALOADER.NUM_WORKERS = 4  # Increased to improve data loading efficiency
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Initialize from pre-trained model
#cfg.MODEL.WEIGHTS = "/content/model_01937.pth"

## Adjustments for better performance

In [11]:
cfg.SOLVER.IMS_PER_BATCH = 16  # Increased batch size
cfg.SOLVER.BASE_LR = 5e-05  # Reduced learning rate for more stable training
cfg.SOLVER.MAX_ITER = 500  # Increased iterations for better convergence
cfg.SOLVER.WARMUP_ITERS = 1000  # Add warmup to help the model stabilize
cfg.SOLVER.STEPS = (120000, 160000)  # Steps for LR decay
cfg.SOLVER.GAMMA = 0.1  # Learning rate decay factor
cfg.SOLVER.WARMUP_FACTOR = 0.001
cfg.SOLVER.CLIP_GRADIENTS.ENABLED = True  # Enable gradient clipping
cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0  # Clip gradients to avoid exploding gradients
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512  # Increased batch size for training
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 7  # Update this number based on the number of classes in your dataset

# Set checkpoint saving period to every 100 iterations
cfg.SOLVER.CHECKPOINT_PERIOD = 100  # Save checkpoint every 100 iterations

# Set the output directory to Google Drive to save checkpoints
cfg.OUTPUT_DIR = "/content/drive/MyDrive/checkpoints"  # Save checkpoints in Google Drive
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

## Data augmentations for better generalization


In [12]:
cfg.INPUT.AUGMENTATIONS = [
    T.ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'),
    T.RandomFlip(),
    T.RandomBrightness(0.9, 1.1),
    T.RandomContrast(0.9, 1.1),
    T.RandomRotation(angle=[-10, 10])
]

## Output directory and start training

In [13]:
early_stopping_trainer = EarlyStoppingTrainer(cfg, patience=5, min_delta=0.01)

[09/30 18:10:18 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res



In [14]:
early_stopping_trainer.resume_or_load(resume=False)


[09/30 18:10:37 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl ...


model_final_280758.pkl: 167MB [00:00, 188MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}


In [15]:
early_stopping_trainer.train()

[09/30 18:10:50 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


ERROR [09/30 18:12:04 d2.engine.train_loop]: Exception during training:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/detectron2/engine/train_loop.py", line 155, in train
    self.run_step()
  File "/usr/local/lib/python3.10/dist-packages/detectron2/engine/defaults.py", line 530, in run_step
    self._trainer.run_step()
  File "/usr/local/lib/python3.10/dist-packages/detectron2/engine/train_loop.py", line 322, in run_step
    losses.backward()
  File "/usr/local/lib/python3.10/dist-packages/torch/_tensor.py", line 521, in backward
    torch.autograd.backward(
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py", line 289, in backward
    _engine_run_backward(
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py", line 769, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
torch.OutOfMemoryError: CUDA out of memory. Tried to alloc

OutOfMemoryError: CUDA out of memory. Tried to allocate 900.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 869.06 MiB is free. Process 2489 has 13.90 GiB memory in use. Of the allocated memory 11.39 GiB is allocated by PyTorch, and 2.37 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)