# Dependencies

In [None]:
import IPython.display as display
!pip uninstall -y requests

!pip install -U urllib3 requests
display.clear_output()

In [None]:
!pip uninstall -y torch torchvision
!pip install torch==2.0.0 torchvision==0.15.1

In [None]:
!pip install -U openmim
!pip install "mmengine>=0.7.1,<1.0.0" \
 "mmcv>=2.0.0rc4,<2.1.0" \
 -f https://download.openmmlab.com/mmcv/dist/cu117/torch2.0.0/index.html \
--trusted-host download.openmmlab.com

In [None]:
# Install mmdetection
!rm -rf mmdetection
!git clone https://github.com/open-mmlab/mmdetection.git
%cd mmdetection

%pip install -e .

In [None]:
# Stage the Kaggle input dataset inside the mmdetection checkout, where the
# dataset config expects it (data_root = './datasets/').
import os
import shutil

# Where the dataset lives (Kaggle input) and where the copy goes.
src_dir = '/kaggle/input/loserspcb-v2/combined_trialv4_updated'
dst_dir = '/kaggle/working/mmdetection/datasets'

# copytree will not write into an existing directory by default, so drop any
# copy left over from a previous run first.
if os.path.exists(dst_dir):
    shutil.rmtree(dst_dir)

# Recursively copy the whole dataset tree.
shutil.copytree(src_dir, dst_dir)

# Config modification 

In [None]:
# Source text of the dataset config: a COCO-format dataset rooted at
# ./datasets/ with five classes, 1024x1024 keep-ratio resizing, and COCO bbox
# evaluation against val.json. The string is written to disk verbatim below.
config_coco = """
# dataset settings
dataset_type = 'CocoDataset'
data_root = './datasets/'

train_batch_size_per_gpu = 4
val_batch_size_per_gpu = 2
train_num_workers = 1 
val_num_workers = 1 
metainfo = {
    'classes': ('MP','OC','SC','SP','SPC')
    }
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
    # If you don't have a gt annotation, delete the pipeline
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        metainfo=metainfo,
        ann_file='train.json',
        data_prefix=dict(img='train/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        metainfo=metainfo,
        ann_file='val.json',
        data_prefix=dict(img='val/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'val.json',
    classwise=True,
    metric='bbox',
    format_only=False,
    backend_args=backend_args)
test_evaluator = val_evaluator

"""
# Overwrites the stock base dataset config in place. The path is relative, so
# this only works while the current directory is the mmdetection checkout.
# NOTE(review): presumably every config inheriting
# _base_/datasets/coco_detection.py now picks up this dataset - confirm.
with open('./configs/_base_/datasets/coco_detection.py', 'w') as f:
    f.write(config_coco)

In [None]:
# Sanity check: parse the dataset config written above and dump it as
# indented JSON so the resolved values can be inspected.
import json
from mmengine import Config

cfg = Config.fromfile('/kaggle/working/mmdetection/configs/_base_/datasets/coco_detection.py')

# The parsed config is serialised through its underlying dict.
formatted_cfg = json.dumps(cfg._cfg_dict, indent=4)
print(formatted_cfg)

In [None]:
# Model/training config for the PCB-defect run: Faster R-CNN R50-FPN with
# GeneralizedAttention ('0010') plugins and DCN in the backbone, trained for
# 50 epochs. The string below is written verbatim to a new config file.
#
# The inherited base model is a COCO model, so the box head's class count is
# overridden to match the 5 classes declared in the dataset config; otherwise
# the detector can emit label indices that have no category in the dataset.
#
# NOTE(review): only max_epochs is raised here. The learning-rate schedule is
# inherited from the "1x" base config and presumably still targets a 12-epoch
# run - confirm whether param_scheduler should be rescaled for 50 epochs.
config_pcb_defect = """

_base_ = '../faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py'
model = dict(
    backbone=dict(
        plugins=[
            dict(
                cfg=dict(
                    type='GeneralizedAttention',
                    spatial_range=-1,
                    num_heads=8,
                    attention_type='0010',
                    kv_stride=2),
                stages=(False, False, True, True),
                position='after_conv2')
        ],
        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, True, True, True)),
    # must equal the number of entries in metainfo['classes'] of the dataset
    roi_head=dict(bbox_head=dict(num_classes=5)))

max_epochs = 50

train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=2)

# hooks
default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook',
        interval=5,
        max_keep_ckpts=1,  # only keep the latest checkpoint
        save_best='auto'),
    logger=dict(type='LoggerHook', interval=5))

"""
# Relative path: requires the current directory to be the mmdetection checkout.
with open('./configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py', 'w') as f:
    f.write(config_pcb_defect)

In [None]:
# Sanity check: resolve the PCB-defect model config (including everything it
# inherits through _base_) and dump it as indented JSON.
import json
from mmengine import Config

cfg = Config.fromfile('configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py')

# The parsed config is serialised through its underlying dict.
formatted_cfg = json.dumps(cfg._cfg_dict, indent=4)
print(formatted_cfg)

In [None]:
# Persist the fully resolved training config as JSON next to the notebook
# outputs, so the exact settings of this run can be inspected later.
import json
from mmengine import Config

# Where the JSON dump is written.
output_file = '/kaggle/working/formatted_config.json'

# Parse the config file and serialise its underlying dict with indentation.
cfg = Config.fromfile('configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py')
formatted_cfg = json.dumps(cfg._cfg_dict, indent=4)

with open(output_file, 'w') as f:
    f.write(formatted_cfg)

print(f'Configuration saved as {output_file}')

# Visualization 

In [None]:

# # mmdet's own browse_dataset tool does not work here, so the browse_dataset
# # script from mmyolo is copied below with a single line changed:
# # from mmyolo.registry import DATASETS, VISUALIZERS >>> from mmdet.registry import DATASETS, VISUALIZERS

# code ='''
# # Copyright (c) OpenMMLab. All rights reserved.
# import argparse
# import os.path as osp
# import sys
# from typing import Tuple

# import cv2
# import mmcv
# import numpy as np
# from mmdet.models.utils import mask2ndarray
# from mmdet.structures.bbox import BaseBoxes
# from mmengine.config import Config, DictAction
# from mmengine.dataset import Compose
# from mmengine.registry import init_default_scope
# from mmengine.utils import ProgressBar
# from mmengine.visualization import Visualizer

# from mmdet.registry import DATASETS, VISUALIZERS


# # TODO: Support for printing the change in key of results
# # TODO: Some bug. If you meet some bug, please use the original
# def parse_args():
#     parser = argparse.ArgumentParser(description='Browse a dataset')
#     parser.add_argument('config', help='train config file path')
#     parser.add_argument(
#         '--phase',
#         '-p',
#         default='train',
#         type=str,
#         choices=['train', 'test', 'val'],
#         help='phase of dataset to visualize, accept "train" "test" and "val".'
#         ' Defaults to "train".')
#     parser.add_argument(
#         '--mode',
#         '-m',
#         default='transformed',
#         type=str,
#         choices=['original', 'transformed', 'pipeline'],
#         help='display mode; display original pictures or '
#         'transformed pictures or comparison pictures. "original" '
#         'means show images load from disk; "transformed" means '
#         'to show images after transformed; "pipeline" means show all '
#         'the intermediate images. Defaults to "transformed".')
#     parser.add_argument(
#         '--out-dir',
#         default='output',
#         type=str,
#         help='If there is no display interface, you can save it.')
#     parser.add_argument('--not-show', default=False, action='store_true')
#     parser.add_argument(
#         '--show-number',
#         '-n',
#         type=int,
#         default=sys.maxsize,
#         help='number of images selected to visualize, '
#         'must bigger than 0. if the number is bigger than length '
#         'of dataset, show all the images in dataset; '
#         'default "sys.maxsize", show all images in dataset')
#     parser.add_argument(
#         '--show-interval',
#         '-i',
#         type=float,
#         default=3,
#         help='the interval of show (s)')
#     parser.add_argument(
#         '--cfg-options',
#         nargs='+',
#         action=DictAction,
#         help='override some settings in the used config, the key-value pair '
#         'in xxx=yyy format will be merged into config file. If the value to '
#         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
#         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
#         'Note that the quotation marks are necessary and that no white space '
#         'is allowed.')
#     args = parser.parse_args()
#     return args


# def _get_adaptive_scale(img_shape: Tuple[int, int],
#                         min_scale: float = 0.3,
#                         max_scale: float = 3.0) -> float:
#     """Get adaptive scale according to image shape.

#     The target scale depends on the the short edge length of the image. If the
#     short edge length equals 224, the output is 1.0. And output linear
#     scales according the short edge length. You can also specify the minimum
#     scale and the maximum scale to limit the linear scale.

#     Args:
#         img_shape (Tuple[int, int]): The shape of the canvas image.
#         min_scale (int): The minimum scale. Defaults to 0.3.
#         max_scale (int): The maximum scale. Defaults to 3.0.
#     Returns:
#         int: The adaptive scale.
#     """
#     short_edge_length = min(img_shape)
#     scale = short_edge_length / 224.
#     return min(max(scale, min_scale), max_scale)


# def make_grid(imgs, names):
#     """Concat list of pictures into a single big picture, align height here."""
#     visualizer = Visualizer.get_current_instance()
#     ori_shapes = [img.shape[:2] for img in imgs]
#     max_height = int(max(img.shape[0] for img in imgs) * 1.1)
#     min_width = min(img.shape[1] for img in imgs)
#     horizontal_gap = min_width // 10
#     img_scale = _get_adaptive_scale((max_height, min_width))

#     texts = []
#     text_positions = []
#     start_x = 0
#     for i, img in enumerate(imgs):
#         pad_height = (max_height - img.shape[0]) // 2
#         pad_width = horizontal_gap // 2
#         # make border
#         imgs[i] = cv2.copyMakeBorder(
#             img,
#             pad_height,
#             max_height - img.shape[0] - pad_height + int(img_scale * 30 * 2),
#             pad_width,
#             pad_width,
#             cv2.BORDER_CONSTANT,
#             value=(255, 255, 255))
#         texts.append(f'{"execution: "}{i}\\n{names[i]}\\n{ori_shapes[i]}')

#         text_positions.append(
#             [start_x + img.shape[1] // 2 + pad_width, max_height])
#         start_x += img.shape[1] + horizontal_gap

#     display_img = np.concatenate(imgs, axis=1)
#     visualizer.set_image(display_img)
#     img_scale = _get_adaptive_scale(display_img.shape[:2])
#     visualizer.draw_texts(
#         texts,
#         positions=np.array(text_positions),
#         font_sizes=img_scale * 7,
#         colors='black',
#         horizontal_alignments='center',
#         font_families='monospace')
#     return visualizer.get_image()


# def swap_pipeline_position(dataset_cfg):
#     load_ann_tfm_name = 'LoadAnnotations'
#     pipeline = dataset_cfg.get('pipeline')
#     if (pipeline is None):
#         return dataset_cfg
#     all_transform_types = [tfm['type'] for tfm in pipeline]
#     if load_ann_tfm_name in all_transform_types:
#         load_ann_tfm_index = all_transform_types.index(load_ann_tfm_name)
#         load_ann_tfm = pipeline.pop(load_ann_tfm_index)
#         pipeline.insert(1, load_ann_tfm)


# class InspectCompose(Compose):
#     """Compose multiple transforms sequentially.

#     And record "img" field of all results in one list.
#     """

#     def __init__(self, transforms, intermediate_imgs):
#         super().__init__(transforms=transforms)
#         self.intermediate_imgs = intermediate_imgs

#     def __call__(self, data):
#         if 'img' in data:
#             self.intermediate_imgs.append({
#                 'name': 'original',
#                 'img': data['img'].copy()
#             })
#         self.ptransforms = [
#             self.transforms[i] for i in range(len(self.transforms) - 1)
#         ]
#         for t in self.ptransforms:
#             data = t(data)
#             # Keep the same meta_keys in the PackDetInputs
#             self.transforms[-1].meta_keys = [key for key in data]
#             data_sample = self.transforms[-1](data)
#             if data is None:
#                 return None
#             if 'img' in data:
#                 self.intermediate_imgs.append({
#                     'name':
#                     t.__class__.__name__,
#                     'dataset_sample':
#                     data_sample['data_samples']
#                 })
#         return data


# def main():
#     args = parse_args()
#     cfg = Config.fromfile(args.config)
#     if args.cfg_options is not None:
#         cfg.merge_from_dict(args.cfg_options)

#     init_default_scope(cfg.get('default_scope', 'mmyolo'))

#     dataset_cfg = cfg.get(args.phase + '_dataloader').get('dataset')
#     if (args.phase in ['test', 'val']):
#         swap_pipeline_position(dataset_cfg)
#     dataset = DATASETS.build(dataset_cfg)
#     visualizer = VISUALIZERS.build(cfg.visualizer)
#     visualizer.dataset_meta = dataset.metainfo

#     intermediate_imgs = []

#     if not hasattr(dataset, 'pipeline'):
#         # for dataset_wrapper
#         dataset = dataset.dataset

#     # TODO: The dataset wrapper occasion is not considered here
#     dataset.pipeline = InspectCompose(dataset.pipeline.transforms,
#                                       intermediate_imgs)

#     # init visualization image number
#     assert args.show_number > 0
#     display_number = min(args.show_number, len(dataset))

#     progress_bar = ProgressBar(display_number)
#     for i, item in zip(range(display_number), dataset):
#         image_i = []
#         result_i = [result['dataset_sample'] for result in intermediate_imgs]
#         for k, datasample in enumerate(result_i):
#             image = datasample.img
#             gt_instances = datasample.gt_instances
#             image = image[..., [2, 1, 0]]  # bgr to rgb
#             gt_bboxes = gt_instances.get('bboxes', None)
#             if gt_bboxes is not None and isinstance(gt_bboxes, BaseBoxes):
#                 gt_instances.bboxes = gt_bboxes.tensor
#             gt_masks = gt_instances.get('masks', None)
#             if gt_masks is not None:
#                 masks = mask2ndarray(gt_masks)
#                 gt_instances.masks = masks.astype(bool)
#                 datasample.gt_instances = gt_instances
#             # get filename from dataset or just use index as filename
#             visualizer.add_datasample(
#                 'result',
#                 image,
#                 datasample,
#                 draw_pred=False,
#                 draw_gt=True,
#                 show=False)
#             image_show = visualizer.get_image()
#             image_i.append(image_show)

#         if args.mode == 'original':
#             image = image_i[0]
#         elif args.mode == 'transformed':
#             image = image_i[-1]
#         else:
#             image = make_grid([result for result in image_i],
#                               [result['name'] for result in intermediate_imgs])

#         if hasattr(datasample, 'img_path'):
#             filename = osp.basename(datasample.img_path)
#         else:
#             # some dataset have not image path
#             filename = f'{i}.jpg'
#         out_file = osp.join(args.out_dir,
#                             filename) if args.out_dir is not None else None

#         if out_file is not None:
#             mmcv.imwrite(image[..., ::-1], out_file)

#         if not args.not_show:
#             visualizer.show(
#                 image, win_name=filename, wait_time=args.show_interval)

#         intermediate_imgs.clear()
#         progress_bar.update()


# if __name__ == '__main__':
#     main()
# '''

# with open('./tools/analysis_tools/browse_dataset_2.py', 'w') as f:
#     f.write(code)

In [None]:
# !python tools/analysis_tools/browse_dataset_2.py configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py --mode pipeline --out-dir dataset_check_3 --show-number 3 --show-interval 12

# Training 

In [None]:
# Launch training of the PCB-defect config through mmdetection's
# tools/dist_train.sh launcher (path is relative to the mmdetection checkout).
# NOTE(review): the positional `2` is presumably the number of GPUs to use -
# confirm against the script and the accelerator attached to this session.
# Logs and checkpoints go to the faster-rcnn_loserspcb_50e/ work dir.
!bash ./tools/dist_train.sh configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py 2 --work-dir faster-rcnn_loserspcb_50e/

In [None]:
# Bundle the training work directory into a single zip under /kaggle/working.
import shutil

# Directory to archive and the zip file that should result from it.
directory_to_zip = '/kaggle/working/mmdetection/faster-rcnn_loserspcb_50e'
zip_file_path = '/kaggle/working/faster-rcnn_loserspcb_50e.zip'

# make_archive appends the ".zip" extension itself, so it is handed the target
# path with that suffix stripped.
archive_base = zip_file_path[:-len('.zip')]
shutil.make_archive(archive_base, 'zip', directory_to_zip)

# Testing

In [None]:
# !python tools/test.py \
# configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py \
# /kaggle/input/inference/best_coco_SH_precision_epoch_30.pth\
# --work-dir results/evaluate \
# --out results/results.pkl

# Flops

In [None]:
# #I modified the get_flops scripts so that it works

# import shutil

# #Define the source file path and the destination file path
# source = '/kaggle/input/get-flops-2/get_flops_2.py'
# destination = '/kaggle/working/mmdetection/tools/analysis_tools/get_flops_2.py'

# #Copy the file
# shutil.copy(source, destination)

In [None]:
# !python tools/analysis_tools/get_flops_2.py configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py

# Benchmarking

In [None]:
# !python tools/analysis_tools/benchmark.py configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py \
# --checkpoint /kaggle/input/inference/best_coco_SH_precision_epoch_30.pth \
# --task inference \
# --repeat-num 5 \
# --max-iter 100 \
# --log-interval 50 \
# --num-warmup 10 \
# --work-dir ./results

# Confusion Matrix

In [None]:

# import shutil

# # Define the source file path and the destination file path
# source = '/kaggle/input/confusion-matrix/confusion_matrix.py'
# destination = '/kaggle/working/mmdetection/tools/analysis_tools/confusion_matrix_4.py'

# # Copy the file
# shutil.copy(source, destination)

In [None]:
# !python tools/analysis_tools/confusion_matrix_4.py \
# configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py  \
# results/results.pkl  \
# ./results \
# --show \
# --score-thr 0.5 \
# --tp-iou-thr 0.5

# Results saving

In [None]:
# import shutil

# # Directory to be zipped
# directory_to_zip = '/kaggle/working/mmdetection/results'

# # Destination zip file path
# zip_file_path = '/kaggle/working/faster-ecnn_dspcbsd_results.zip'

# # Create a zip file
# shutil.make_archive(zip_file_path[:-4], 'zip', directory_to_zip)

# Inference 

In [None]:
# from mmdet.apis import DetInferencer
# import glob

# # Choose to use a config
# config = 'configs/empirical_attention/faster-rcnn_r50-attn0010-dcn_fpn_1x_pcb_defect.py'
# # Setup a checkpoint file to load
# checkpoint = '/kaggle/input/inference/best_coco_SH_precision_epoch_30.pth'

# # Set the device to be used for evaluation
# device = 'cuda:0'

# # Initialize the DetInferencer
# inferencer = DetInferencer(config, checkpoint, device)

# # Use the detector to do inference
# img = '/kaggle/input/dspcbsd-coco/Data_COCO/val/0045409.jpg'

# result = inferencer(img, out_dir='./output',no_save_pred=False, pred_score_thr=0.5)

In [None]:
# # Show the output image
# from PIL import Image
# Image.open('./output/vis/0045409.jpg')