# Import

In [1]:
import mmdet
import mmcv
import mmengine
from mmengine import Config
from mmengine.runner import set_random_seed
#from mmdet.datasets import build_dataset
#from mmdet.models import build_detector
from mmengine.runner import Runner
from mmdet.apis import init_detector, inference_detector
from mmengine.visualization import Visualizer

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import json

import warnings
warnings.filterwarnings('ignore') #Ignore "future" warnings and Data-Frame-Slicing warnings.

from sklearn.model_selection import KFold, StratifiedKFold
from pytorch_lightning import seed_everything

# Config

In [3]:
class CFG:
    class general:
        #project_name = "HuBMAP2023"
        input_path = "../data/input"
        output_path = "../data/output"
        save_name = "convnext_all_pseudo2_095_lr-7_10000"
        seed = 0
        cv = True
        #wandb_desabled = True
        n_splits = 5
        fold = [0] # list (0-idx start) or null. Set one element list, hold-out mode.

# Main

### Read data

In [4]:
tile_meta = pd.read_csv(f"{CFG.general.input_path}/tile_meta.csv")
polygons = pd.read_json(f"{CFG.general.input_path}/polygons.jsonl", lines=True)
train = pd.merge(polygons, tile_meta, on="id", how="left")
print(len(train))

1633


In [5]:
train = train[train["dataset"]==1].reset_index(drop=True) # using only dataset1
print(len(train))

422


In [6]:
with open(f"{CFG.general.input_path}/coco_annotations_all.json") as f:
    coco_json = json.load(f)

In [7]:
coco_info = pd.DataFrame(coco_json["info"])
coco_licenses = pd.DataFrame(coco_json["licenses"])
coco_categories = pd.DataFrame(coco_json["categories"])
coco_images = pd.DataFrame(coco_json["images"])
coco_annotations = pd.DataFrame(coco_json["annotations"])

In [8]:
with open(f"{CFG.general.input_path}/coco_annotations_pseudo2_c095_convnext_train_all.json") as f:
    coco_pseudo_json = json.load(f)

In [9]:
coco_info_pseudo = pd.DataFrame(coco_pseudo_json["info"])
coco_licenses_pseudo = pd.DataFrame(coco_pseudo_json["licenses"])
coco_categories_pseudo = pd.DataFrame(coco_pseudo_json["categories"])
coco_images_pseudo = pd.DataFrame(coco_pseudo_json["images"])
coco_annotations_pseudo = pd.DataFrame(coco_pseudo_json["annotations"])

In [10]:
coco_images_train = coco_images[coco_images["id"].isin(train["id"])].reset_index(drop=True)
coco_annotations_train = coco_annotations[coco_annotations["image_id"].isin(train["id"])].reset_index(drop=True)

# pseudo label データ使うかどうか
coco_images = pd.concat([coco_images_train, coco_images_pseudo]).reset_index(drop=True)
coco_annotations = pd.concat([coco_annotations_train, coco_annotations_pseudo]).reset_index(drop=True)

# pseudo label only
#coco_images_train = coco_images_pseudo
#coco_annotations_train = coco_annotations_pseudo

# bloodだけにするかどうか
coco_annotations = coco_annotations[coco_annotations["category_id"]==2].reset_index(drop=True)

# unsureを除くかどうか
#coco_annotations = coco_annotations[coco_annotations["category_id"]!=3].reset_index(drop=True)

# glomerulusを除くかどうか
#coco_annotations = coco_annotations[coco_annotations["category_id"]!=1].reset_index(drop=True)      

# unsureをbloodにするかどうか
#coco_annotations.loc[coco_annotations["category_id"]==3, "category_id"] = 2

coco_annotations["id"] = coco_annotations.index + 1

coco_json["categories"] = [coco_categories.iloc[1].to_dict()]
#coco_json["categories"] = coco_categories.iloc[:2].to_dict(orient="records")

coco_json["images"] = coco_images.to_dict(orient="records")
coco_json["annotations"] = coco_annotations.to_dict(orient="records")

output_file_path = f"{CFG.general.input_path}/coco_annotations_train_all.json"
with open(output_file_path, "w", encoding="utf-8") as output_file:
    json.dump(coco_json, output_file, ensure_ascii=True, indent=4)

print(f"annos: {len(coco_annotations)}, images: {len(coco_images)}")

annos: 53380, images: 7033


In [11]:
backbone = "convnext"
if backbone == "convnext":
    cfg = Config.fromfile("../mmdetection/configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py")
    #cfg.work_dir = ""
    #Runner.from_cfg(cfg)
    
    # cfg dataset
    cfg.metainfo = {
        #"classes": ("glomerulus", "blood_vessel", )
        "classes": ("blood_vessel", )
    }
    cfg.train_pipeline = [
            dict(type="LoadImageFromFile", backend_args=None),
            dict(type="LoadAnnotations", with_bbox=True, with_mask=True, poly2mask=True),
            dict(
                type="RandomChoiceResize",
                scales=[(640, 640), (768, 768), (896, 896), (1024, 1024), (1152, 1152), (1280, 1280), (1408, 1408), (1536, 1536)],
                keep_ratio=True),
            dict(
                #type="RandomFlip", direction=["horizontal", "vertical"], prob=[0.5, 0.5],
                type="RandomFlip",prob=0.0
                ),
            dict(type="PackDetInputs")
        ]
    cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline
    cfg.test_pipeline = [
            dict(type="LoadImageFromFile", backend_args=None),
            dict(type="Resize", scale=(1024, 1024), keep_ratio=True),
            dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
            dict(
                type="PackDetInputs",
                meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"))
        ]
    cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline
    cfg.data_root = f"{CFG.general.input_path}"
    cfg.train_dataloader.dataset.ann_file = f"coco_annotations_train_all.json"
    cfg.train_dataloader.dataset.data_root = cfg.data_root
    cfg.train_dataloader.dataset.data_prefix.img = "train"
    cfg.train_dataloader.dataset.metainfo = cfg.metainfo
    cfg.val_dataloader = None
    cfg.val_evaluator = None
    cfg.val_cfg = None
    cfg.test_dataloader = None
    cfg.test_evaluator = None 
    cfg.test_cfg = None

    # classes
    cfg.model.roi_head.bbox_head[0].num_classes = 1
    cfg.model.roi_head.bbox_head[1].num_classes = 1
    cfg.model.roi_head.bbox_head[2].num_classes = 1
    cfg.model.roi_head.mask_head.num_classes = 1

    # pretrained weight
    cfg.load_from = "/notebooks/data/output/pseudo_all/exp133_convnext_all_pseudo05/epoch_1.pth"

    # Set up working dir to save files and logs
    cfg.work_dir = f"{CFG.general.output_path}/{CFG.general.save_name}"

    # training configs, learning rate, optimizer
    max_iter = 10000
    cfg.train_cfg = dict(type="IterBasedTrainLoop", max_iters=max_iter, val_interval=500)
    cfg.train_dataloader.sampler = dict(type="InfiniteSampler", shuffle=True)
    cfg.log_processor = dict(by_epoch=False)
    cfg.optim_wrapper.type = "AmpOptimWrapper"
    #cfg.optim_wrapper.optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=1e-05)
    cfg.optim_wrapper.optimizer = dict(type="AdamW", lr=0.0000002, betas=(0.9, 0.999,), weight_decay=0.05)
    cfg.param_scheduler = [
            dict(
                type="LinearLR", start_factor=1.0e-3, by_epoch=False, begin=0, end=int(max_iter*0.1)),
            dict(
                type="CosineAnnealingLR",
                T_max=max_iter-int(max_iter*0.1),
                by_epoch=False,
                begin=int(max_iter*0.1),
                end=max_iter)
        ]

    # logger
    cfg.default_hooks.checkpoint.interval = 0
    cfg.default_hooks.logger.interval = 100

    # Set seed thus the results are more reproducible
    set_random_seed(CFG.general.seed, deterministic=False)

In [12]:
runner = Runner.from_cfg(cfg)

07/31 16:01:50 - mmengine - INFO - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.16 (main, Dec  7 2022, 01:11:51) [GCC 9.4.0]
    CUDA available: True
    numpy_random_seed: 209652396
    GPU 0: NVIDIA RTX A6000
    CUDA_HOME: /usr/local/cuda-11.6
    NVCC: Cuda compilation tools, release 11.6, V11.6.124
    GCC: x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
    PyTorch: 1.12.1+cu116
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.6
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=co

In [13]:
seed_everything(CFG.general.seed, workers=True)
runner.train()

Global seed set to 0


loading annotations into memory...
Done (t=1.58s)
creating index...
index created!
07/31 16:01:56 - mmengine - INFO - self.paramwise_cfg is {'decay_rate': 0.7, 'decay_type': 'layer_wise', 'num_layers': 6}
07/31 16:01:56 - mmengine - INFO - Build LearningRateDecayOptimizerConstructor  layer_wise 0.7 - 8
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.0.0.weight as id 0
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.0.0.bias as id 0
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.0.1.weight as id 0
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.0.1.bias as id 0
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.1.0.weight as id 2
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.1.0.bias as id 2
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_layers.1.1.weight as id 2
07/31 16:01:56 - mmengine - INFO - set param backbone.downsample_

Downloading: "https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth" to /root/.cache/torch/hub/checkpoints/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth



missing keys in source state_dict: norm0.weight, norm0.bias, norm1.weight, norm1.bias, norm2.weight, norm2.bias, norm3.weight, norm3.bias

Loads checkpoint by local backend from path: /notebooks/data/output/pseudo_all/exp133_convnext_all_pseudo05/epoch_1.pth
07/31 16:02:12 - mmengine - INFO - Load checkpoint from /notebooks/data/output/pseudo_all/exp133_convnext_all_pseudo05/epoch_1.pth
07/31 16:02:12 - mmengine - INFO - Checkpoints will be saved to /notebooks/data/output/convnext_all_pseudo2_095_lr-7_10000.
07/31 16:03:12 - mmengine - INFO - Iter(train) [  100/10000]  base_lr: 2.0000e-08 lr: 1.6471e-09  eta: 1:37:32  time: 0.5478  data_time: 0.0545  memory: 10528  loss: 1.2465  loss_rpn_cls: 0.0205  loss_rpn_bbox: 0.0145  s0.loss_cls: 0.2044  s0.acc: 94.0430  s0.loss_bbox: 0.2864  s0.loss_mask: 0.1703  s1.loss_cls: 0.1261  s1.acc: 91.6992  s1.loss_bbox: 0.1527  s1.loss_mask: 0.0871  s2.loss_cls: 0.0744  s2.acc: 91.6992  s2.loss_bbox: 0.0685  s2.loss_mask: 0.0416
07/31 16:04:07 - mmen

CascadeRCNN(
  (data_preprocessor): DetDataPreprocessor()
  (backbone): ConvNeXt(
    (downsample_layers): ModuleList(
      (0): Sequential(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
      )
      (2): Sequential(
        (0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
      )
      (3): Sequential(
        (0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
        (1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
      )
    )
    (stages): ModuleList(
      (0): Sequential(
        (0): ConvNeXtBlock(
          (depthwise_conv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (norm): LayerNorm2d((96,), ep