In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# #import numpy as np # linear algebra
# #import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# import torch
# import cv2 as cv
# import numpy as np
# import os
# import pandas as pd
# import pydicom

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# '''
# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
# '''
# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session




# Detection

## 1-1 Set up the MMDetection library for object detection

In [None]:
# torch version is 1.7.0+cu110
# The pins target the CUDA 11.0 builds, matching the cu110/torch1.7.0 wheel index used for mmcv-full below.

!pip install torch==1.7.0+cu110 torchvision==0.8.1+cu110 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# mmcv-full version is 1.3.8 (wheel index for CUDA 11.0 / torch 1.7.0); MMDetection itself is installed in the next cell

!pip install mmcv-full==1.3.8 -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html

In [None]:
# Install MMDetection v2.18.1 from source (editable install) in the Kaggle env.

# Remove any previous checkout first so the clone below also works when the cell is re-run.
!rm -rf mmdetection
!git clone -b v2.18.1 https://github.com/open-mmlab/mmdetection.git
!cd mmdetection && pip install -e .

# NOTE(review): Pillow is pinned to 7.0.0; the reason is not recorded in this notebook — confirm it is still required.
!pip install Pillow==7.0.0

In [None]:
# Set up sys.path for MMDetection in the Kaggle env.
# Entries are only added when missing, so re-running this cell does not pile
# up duplicate entries in sys.path.

import sys

print(sys.path)

mmdet_detectors_dir = '/kaggle/working/mmdetection/mmdet/models/detectors'
if mmdet_detectors_dir not in sys.path:
    sys.path.append(mmdet_detectors_dir)

# Keep the cloned repository at the very front of the search path.
mmdet_repo_dir = "./mmdetection"
if mmdet_repo_dir in sys.path:
    sys.path.remove(mmdet_repo_dir)
sys.path.insert(0, mmdet_repo_dir)

print(sys.path)

## 1-2 Set up and import wandb (a library that tracks the training process and logs experiment results)

My W&B API token is stored as a Kaggle secret with the label "wandb_key". To run this notebook with your own account, add your token under the same label via "Add-ons -> Secrets" in the notebook menu.

In [None]:
# Install wandb and Login wandb

# NOTE(review): wandb is installed unpinned (--upgrade), so the installed version can differ between runs.
!pip install wandb --upgrade

import wandb
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()

# The API key is read from the Kaggle secret labelled "wandb_key"; it is not hard-coded in the notebook.
wandb_api = user_secrets.get_secret("wandb_key") 
wandb.login(key=wandb_api)

# W&B entity (user name) and project referenced by the wandb calls later in this notebook.
wnb_username = 'sjs1999'
wnb_project_name = 'siim-covid19-2'

## 1-3 Import everything and Seed everything

In [None]:
# Import Everything

import sys
import os
import random
import numpy as np
import torch, torchvision

from pathlib import Path
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
from mmdet.apis import set_random_seed

import mmdet
from mmdet.apis import set_random_seed
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
from mmcv import Config

In [None]:
# Seed Everything

global_seed = 20563228


def set_seed(seed=global_seed):
    """Seed the run for reproducibility.

    Exports ``PYTHONHASHSEED`` with the given seed and forwards the seed to
    mmdet's ``set_random_seed`` with ``deterministic=True``.

    Args:
        seed: Integer seed; defaults to ``global_seed``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # mmdet random seed, deterministic=True to seed gpu/cudnn state
    set_random_seed(seed, deterministic=True)


set_seed(global_seed)

## 2-1 Choose mm model

In [None]:
# Config Input/model/output

baseline_cfg_path = "/kaggle/working/mmdetection/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py"  # cascade-rcnn
model_name = 'cascade_rcnn_x101_32x4d_fpn_1x'
fold = 0  # fold can be set to 0-4
job = 1

# One folder per (job, model, fold) combination; it is used as work_dir below.
job_folder = f'/kaggle/working/job{job}_{model_name}_fold{fold}'

# exist_ok=True replaces the check-then-create pattern and keeps the cell re-runnable.
os.makedirs(job_folder, exist_ok=True)

In [None]:
# Config basic parameter    

# Load the baseline config, then point work_dir at the job folder and record the global seed.
cfg = Config.fromfile(baseline_cfg_path)
cfg.work_dir = job_folder
cfg.seed = global_seed
# Intervals of the logger hooks and of the checkpoint hook.
cfg.log_config.interval = 20 
cfg.checkpoint_config.interval = 1 

In [None]:
# Config wandb

# Arguments forwarded to wandb.init by the WandbLoggerHook.
wandb_run_name = f'exp-{model_name}-fold{fold}-job{job}'
wandb_init_kwargs = dict(project=wnb_project_name,
                         name=wandb_run_name,
                         entity=wnb_username)

# Keep the plain-text logger and add the Weights & Biases logger next to it.
text_logger_hook = dict(type='TextLoggerHook')
wandb_logger_hook = dict(type='WandbLoggerHook', init_kwargs=wandb_init_kwargs)
cfg.log_config.hooks = [text_logger_hook, wandb_logger_hook]

## 2-2 Config the hyper-parameter

In [None]:
# Config pretrained/head

# The dataset has a single class (see cfg.classes below), so every bbox head of the roi_head predicts 1 class.
for head in cfg.model.roi_head.bbox_head:
    head.num_classes = 1

cfg.gpu_ids = [0]
# Pretrained backbone weights are declared through init_cfg; a top-level `pretrained` key is removed if present.
cfg.model.backbone.init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')
cfg.model.pop('pretrained', None)

In [None]:
# Config hyper-parameters

# Both the runner setting and the top-level total_epochs are set to 12.
cfg.runner.max_epochs = 12
cfg.total_epochs = 12
# NOTE(review): 0.02/8 = 0.0025; presumably the baseline learning rate scaled down for a
# single GPU — confirm it matches the batch size (samples_per_gpu) configured for the data.
cfg.optimizer.lr = 0.02/8
# Cosine annealing (by_epoch=False) down to min_lr, preceded by a 500-iteration linear warm-up.
cfg.lr_config = dict(
    policy='CosineAnnealing', 
    by_epoch=False,
    warmup='linear', 
    warmup_iters=500, 
    warmup_ratio=0.001, 
    min_lr=1e-07)

## 2-3 Config the training process and evaluation metrics

The original dataset has been resized to 512x512 images and split into 5 folds (group k-fold); the datasets configured below point at those pre-processed files.

In [None]:
# Config the dataset

cfg.dataset_type = 'CocoDataset'
cfg.classes = ("Covid_Abnormality",)

image_dir = '/kaggle/input/siim-covid19-512-images-and-metadata/train'
annotation_dir = '/kaggle/input/siim-covid19-coco-512x512-groupkfold'

# val and test both read the validation annotations of the selected fold;
# all three splits share the same image folder.
split_annotations = {
    'train': f'{annotation_dir}/train_annotations_fold{fold}.json',
    'val': f'{annotation_dir}/val_annotations_fold{fold}.json',
    'test': f'{annotation_dir}/val_annotations_fold{fold}.json',
}

for split_name, split_ann_file in split_annotations.items():
    split_cfg = cfg.data[split_name]
    split_cfg.img_prefix = image_dir
    split_cfg.classes = cfg.classes
    split_cfg.ann_file = split_ann_file
    split_cfg.type = 'CocoDataset'

cfg.data.samples_per_gpu = 4
cfg.data.workers_per_gpu = 2

# Evaluate bbox metrics after every epoch at a single IoU threshold of 0.5.
cfg.evaluation.metric = 'bbox'
cfg.evaluation.interval = 1
cfg.evaluation.iou_thrs = [0.5]

## 2-4 Preprocess and augment the data

In [None]:
# Config the augmentation

# Albumentations transforms, given as config dicts in the order they are listed.
# NOTE(review): the stand-alone Blur entry has p=1.0, so it is applied to every sample — confirm that is intended.
albu_train_transforms = [
    {'type': 'ShiftScaleRotate', 'shift_limit': 0.0625,
     'scale_limit': 0.15, 'rotate_limit': 15, 'p': 0.4},
    {'type': 'RandomBrightnessContrast', 'brightness_limit': 0.2,
     'contrast_limit': 0.2, 'p': 0.5},
    {'type': 'IAAAffine', 'shear': (-10.0, 10.0), 'p': 0.4},
    {'type': "Blur", 'p': 1.0, 'blur_limit': 7},
    {'type': 'CLAHE', 'p': 0.5},
    {'type': 'Equalize', 'mode': 'cv', 'p': 0.4},
    {
        'type': "OneOf",
        'transforms': [
            {'type': "GaussianBlur", 'p': 1.0, 'blur_limit': 7},
            {'type': "MedianBlur", 'p': 1.0, 'blur_limit': 7},
        ],
        'p': 0.4,
    },
]

In [None]:
# Config the data pipeline

# Training pipeline: load -> resize -> flip -> Albumentations -> normalize -> pad -> collect.
# The model configured above only has bbox heads, so masks are neither loaded
# (no with_mask) nor collected (no 'gt_masks'); the Albu keymap below only
# carries the image and the boxes anyway.
cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Albu',
        transforms=albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_labels'],
            min_visibility=0.0,
            filter_lost_elements=True),
        keymap=dict(img='image', gt_bboxes='bboxes'),
        update_pad_shape=False,
        skip_img_without_anno=True),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]

# Validation/test pipeline: single scale, no flipping.
cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ]
    )
]

# The datasets are built from cfg.data.<split>, which carries its own
# `pipeline` entry. Assigning only the top-level cfg.train_pipeline /
# cfg.test_pipeline leaves those entries untouched, so the pipelines defined
# above (including the Albumentations augmentations) would never be used.
cfg.data.train.pipeline = cfg.train_pipeline
cfg.data.val.pipeline = cfg.test_pipeline
cfg.data.test.pipeline = cfg.test_pipeline

In [None]:
# Dump the config

# The dumped file keeps the baseline config's file name, prefixed with the job id,
# and is written inside the job folder.
dumped_cfg_name = f'job{job}_{Path(baseline_cfg_path).name}'
cfg_path = f'{job_folder}/{dumped_cfg_name}'
print(cfg_path)

cfg.dump(cfg_path)
# print(f'Config:\n{cfg.pretty_text}')

## 3 Build detector and Train

In [None]:
# Build the model

# Build the detector from the `model` section of the config; train_cfg/test_cfg are
# taken from the top level of the config and are None when those keys are absent.
model = build_detector(cfg.model,
                       train_cfg=cfg.get('train_cfg'),
                       test_cfg=cfg.get('test_cfg'))
# Run weight initialisation (the backbone's init_cfg set earlier names the pretrained ResNeXt checkpoint).
model.init_weights()

In [None]:
# Build the dataset

datasets = [build_dataset(cfg.data.train)]

# Attach the class names to the model so they are available for
# visualisation and are stored together with the saved checkpoints; without
# this the detector loaded later has no record of the custom class list.
model.CLASSES = datasets[0].CLASSES

In [None]:
# Train the model

# Non-distributed training on the single training dataset built above;
# validate=True requests evaluation during training (see cfg.evaluation).
train_detector(model, datasets[0], cfg, distributed=False, validate=True)

In [None]:
# Find the best epoch

import json
from collections import defaultdict

# NOTE(review): the literal 'None.log.json' presumably results from train_detector being called
# without a timestamp — confirm that this file exists in job_folder after training.
log_file = f'{job_folder}/None.log.json'

def load_json_logs(json_logs):
    """Parse JSON-lines training logs into per-epoch metric lists.

    Args:
        json_logs: Iterable of paths to log files that hold one JSON object
            per line.

    Returns:
        A list with one dict per input file. Each dict maps an epoch number
        to a ``defaultdict(list)`` of ``metric name -> list of values`` in
        the order the records appear in the file. Records without an
        ``'epoch'`` key and blank lines are ignored.

    Raises:
        OSError: If a log file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    log_dicts = []
    for json_log in json_logs:
        epoch_metrics = {}
        # `log_fp` (not `log_file`) so the notebook-level path variable of
        # that name is not shadowed inside this function.
        with open(json_log, 'r') as log_fp:
            for raw_line in log_fp:
                raw_line = raw_line.strip()
                if not raw_line:
                    # Tolerate blank lines instead of failing in json.loads.
                    continue
                record = json.loads(raw_line)
                if 'epoch' not in record:
                    continue
                epoch = record.pop('epoch')
                metrics = epoch_metrics.setdefault(epoch, defaultdict(list))
                for key, value in record.items():
                    metrics[key].append(value)
        log_dicts.append(epoch_metrics)
    return log_dicts

log_dict = load_json_logs([log_file])

# Select the best epoch by its epoch number rather than by list position:
# only epochs that actually logged a bbox_mAP are considered, so a missing
# validation record no longer raises IndexError or shifts the result.
epoch_scores = {
    epoch: metrics['bbox_mAP'][0]
    for epoch, metrics in log_dict[0].items()
    if metrics.get('bbox_mAP')
}
if not epoch_scores:
    raise ValueError(f'No bbox_mAP entries found in {log_file}')

# On ties the earliest epoch wins, as with the previous argmax-based lookup.
best_epoch = max(epoch_scores, key=epoch_scores.get)
best_epoch

In [None]:
# Wandb visualization

# The run and the artifact share one name.
artifact_name = f'models_files_{model_name}_fold{fold}_job{job}'

# Files to upload: the checkpoint of the best epoch and the dumped config.
best_checkpoint_path = f'{job_folder}/epoch_{best_epoch}.pth'
model_files = [best_checkpoint_path, cfg_path]

run = wandb.init(
    project=wnb_project_name,
    name=artifact_name,
    entity=wnb_username,
    group='Artifact',
    job_type='model-files',
)

artifact = wandb.Artifact(artifact_name, type='model')
for model_file in model_files:
    artifact.add_file(model_file)

run.log_artifact(artifact)
run.finish()

# 4 predict

In [None]:
# Import ALL

import mmcv
from mmdet.models import build_detector
from mmcv.runner import load_checkpoint
from mmcv.parallel import MMDataParallel
from mmdet.datasets import build_dataloader, build_dataset
from mmdet.apis import single_gpu_test
from mmdet.apis import init_detector, inference_detector, show_result_pyplot

import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import cv2
import json
import numpy as np
import os
import torch

## 4-1 Load data and model

In [None]:
# Load data

# Read the validation annotations of the fold that was trained above instead
# of a hard-coded fold 0, so changing `fold` keeps training and visualisation
# on the same split.
with open(f"../input/siim-covid19-coco-512x512-groupkfold/val_annotations_fold{fold}.json") as f:
    val_ann = json.load(f)

# Only the first 9 validation images are used for the visualisations below.
imagepaths = [item['file_name'] for item in val_ann['images'][:9]]

df_annotations = pd.read_csv('../input/siim-covid19-512-images-and-metadata/df_train_processed_meta.csv')

In [None]:
# Draw function

def draw_bbox(img, box, label, color, label_size=0.5, alpha_box=0.3, alpha_label=0.6):
    """Draw a translucent, labelled bounding box on a copy of ``img``.

    Args:
        img: Image array to annotate; the input itself is not modified.
        box: Sequence whose first four values are
            ``(x_min, y_min, x_max, y_max)`` in pixels. The values are
            converted to ``int`` before drawing, so float coordinates and
            boxes with extra trailing entries are accepted.
        label: Caption text; it is rendered in upper case.
        color: Colour tuple used for the box fill and the outline.
        label_size: Font scale of the caption.
        alpha_box: Blend weight of the filled-box overlay.
        alpha_label: Blend weight of the caption-background overlay.

    Returns:
        The annotated copy of ``img``.
    """
    # Convert to plain ints so float or NumPy-scalar coordinates are accepted
    # by the OpenCV drawing calls below.
    x_min, y_min, x_max, y_max = (int(coord) for coord in box[:4])
    top_left, bottom_right = (x_min, y_min), (x_max, y_max)
    caption = label.upper()

    overlay_bbox = img.copy()
    overlay_label = img.copy()
    output = img.copy()

    text_width, text_height = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, label_size, 1)[0]

    # Filled box, blended into the output.
    cv2.rectangle(overlay_bbox, top_left, bottom_right, color, -1)
    cv2.addWeighted(overlay_bbox, alpha_box, output, 1-alpha_box, 0, output)

    # Black caption background just above the box, blended into the output.
    cv2.rectangle(overlay_label, (x_min, y_min-7-text_height), (x_min+text_width+2, y_min), (0, 0, 0), -1)
    cv2.addWeighted(overlay_label, alpha_label, output, 1-alpha_label, 0, output)

    # Solid outline and caption text on top of the blended overlays.
    output = cv2.rectangle(output, top_left, bottom_right, color, 2)
    cv2.putText(output, caption, (x_min, y_min-5),
                cv2.FONT_HERSHEY_SIMPLEX, label_size, (255, 255, 255), 1, cv2.LINE_AA)
    return output

In [None]:
# Load model

# Checkpoint of the epoch selected as best in the training section.
checkpoint = f'{job_folder}/epoch_{best_epoch}.pth'

print("Loading weights from:", checkpoint)
# Reload the config dumped for this job and create the detector on the first GPU.
cfg = Config.fromfile(cfg_path)
model = init_detector(cfg, checkpoint, device='cuda:0')

## 4-2 Model Visualization

In [None]:
# Heatmap Function

def featuremap_2_heatmap(feature_map):
    """Collapse a feature map into a normalised 2-D heatmap.

    The channels (axis 1) are summed, the result is averaged over the first
    axis, negative values are clipped to zero and the map is scaled so that
    its maximum is 1. If nothing is positive after clipping, the all-zero map
    is returned unchanged instead of dividing by zero.

    Args:
        feature_map: ``torch.Tensor`` indexed as ``[batch, channel, h, w]``.

    Returns:
        A one-element list holding the heatmap as a NumPy array of shape
        ``(h, w)``.
    """
    assert isinstance(feature_map, torch.Tensor)

    # Sum over channels in one call, then average over the batch axis.
    heatmap = feature_map.detach().sum(dim=1).cpu().numpy()
    heatmap = np.mean(heatmap, axis=0)

    heatmap = np.maximum(heatmap, 0)
    peak = np.max(heatmap)
    if peak > 0:
        # Guarded: an all-zero map would otherwise turn into NaNs.
        heatmap = heatmap / peak

    return [heatmap]

def draw_feature_map(features,img,save_dir = 'feature_map',name = None):
    """Plot heatmaps of feature maps with matplotlib.

    Args:
        features: Either a single ``torch.Tensor`` (iterated along its first
            axis, each slice plotted as a colour-mapped 512x512 heatmap) or
            an iterable of feature-map tensors (each plotted on top of the
            greyscale version of ``img``).
        img: Image array. It is only used when ``features`` is not a single
            tensor; the heatmap is resized to its height and width.
        save_dir: Unused; kept so existing callers keep working.
        name: Unused; kept so existing callers keep working.

    Returns:
        None. Every heatmap is shown with ``plt.show()``.
    """
    if isinstance(features, torch.Tensor):
        for single_map in features:
            for heatmap in featuremap_2_heatmap(single_map.unsqueeze(0)):
                heatmap = np.uint8(255 * heatmap)
                heatmap = cv2.resize(heatmap, (512, 512))
                heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
                # applyColorMap produces BGR, matplotlib expects RGB.
                plt.imshow(cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB))
                plt.show()
        return

    # The greyscale image is the same for every feature map, so convert once.
    # NOTE(review): BGR2GRAY is used here, but the visualisation loop in this
    # notebook passes an image already converted to RGB — confirm which
    # channel order is intended.
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_height, img_width = gray_img.shape[:2]

    for featuremap in features:
        for heatmap in featuremap_2_heatmap(featuremap):
            heatmap = np.uint8(255 * heatmap)
            # Resize to the image itself (cv2 expects (width, height)) so the
            # overlay also works for images that are not 512x512.
            heatmap = cv2.resize(heatmap, (img_width, img_height))
            superimposed_img = gray_img / 255 + heatmap / 255
            plt.imshow(superimposed_img)
            plt.show()

In [None]:
# Heatmap Visualization

from torchvision import transforms

new_size = (512, 512)
imgs_path = "/kaggle/input/siim-covid19-512-images-and-metadata/train"
threshold = 0.45

results_list = []        # unfiltered detections per image, reused by the wandb cell below
annotated_images = []    # (file name, image with boxes drawn) pairs for the summary grid

loader = transforms.Compose([
    transforms.ToTensor()])

for img_id in imagepaths:
    img_path = os.path.join(imgs_path, img_id)
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    result = inference_detector(model, img_path)

    # Feature extraction is for visualisation only, so no gradients are needed.
    # NOTE(review): the tensor fed to extract_feat comes straight from ToTensor();
    # it does not go through cfg.test_pipeline (resize/normalize) — confirm that
    # this is acceptable for the heatmaps.
    with torch.no_grad():
        feature_map = model.extract_feat(loader(img).unsqueeze(0).to('cuda:0'))
    draw_feature_map(feature_map, img)

    # Keep only detections above the score threshold for drawing; the
    # unfiltered detections are stored for the later wandb logging.
    results_filtered = result[0][result[0][:, 4]>threshold]
    bboxes = results_filtered[:, :4]
    scores = results_filtered[:, 4]
    results_list.append(result[0])

    for box in bboxes:
        img = draw_bbox(img, list(np.int_(box)), "Covid_Abnormality",
                        (255, 243, 0))

    annotated_images.append((img_id, img))

# The 3x3 summary grid is created only after the loop: `axes` was previously
# never defined (NameError), and draw_feature_map() calls plt.show() for its
# own figures while the loop runs, so the grid is kept as a separate figure.
fig, axes = plt.subplots(3, 3, figsize=(19, 21))
fig.subplots_adjust(hspace=0.2, wspace=0.2)
axes = axes.ravel()

for ax, (img_id, img) in zip(axes, annotated_images):
    ax.imshow(img, cmap='gray')
    ax.set_title(img_id, size=18, pad=30)
    ax.set_xticklabels([])
    ax.set_yticklabels([])

# Hide panels that have no image (fewer than 9 validation images).
for ax in axes[len(annotated_images):]:
    ax.axis('off')

plt.show()

## 4-3 Result Visualization

In [None]:
# Wandb Image Visualization

# `entity` is passed explicitly, like in the other wandb.init call of this notebook.
run = wandb.init(project=wnb_project_name,
                 name=f'images-{model_name}-fold{fold}-job{job}',
                 entity=wnb_username,
                 job_type='images')

class_id_to_label = {
    1: "pred_covid_abnormality",
    2: "GT_covid_abnormality"
}


def _wandb_box(x_min, y_min, x_max, y_max, class_id, confidence, image_size=512):
    """Build one wandb box entry with coordinates expressed as fractions of ``image_size``."""
    return {
        "position": {
            "minX": round(float(x_min)/image_size, 3),
            "maxX": round(float(x_max)/image_size, 3),
            "minY": round(float(y_min)/image_size, 3),
            "maxY": round(float(y_max)/image_size, 3),
        },
        "class_id" : class_id,
        "box_caption": class_id_to_label[class_id],
        "scores" : {
            "confidence": float(confidence),
        }
    }


wnb_images = []

for img_id, result in zip(imagepaths, results_list):

    bboxes = result[:, :4]
    scores = result[:, 4]
    ann_dict = {"predictions":{
                        "box_data":[],
                        "class_labels": class_id_to_label
                        },
                "ground_truth":{
                        "box_data":[],
                        "class_labels": class_id_to_label
                        }
                    }

    # Predicted boxes (class 1), with the detector score as confidence.
    for box, score in zip(bboxes, scores):
        ann_dict["predictions"]["box_data"].append(
            _wandb_box(box[0], box[1], box[2], box[3], class_id=1, confidence=score))

    # Drop the file extension to get the image id. str.strip('.png') would
    # remove any leading/trailing '.', 'p', 'n' or 'g' characters, not the
    # suffix, and could therefore damage ids that start or end with them.
    image_stem = os.path.splitext(img_id)[0]
    image_annotations = df_annotations[df_annotations.id==image_stem]

    # Ground-truth boxes (class 2). Columns are read by name, and rows without
    # complete coordinates are skipped so no NaN positions are logged.
    gt_boxes = image_annotations[['xmin', 'ymin', 'xmax', 'ymax']].dropna()
    for gt_row in gt_boxes.itertuples(index=False):
        ann_dict["ground_truth"]["box_data"].append(
            _wandb_box(gt_row.xmin, gt_row.ymin, gt_row.xmax, gt_row.ymax,
                       class_id=2, confidence=1.0))

    image = cv2.imread(os.path.join(imgs_path, img_id))
    # cv2 loads BGR; convert so the logged image has RGB channel order.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    wnb_images.append(wandb.Image(image, boxes=ann_dict))

wandb.log({f'images-{model_name}-fold{fold}-job{job}': wnb_images})

run.finish()
run