The trained model is available in the attached datasets under the name: CascadeRCNN_X101 VinBigData 20Ep MMDET.

In [None]:
import os
import cv2
import random


In [None]:
# Print the toolchain versions available in this environment.
# Check nvcc version
!nvcc -V
# Check GCC version
!gcc --version

In [None]:
%%time

print("this will take around 10 mins")
# install dependencies: (use cu101 because colab has CUDA 10.1)
# !pip install -U torch==1.7.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html

# install mmcv-full thus we could use CUDA operators
!pip install mmcv-full


In [None]:
# Remove any previous checkout so the clone below does not fail on an
# existing directory when this cell is re-run.
!rm -rf mmdetection
!git clone --branch v2.7.0 https://github.com/open-mmlab/mmdetection.git
# Change into the repo: the cells that follow use paths relative to it
# (for example ./configs/... and ../../input/...).
%cd mmdetection

# Editable install of the cloned package
!pip install -e .

# install Pillow 7.0.0 back in order to avoid bug in colab
!pip install Pillow==7.0.0

In [None]:
# Sanity-check the environment by printing the versions of the installed libraries.

# PyTorch: version string and whether CUDA is usable
import torch, torchvision
_cuda_ok = torch.cuda.is_available()
print(torch.__version__, _cuda_ok)

# MMDetection: installed version
import mmdet
print(mmdet.__version__)

# mmcv: CUDA version and compiler reported by mmcv.ops
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
for _report in (get_compiling_cuda_version, get_compiler_version):
    print(_report())

# Cascade R-CNN X101 Pretrained

In [None]:
!mkdir checkpoints
# !wget -c http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth \
#       -O checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth


!wget -c https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_rcnn_x101_32x4d_fpn_1x_20190501-af628be5.pth \
      -O checkpoints/cascade_rcnn_x101_32x4d_fpn_1x_20190501-af628be5.pth


In [None]:
# Path of the test folder. Despite the variable name this is a directory that
# gets listed, not an annotation file.
test_anno = "../../input/vinbigdata-1024-image-dataset/vinbigdata/test"

# Names of every entry in that folder (filtered to PNGs later)
ids = os.listdir(test_anno)

In [None]:
# Number of entries found in the test folder
len(ids)

# Preparing Test Annotation file

In [None]:
# Build one COCO-style image record per PNG file listed in `ids`.
# The file name doubles as the image id. Height and width are fixed at 1024.
# Only names that *end* in '.png' are kept, so entries that merely contain
# the substring (e.g. 'x.png.bak') are skipped.
img_infos = [
    {
        "license": 0,
        "url": 'null',
        "file_name": file_name,
        "height": 1024,
        "width": 1024,
        "date_captured": 'null',
        "id": file_name,
    }
    for file_name in ids
    if file_name.endswith('.png')
]

In [None]:
# Show the first image record (raises IndexError if no PNG was found)
img_infos[0]

In [None]:
import json

val_anno = '../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/val_annotations.json'

# Use the validation annotation file as a template: every top-level key is
# kept as loaded, except the two replaced below.
with open(val_anno) as f:
    dd = json.load(f)

# The test set has no ground truth, so the annotation list is emptied and the
# image list is swapped for the test image records.
dd['annotations'] = []
dd['images'] = img_infos

# Written to the current directory; "data.test.ann_file" points at this path.
with open('./test_ann.json', 'w') as outfile:
    json.dump(dd, outfile)

In [None]:
# Names of the entries in the validation image folder.
# NOTE(review): not used by any active code in the visible cells.
val_ids = os.listdir('../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/val_images')

In [None]:
# The 14 class names; a prediction's class index is its position in this tuple.
_classes = (
    "Aortic_enlargement",
    "Atelectasis",
    "Calcification",
    "Cardiomegaly",
    "Consolidation",
    "ILD",
    "Infiltration",
    "Lung_Opacity",
    "Nodule/Mass",
    "Other_lesion",
    "Pleural_effusion",
    "Pleural_thickening",
    "Pneumothorax",
    "Pulmonary_fibrosis",
)

# Custom Config Options

In [None]:
# Overrides applied on top of the stock Cascade R-CNN COCO config.
# Every key is listed exactly once, and the literals shared by several
# entries are defined a single time so they cannot drift apart.
#
# NOTE(review): the class tuple and total_epochs are deliberately kept as
# *strings*, matching the quoted key='value' form used on the command line.
# Confirm this is also what is wanted where the dict is handed to
# init_detector directly.
_class_names_literal = '''("Aortic_enlargement", "Atelectasis", "Calcification", "Cardiomegaly", "Consolidation", "ILD", "Infiltration", "Lung_Opacity", "Nodule/Mass", "Other_lesion", "Pleural_effusion", "Pleural_thickening", "Pneumothorax", "Pulmonary_fibrosis")'''
_coco_root = '../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/'

_cfg_options = {
    "dataset_type": 'CocoDataset',
    "classes": _class_names_literal,
    # training split
    "data.train.img_prefix": _coco_root,
    "data.train.classes": _class_names_literal,
    "data.train.ann_file": _coco_root + 'train_annotations.json',
    "data.train.type": 'CocoDataset',
    # validation split
    "data.val.img_prefix": _coco_root,
    "data.val.classes": _class_names_literal,
    "data.val.ann_file": _coco_root + 'val_annotations.json',
    "data.val.type": 'CocoDataset',
    # test split (annotation file is generated by this notebook)
    "data.test.img_prefix": '../../input/vinbigdata-1024-image-dataset/vinbigdata/test/',
    "data.test.classes": _class_names_literal,
    "data.test.ann_file": './test_ann.json',
    "data.test.type": 'CocoDataset',
    # logging / evaluation / checkpoints
    "log_config.interval": 10,
    "evaluation.metric": 'bbox',
    "load_from": './checkpoints/cascade_rcnn_x101_32x4d_fpn_1x_20190501-af628be5.pth',
    "work_dir": "../vinbig_output",
    "total_epochs": '21',
}


# "model.roi_head.bbox_head[0].num_classes" : '14',
# "model.roi_head.bbox_head[1].num_classes" : '14',
# "model.roi_head.bbox_head[2].num_classes" : '14',

# Render every override as key='value' followed by one space and print the
# resulting single-line string.
cfg_op = "".join(f"{key}='{value}' " for key, value in _cfg_options.items())
print(cfg_op)


# Loading the 20ep trained model

In [None]:
from mmdet.apis import inference_detector, init_detector, show_result_pyplot
from mmcv import Config

# Start from the Cascade R-CNN X101 config shipped with the repo.
# config = 'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'
config = Config.fromfile('./configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py')

# The config carries three bbox heads; each one is set to 14 classes.
for _stage in range(3):
    config.model.roi_head.bbox_head[_stage].num_classes = 14

# Weights saved after epoch 20 of the fine-tuning run
checkpoint = '../../input/cascadercnnx-vinbigdata-20ep-1024/epoch_20.pth'

# Build the detector on the first GPU with the overrides applied, then attach
# the class names to the model.
model = init_detector(config, checkpoint, device='cuda:0', cfg_options=_cfg_options)
model.CLASSES = _classes


# Option 1 for inference - Using only 10 images for demo


In [None]:
from tqdm import tqdm

# Use the detector to do inference on every PNG in the test folder and keep
# the raw output of inference_detector, keyed by file name.
test_img_dir = "../../input/vinbigdata-1024-image-dataset/vinbigdata/test"

# Only PNG files are processed: any other directory entry would be fed to the
# detector and would later have no matching row when ids are looked up.
test_img_ids = [name for name in os.listdir(test_img_dir) if name.endswith('.png')]

result = {}
for _id in tqdm(test_img_ids):
    img_path = os.path.join(test_img_dir, _id)
    pred = inference_detector(model, img_path)
    result[_id] = pred
# img = "../../input/vinbigdata-1024-image-dataset/vinbigdata/test/002a34c58c5b758217ed1f584ccbcfe9.png"
# result = inference_detector(model, img)
# show_result_pyplot(model, img, result, score_thr=0.3)

# Option 2 for inference

In [None]:
# !python tools/test.py ./configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py ../../input/cascadercnnx-vinbigdata-20ep-1024/epoch_20.pth --cfg-options dataset_type='CocoDataset' classes='("Aortic_enlargement", "Atelectasis", "Calcification", "Cardiomegaly", "Consolidation", "ILD", "Infiltration", "Lung_Opacity", "Nodule/Mass", "Other_lesion", "Pleural_effusion", "Pleural_thickening", "Pneumothorax", "Pulmonary_fibrosis")' data.train.img_prefix='../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/' data.train.classes='("Aortic_enlargement", "Atelectasis", "Calcification", "Cardiomegaly", "Consolidation", "ILD", "Infiltration", "Lung_Opacity", "Nodule/Mass", "Other_lesion", "Pleural_effusion", "Pleural_thickening", "Pneumothorax", "Pulmonary_fibrosis")' data.train.ann_file='../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/train_annotations.json' data.train.type='CocoDataset' data.val.img_prefix='../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/' data.val.classes='("Aortic_enlargement", "Atelectasis", "Calcification", "Cardiomegaly", "Consolidation", "ILD", "Infiltration", "Lung_Opacity", "Nodule/Mass", "Other_lesion", "Pleural_effusion", "Pleural_thickening", "Pneumothorax", "Pulmonary_fibrosis")' data.val.ann_file='../../input/vinbigdata-coco-dataset-with-wbf-3x-downscaled/vinbigdata-coco-dataset-with-wbf-3x-downscaled/val_annotations.json' data.val.type='CocoDataset' data.test.img_prefix='../../input/vinbigdata-1024-image-dataset/vinbigdata/test/' data.test.classes='("Aortic_enlargement", "Atelectasis", "Calcification", "Cardiomegaly", "Consolidation", "ILD", "Infiltration", "Lung_Opacity", "Nodule/Mass", "Other_lesion", "Pleural_effusion", "Pleural_thickening", "Pneumothorax", "Pulmonary_fibrosis")' data.test.ann_file='./test_ann.json' data.test.type='CocoDataset' log_config.interval='10' 
# (continuation of the commented-out command above; join both lines into one before uncommenting)
# evaluation.metric='bbox' load_from='./checkpoints/cascade_rcnn_x101_32x4d_fpn_1x_20190501-af628be5.pth' work_dir='../vinbig_output' total_epochs='21' --out preds_cascadex.pkl

In [None]:
# List the contents of the current working directory
os.listdir("./")

In [None]:
# import pickle

# with open('./preds_cascadex.pkl', 'rb') as f:
#     data = pickle.load(f)

In [None]:

import json

# Read the generated test annotation file back into memory as `ann`.
with open('./test_ann.json', 'rb') as ann_file:
    ann = json.loads(ann_file.read())

In [None]:
# file_ids = [file_name.get('file_name').split('.png')[0] for file_name in ann.get('images')]

In [None]:
# Package that provides weighted_boxes_fusion, used when building the submission
!pip install ensemble_boxes

In [None]:
import pandas as pd
# Per-image metadata for the test set. The submission code reads the
# `image_id`, `width` and `height` columns to map boxes back to each image's
# own size.
test_df = pd.read_csv('../../input/vinbigdata-original-image-dataset/vinbigdata/test.csv')

In [None]:
from ensemble_boxes import *

In [None]:
# ann_with_pred = zip(file_ids, data)
# Build one [image_id, PredictionString] row per test image.
# this method is for option1 (it walks the `result` dict filled by the
# Python inference loop). For option 2 the commented-out idea was to iterate
# over zip(file_ids, data) instead.
submission_vals = []

# Side length of the images the detector was run on. Boxes are divided by it
# to get the normalised coordinates that weighted_boxes_fusion expects.
_model_img_size = 1024

# Look up (width, height) by image id once, instead of filtering the whole
# frame twice for every image. setdefault keeps the first row per id, which
# matches taking .iloc[0] of the filtered frame.
_size_by_id = {}
for _image_id, _w, _h in zip(test_df.image_id, test_df.width, test_df.height):
    _size_by_id.setdefault(_image_id, (_w, _h))

for _id, preds in result.items():
    _id = _id.split('.png')[0]
    width, height = _size_by_id[_id]

    # Flatten the per-class detections; `preds` holds one array per class and
    # each row is read as four box coordinates followed by a score.
    boxes = []
    scores = []
    labels = []
    for class_idx, class_preds in enumerate(preds):
        for p in class_preds:
            boxes.append(p[:4] / _model_img_size)
            scores.append(p[4].astype(float))
            labels.append(class_idx)

    boxes, scores, labels = weighted_boxes_fusion([boxes], [scores], [labels], iou_thr=0.4, skip_box_thr=0.4)

    # Scale the normalised boxes to the image's own size: columns 0 and 2 are
    # x coordinates (scaled by width), columns 1 and 3 are y coordinates
    # (scaled by height).
    # NOTE(review): assumes the `width`/`height` columns of test.csv carry
    # their usual meaning -- confirm against the CSV.
    boxes[:, [0, 2]] *= width
    boxes[:, [1, 3]] *= height

    scaled_boxes = boxes.astype(int)
    labels = labels.astype(int)

    if len(scaled_boxes):
        # Six space-separated fields per box: label, score, then the four
        # integer coordinates.
        _id_preds = []
        for label, score, box in zip(labels, scores, scaled_boxes):
            _id_preds.extend(map(str, (label, score.round(2), *box)))
        pred_str = " ".join(_id_preds)
    else:
        # Fallback row used when no box survives fusion (class 14 is not one
        # of the 14 detector classes; presumably "no finding" -- confirm).
        pred_str = '14 1 0 0 1 1'
    submission_vals.append([_id, pred_str])



In [None]:
# Two-column submission table: one row per image
df = pd.DataFrame(submission_vals, columns = ['image_id','PredictionString'])

In [None]:
# Inspect the raw detector output stored for one specific test image
# (raises KeyError if that file name is not a key of `result`).
result['e94fde220360e4b769921e16059cc6af.png']

In [None]:


# Inspect the raw detector output stored for another test image
# (raises KeyError if that file name is not a key of `result`).
result['b3f67ac077531f44dd06275af31edbd9.png']



In [None]:
# Preview the first rows of the submission table
df.head()

In [None]:
# Number of rows in the submission table
len(df)

In [None]:
# Count the rows that carry the fallback string written when no box survived fusion
df[df.PredictionString=="14 1 0 0 1 1"].count()
                                             

In [None]:
# Preview the last 10 rows of the submission table
df.tail(10)

In [None]:
# Spot-check the prediction string of one row by position
# (hard-coded index: raises IndexError if df has fewer than 2223 rows).
df.iloc[2222]['PredictionString']

In [None]:
# Write the submission file to the current directory, with a header row and no index column
df.to_csv('submission.csv', header=True, index=False)