In [35]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader
from albumentations.pytorch.transforms import ToTensorV2
from tqdm.notebook import tqdm
import albumentations as A

# from ipywidgets import IntProgress
import multiprocessing
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device
import seaborn as sns

from torchvision.ops import batched_nms
import mmcv

from ensemble_boxes import *

from matplotlib.pyplot import imshow
from PIL import Image

from torchvision.ops import box_iou

In [57]:
def plot_image(img_path, boxes, labels):
    
    image = cv2.imread(img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image / 255
    image = image.astype('float32')
    
    categories = [
        { 'id':0, 'name': 'Aortic enlargement'},
        { 'id':1, 'name': 'Atelectasis'},
        { 'id':2, 'name': 'Calcification'},
        { 'id':3, 'name': 'Cardiomegaly'},
        { 'id':4, 'name': 'Consolidation'},
        { 'id':5, 'name': 'ILD'},
        { 'id':6, 'name': 'Infiltration'},
        { 'id':7, 'name': 'Lung Opacity'},
        { 'id':8, 'name': 'Nodule/Mass'},
        { 'id':9, 'name': 'Other lesion'},
        { 'id':10, 'name': 'Pleural effusion'},
        { 'id':11, 'name': 'Pleural thickening'},
        { 'id':12, 'name': 'Pneumothorax'},
        { 'id':13, 'name': 'Pulmonary fibrosis'},
        { 'id':14, 'name': 'No finding'},
    ] 
    
    plt.figure(figsize=(10,10))
    
    for box,label in zip(boxes, labels):
        cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (225,0,0), 4)
        label = categories[label]['name'].upper()
#         print(label)
        cv2.putText(image, 
                    label,
                    (box[0], box[1]), 
                    fontFace = cv2.FONT_HERSHEY_COMPLEX, 
                    fontScale = 1,
                    color = (255, 0, 0),
                    thickness = 2,
                    lineType = cv2.LINE_AA
                   )
    plt.imshow(image)

In [48]:
df = pd.read_csv('10_fold_dedub_abnormal_org_size.csv')
# df = pd.read_csv('train.csv')
# abnormal = df[df.class_id != 14].groupby('image_id').agg(lambda x: list(x)).reset_index()

# fold_id = 0
# train_df = df[df.fold_id != fold_id].groupby('image_id').agg(lambda x: list(x)).reset_index()

# fold_id = 1
# valid_df = df[df.fold_id == fold_id].groupby('image_id').agg(lambda x: list(x)).reset_index()

meta = pd.read_csv('train_meta.csv').set_index('image_id')
# fold_id = 1
# train_df = df[df.fold_id != fold_id].groupby('image_id').agg(lambda x: list(x)).reset_index()
# valid_df = df[df.fold_id == fold_id].groupby('image_id').agg(lambda x: list(x)).reset_index()

d = df[(df.class_id == 7) | (df.class_id == 6)].groupby('image_id').agg(lambda x: list(x)).reset_index()

In [78]:
def remove_dublicates(df, output_file):
 
    n = len(df)
   
    df_annotations = []
    iou_thr = 0.01
  
    for idx in tqdm(range(n)):

        records = df.loc[idx]
        image_id = records['image_id']

        width = meta.loc[image_id]['dim1']
        height = meta.loc[image_id]['dim0']
        
        labels = records['class_id']
        scores = np.ones(len(labels))
        
        boxes = records[['x_min','y_min', 'x_max', 'y_max']].to_numpy()
        boxes = np.array(boxes.tolist()).T
        boxes = boxes.astype(int)
        
        boxes = boxes.astype(float)
        boxes[:, 0] = boxes[:, 0] / width
        boxes[:, 2] = boxes[:, 2] / width
        boxes[:, 1] = boxes[:, 1] / height
        boxes[:, 3] = boxes[:, 3] / height

        scores = np.ones(len(labels))

#         filtering dublicates from multiple radiologists 
        boxes, scores, labels = nms([boxes], [scores], [labels], iou_thr=iou_thr)

        boxes[:, 0] = boxes[:, 0] * width
        boxes[:, 2] = boxes[:, 2] * width
        boxes[:, 1] = boxes[:, 1] * height
        boxes[:, 3] = boxes[:, 3] * height
        
        boxes = boxes.astype(int)
        
        labels = labels.astype(int)
        
#         img_path = 'train_2x/train/' + image_id + '.jpg'
#         print(image_id)
#         plot_image(img_path, boxes, labels, scores)

        for i in range(len(boxes)):

            label = labels[i]

            bbox = boxes[i]
            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                
            box_anno = dict(
                image_id=image_id,
                class_id=label,
                x_min=bbox[0],
                y_min=bbox[1],
                x_max=bbox[2],
                y_max=bbox[3],
                area=area,
                width=width,
                height=height)

            df_annotations.append(box_anno)
            
    d = pd.DataFrame(df_annotations)
    d.to_csv(output_file, index=False)
            

remove_dublicates(abnormal, 'abnormal_dedub_org_size.csv')

  0%|          | 0/4394 [00:00<?, ?it/s]

In [81]:
df = pd.read_csv('10_fold_dedub_abnormal_org_size.csv')

fold_id = 0

train_df = df[df.fold_id != fold_id].groupby('image_id').agg(lambda x: list(x)).reset_index()
valid_df = df[df.fold_id == fold_id].groupby('image_id').agg(lambda x: list(x)).reset_index()
# ax = sns.countplot(x="class_id",data=df)

In [76]:
def generate_data_json(df, output_file):
    
    categories = [
        { 'id':0, 'name': 'Aortic enlargement'},
        { 'id':1, 'name': 'Atelectasis'},
        { 'id':2, 'name': 'Calcification'},
        { 'id':3, 'name': 'Cardiomegaly'},
        { 'id':4, 'name': 'Consolidation'},
        { 'id':5, 'name': 'ILD'},
        { 'id':6, 'name': 'Infiltration'},
        { 'id':7, 'name': 'Lung Opacity'},
        { 'id':8, 'name': 'Nodule/Mass'},
        { 'id':9, 'name': 'Other lesion'},
        { 'id':10, 'name': 'Pleural effusion'},
        { 'id':11, 'name': 'Pleural thickening'},
        { 'id':12, 'name': 'Pneumothorax'},
        { 'id':13, 'name': 'Pulmonary fibrosis'},
    ] 
    
    n = len(df)

    annotations = []
    images = []
    
    obj_count = 0

    for idx in tqdm(range(n)):

        records = df.loc[idx]
        image_id = records['image_id']
        
        width = meta.loc[image_id]['dim1'] 
        height = meta.loc[image_id]['dim0']
    
        images.append(dict(
            id=idx,
            file_name=image_id + '.jpg',
            height=height,
            width=width))
 
        boxes = records[['x_min','y_min', 'x_max', 'y_max']].to_numpy()
        boxes = np.array(boxes.tolist()).T
        boxes = boxes.astype(int)
        labels = records['class_id']
        
#         for i in len(boxes):
            
#             box_1 = boxes[i]
            
#             for j in len(boxes):
                
#                 box_2 = boxes[j]
                
#                 iou = box_iou(box_1, box_2)

#                 if iou > 0.2 and labels[i] == labels[j]:
#         scores = np.ones(len(labels))
#         img_path = 'vinbigdata/train/' + image_id + '.jpg'
#         plot_image(img_path, boxes, labels)        
          
        for i in range(len(boxes)):

            label = labels[i]
#             print(categories[label])
            bbox = boxes[i]
            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            
#             if bbox[2] - bbox[0] > width * 0.6:
#                 continue
            
            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=label,
                bbox=[bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]],
                area=area,
                iscrowd=0)
            
            annotations.append(data_anno)
            
            obj_count += 1
    
    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=categories)
    mmcv.dump(coco_format_json, output_file)

In [82]:
generate_data_json(train_df, 'fold_0_abnormal_train_org_size.json')

  0%|          | 0/3959 [00:00<?, ?it/s]

In [83]:
generate_data_json(valid_df, 'fold_0_abnormal_valid_org_size.json')

  0%|          | 0/435 [00:00<?, ?it/s]

In [14]:
df = pd.read_csv('train_downsampled.csv').groupby('image_id').agg(lambda x: list(x)).reset_index()

idx = 100
records = df.loc[idx]
image_id = records['image_id']

img_path = 'train_2x/train/' + image_id + '.jpg'


width = meta.loc[image_id]['dim1']
height = meta.loc[image_id]['dim0']

boxes = records[['x_min','y_min', 'x_max', 'y_max']].to_numpy()
boxes = np.array(boxes.tolist()).T
labels = records['class_id']
scores = np.ones(len(labels))

plot_image(img_path, boxes, labels, scores)

TypeError: function takes exactly 4 arguments (2 given)

<Figure size 720x720 with 0 Axes>

In [42]:
test_df = pd.read_csv('test.csv')
categories = [
    { 'id':0, 'name': 'Aortic enlargement'},
    { 'id':1, 'name': 'Atelectasis'},
    { 'id':2, 'name': 'Calcification'},
    { 'id':3, 'name': 'Cardiomegaly'},
    { 'id':4, 'name': 'Consolidation'},
    { 'id':5, 'name': 'ILD'},
    { 'id':6, 'name': 'Infiltration'},
    { 'id':7, 'name': 'Lung Opacity'},
    { 'id':8, 'name': 'Nodule/Mass'},
    { 'id':9, 'name': 'Other lesion'},
    { 'id':10, 'name': 'Pleural effusion'},
    { 'id':11, 'name': 'Pleural thickening'},
    { 'id':12, 'name': 'Pneumothorax'},
    { 'id':13, 'name': 'Pulmonary fibrosis'},
] 
images = []
for i in range(len(test_df)):
    r = test_df.iloc[i]
    image_id = r['image_id']
    images.append(dict(
        id=i,
        file_name=image_id + '.jpg',
        height=r['height'],
        width=r['width']))
coco_format_json = dict(
    images=images,
    categories=categories)
mmcv.dump(coco_format_json, 'test_coco_org.json')

In [1]:
%load_ext tensorboard

In [5]:
%tensorboard --logdir checkpoints_1024_fold_0/tf_logs --port=6008

Reusing TensorBoard on port 6008 (pid 700), started 0:00:12 ago. (Use '!kill 700' to kill it.)