In [1]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader
from albumentations.pytorch.transforms import ToTensorV2
from tqdm.notebook import tqdm
import albumentations as A

# from ipywidgets import IntProgress
import multiprocessing
# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Bare expression: it is not the last statement of the cell, so it displays nothing.
device
import seaborn as sns

from torchvision.ops import batched_nms
import mmcv

from ensemble_boxes import *

from matplotlib.pyplot import imshow
from PIL import Image

from torchvision.ops import box_iou

In [2]:
def plot_image(img_path, boxes, labels):
    """Display an image with its labelled bounding boxes drawn in red.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        boxes: Iterable of ``(x_min, y_min, x_max, y_max)`` pixel coordinates.
            Values may be ints or floats; they are truncated to ``int`` because
            the OpenCV drawing functions need integer points.
        labels: Iterable of integer class ids, parallel to ``boxes``. Each id
            indexes the category-name table defined in this function.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {img_path}')

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = (image / 255).astype('float32')

    # Position in this tuple is the class id.
    category_names = (
        'Aortic enlargement',
        'Atelectasis',
        'Calcification',
        'Cardiomegaly',
        'Consolidation',
        'ILD',
        'Infiltration',
        'Lung Opacity',
        'Nodule/Mass',
        'Other lesion',
        'Pleural effusion',
        'Pleural thickening',
        'Pneumothorax',
        'Pulmonary fibrosis',
        'No finding',
    )

    # The image is float32 scaled to [0, 1], so the drawing colour uses the
    # same scale; 0-255 values would leave out-of-range pixels for imshow.
    red = (1.0, 0.0, 0.0)

    plt.figure(figsize=(10, 10))

    for box, label in zip(boxes, labels):
        x_min, y_min, x_max, y_max = (int(v) for v in box[:4])
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), red, 4)
        cv2.putText(image,
                    category_names[int(label)].upper(),
                    (x_min, y_min),
                    fontFace=cv2.FONT_HERSHEY_COMPLEX,
                    fontScale=1,
                    color=red,
                    thickness=2,
                    lineType=cv2.LINE_AA)
    plt.imshow(image)

In [40]:
# Fold held out for validation; every other fold is used for training.
fold_id = 0

# Annotation table (read below with columns such as image_id, fold_id, class_id
# and the box corners) and the per-image metadata table keyed by image_id.
df = pd.read_csv('10_fold_dedub_abnormal_org_size.csv')
meta = pd.read_csv('train_meta.csv').set_index('image_id')


def _collect_by_image(frame):
    """Return one row per image_id, with every other column gathered into a list."""
    return frame.groupby('image_id').agg(lambda x: list(x)).reset_index()


in_valid_fold = df.fold_id == fold_id
train_df = _collect_by_image(df[~in_valid_fold])
valid_df = _collect_by_image(df[in_valid_fold])
full_df = _collect_by_image(df)
# ax = sns.countplot(x="class_id",data=df)

In [38]:
def generate_label_txt(df, output_dir, meta_df=None):
    """Write one label ``.txt`` file per image, one line per bounding box.

    Each line has the form ``"<class_id> <c_x> <c_y> <w> <h> \\n"`` where the box
    centre and size are divided by the image width/height and rounded to four
    decimals.

    Args:
        df: Frame with one row per image. ``image_id`` is a string and the
            ``x_min``, ``y_min``, ``x_max``, ``y_max`` and ``class_id`` cells
            are equal-length lists (one entry per box).
        output_dir: Directory that receives ``<image_id>.txt``. It is created
            if it does not exist.
        meta_df: Table indexed by ``image_id`` with ``dim0`` (used as height)
            and ``dim1`` (used as width). Defaults to the notebook-level
            ``meta`` table.
    """
    if meta_df is None:
        # Fall back to the notebook global so existing two-argument calls keep working.
        meta_df = meta
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for idx in tqdm(range(len(df))):
        # Positional access: does not require df to carry a 0..n-1 index.
        records = df.iloc[idx]
        image_id = records['image_id']

        dims = meta_df.loc[image_id]
        width = dims['dim1']
        height = dims['dim0']

        # Four per-coordinate lists -> (n_boxes, 4) integer array.
        boxes = records[['x_min', 'y_min', 'x_max', 'y_max']].to_numpy()
        boxes = np.array(boxes.tolist()).T.astype(int)
        labels = records['class_id']

        with open(os.path.join(output_dir, image_id + '.txt'), 'w') as f:
            for label, (x_min, y_min, x_max, y_max) in zip(labels, boxes):
                # True division keeps the half-pixel that floor division dropped.
                c_x = round((x_min + x_max) / 2 / width, 4)
                c_y = round((y_min + y_max) / 2 / height, 4)
                w = round((x_max - x_min) / width, 4)
                h = round((y_max - y_min) / height, 4)

                f.write(f'{label} {c_x} {c_y} {w} {h} \n')


In [41]:
# Write one label .txt per image of full_df (all folds) into vinbigdata/labels/train/.
generate_label_txt(full_df, 'vinbigdata/labels/train/')

  0%|          | 0/4394 [00:00<?, ?it/s]

In [83]:
# NOTE(review): generate_data_json is not defined in any cell visible in this
# notebook; on a fresh kernel this call presumably raises NameError. Confirm
# where the function comes from (deleted cell or another module).
generate_data_json(valid_df, 'fold_0_abnormal_valid_org_size.json')

  0%|          | 0/435 [00:00<?, ?it/s]

In [45]:
def generate_image_txt(df, images_path, output_file):
    """Write an image-path listing for ``df`` to ``output_file``.

    One line is written per row, built as ``images_path + image_id + '.jpg'``.
    Rows are looked up by index label ``0 .. len(df) - 1``.

    Args:
        df: Frame with an ``image_id`` column of strings.
        images_path: String prefix put in front of every file name.
        output_file: Path of the text file to create (overwritten if present).
    """
    row_count = len(df)

    with open(output_file, 'w') as listing:
        for row_label in tqdm(range(row_count)):
            image_id = df.loc[row_label]['image_id']
            line = images_path + image_id + '.jpg\n'
            listing.write(line)


In [48]:
# List the training-fold image paths, one per line, in train.txt.
# NOTE(review): this file lands in the working directory, while the validation
# list is written to yolov5/val.txt. Confirm the intended location.
generate_image_txt(train_df, '../vinbigdata/images/train/', 'train.txt')

  0%|          | 0/3959 [00:00<?, ?it/s]

In [51]:
# List the validation-fold image paths, one per line, in yolov5/val.txt.
generate_image_txt(valid_df, '../vinbigdata/images/train/', 'yolov5/val.txt')

  0%|          | 0/435 [00:00<?, ?it/s]

In [42]:
# Build an annotation-free COCO-style description of the test images
# (an image list plus the category table) and save it as JSON.
test_df = pd.read_csv('test.csv')

# Position in this tuple is the category id.
category_names = (
    'Aortic enlargement',
    'Atelectasis',
    'Calcification',
    'Cardiomegaly',
    'Consolidation',
    'ILD',
    'Infiltration',
    'Lung Opacity',
    'Nodule/Mass',
    'Other lesion',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary fibrosis',
)
categories = [{'id': idx, 'name': name} for idx, name in enumerate(category_names)]

images = []
for i in range(len(test_df)):
    r = test_df.iloc[i]
    image_id = r['image_id']
    # The row position doubles as the image id.
    images.append({
        'id': i,
        'file_name': image_id + '.jpg',
        'height': r['height'],
        'width': r['width'],
    })

coco_format_json = {'images': images, 'categories': categories}
mmcv.dump(coco_format_json, 'test_coco_org.json')

In [1]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [5]:
# Open TensorBoard on the logs under checkpoints_1024_fold_0/tf_logs, served on port 6008.
%tensorboard --logdir checkpoints_1024_fold_0/tf_logs --port=6008

Reusing TensorBoard on port 6008 (pid 700), started 0:00:12 ago. (Use '!kill 700' to kill it.)