## Setup

In [None]:
import os
import glob
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Config

In [None]:
# Notebook-wide settings:
#   data_dir - root folder of the dataset (images and train.csv are read from here)
#   size     - image size used to derive the per-image box scale factors
config = dict(
    data_dir="/kaggle/input/siim-covid19-detection-512",
    size=512,
)

In [None]:
# Class names; the position in this list is the integer label index
CLASSES = [
    'Negative for Pneumonia',
    'Typical Appearance',
    'Indeterminate Appearance',
    'Atypical Appearance',
]

## Helper functions

In [None]:
def get_boxes(data_frame, id_image):
    """Return the 'opacity' bounding boxes of one image, rescaled and rounded.

    Args:
        data_frame: DataFrame indexed by image id. The selected row must
            provide a space-separated ``label`` string made of six tokens
            per box (the last four being x1, y1, x2, y2) and the
            ``scale_w`` / ``scale_h`` scale factors.
        id_image: Index value of the row to read.

    Returns:
        ``np.int16`` array of shape ``(num_boxes, 4)`` with x1, y1, x2, y2.
        The shape is ``(0, 4)`` when no box is tagged ``opacity``.
    """
    # Use the frame that was passed in rather than a notebook-level global.
    data = data_frame.loc[id_image]

    # x coordinates scale with width, y coordinates with height: [w, h, w, h]
    scale = np.array([data["scale_w"], data["scale_h"]] * 2)

    tokens = np.array(data['label'].split(' '))  # split words
    boxes = np.reshape(tokens, (-1, 6))  # reshape to [num_boxes, 6]

    # Keep only rows tagged 'opacity' (drops placeholder boxes). A boolean
    # row mask selects each matching row exactly once.
    is_opacity = np.any(boxes == 'opacity', axis=1)
    boxes = boxes[is_opacity]

    boxes = boxes[:, 2:].astype(np.float32)  # get x1, y1, x2, y2
    boxes = boxes * scale  # resize boxes
    boxes = np.around(boxes, 0).astype(np.int16)  # convert to whole numbers

    return boxes

In [None]:
def get_label(data_frame, id_image):
    """Return the integer class index of one image.

    Args:
        data_frame: DataFrame indexed by image id that contains one column
            per class name listed below.
        id_image: Index value of the row to read.

    Returns:
        Position (0-3) of the class column holding the largest value, in the
        order: 'Negative for Pneumonia', 'Typical Appearance',
        'Indeterminate Appearance', 'Atypical Appearance'.
    """
    # Use the frame that was passed in rather than a notebook-level global.
    data = data_frame.loc[id_image]

    class_columns = ['Negative for Pneumonia', 'Typical Appearance',
                     'Indeterminate Appearance', 'Atypical Appearance']
    scores = data[class_columns].values

    # Index of the highest-valued class column
    return np.argmax(scores)

## Arrange files and dataframe

In [None]:
# Collect the PNG image paths in a deterministic (sorted) order
image_files = glob.glob(f'{config["data_dir"]}/images/*.png')
image_files = sorted(image_files)

# Load the annotation table, append the per-image scale factors
# (config size divided by the width/height columns), then index the rows
# by 'id_image' so a single image can be looked up with .loc
df = (
    pd.read_csv(f'{config["data_dir"]}/train.csv')
    .assign(
        scale_w=lambda frame: config["size"] / frame["width"],
        scale_h=lambda frame: config["size"] / frame["height"],
    )
    .set_index('id_image')
)

## Check sample 

In [None]:
# Position of the sample to inspect in the sorted list of image files
c = 0

file_path = image_files[c]

# Extract image id: the file name without directory or extension
id_image = os.path.splitext(os.path.basename(file_path))[0]

# Read image as single-channel grayscale (flag value 0)
image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread signals failure by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {file_path}")

# Get boxes
boxes = get_boxes(df, id_image)

# Get label (integer index) and map it to its class name
label = get_label(df, id_image)
label = CLASSES[label]

# Draw bounding boxes on a 3-channel copy so the rectangles can be coloured
draw_image = np.dstack([image] * 3)
for x1, y1, x2, y2 in boxes:
    # Pass built-in ints so drawing does not depend on NumPy scalar handling
    top_left = (int(x1), int(y1))
    bottom_right = (int(x2), int(y2))
    draw_image = cv2.rectangle(draw_image, top_left, bottom_right, (255, 0, 0), 2)

# Plot image with bounding box
plt.figure(figsize=(5, 5))
plt.title(label)
plt.imshow(draw_image)
plt.axis("off")
plt.show()