# Plotting Data
### This brief notebook plots many X-ray images with and without their bounding boxes. It is intended to show what the data we are working with looks like.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
%matplotlib inline

In [None]:
# CSV file that is loaded into the `data` annotation table below
PATH = '../input/vinbigdata-competition-jpg-data-4x-downsampled/train_downsampled.csv'
# Directory prefix of the .jpg files; load_image appends the file name directly,
# so the trailing '/' is required
IMG_ROOT = '../input/vinbigdata-competition-jpg-data-4x-downsampled/train/train/'

In [None]:
# Load the comma-separated annotation table into a DataFrame
data = pd.read_csv(PATH, sep=',')

In [None]:
# Preview the first rows of the annotation table
data.head()

In [None]:
# Get image as numpy array
def load_image(name, path):
    """Read ``<path><name>.jpg`` with OpenCV and return the pixel array.

    Args:
        name: Image file name without the ``.jpg`` extension.
        path: Directory prefix. It is concatenated as-is, so it must end
            with a path separator.

    Returns:
        The array returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    img_path = path + name + '.jpg'
    img = cv2.imread(img_path)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than later with an unrelated error.
    if img is None:
        raise FileNotFoundError('Could not read image: ' + img_path)
    return img

# Show an image array with its shape as the title
def plot_image(img):
    """Draw ``img`` with ``plt.imshow`` and title the plot with ``img.shape``.

    Args:
        img: Image array; must expose a ``shape`` attribute.
    """
    plt.imshow(img)
    shape_label = img.shape
    plt.title(shape_label)
    
# Show several images side by side in a rows x cols grid
def plot_grid(img_names, img_root, rows=5, cols=5):
    """Load each named image and plot it in its own subplot.

    Args:
        img_names: Iterable of image names (without extension); the n-th
            name is placed in subplot position n, counting from 1.
        img_root: Directory prefix handed to ``load_image``.
        rows: Number of subplot rows in the grid.
        cols: Number of subplot columns in the grid.
    """
    figure = plt.figure(figsize=(25, 25))

    # Subplot positions are 1-based, so start the counter at 1.
    for position, image_name in enumerate(img_names, start=1):
        figure.add_subplot(rows, cols, position)
        plot_image(load_image(image_name, img_root))

    plt.show()

In [None]:
# Plot the first 25 distinct images. The table can list the same image_id on
# several rows (one per annotation), so de-duplicate before slicing.
plot_grid(data['image_id'].unique()[:25], IMG_ROOT)

In [None]:
# filter out healthy samples (class_id 14)
disease_data = data[data['class_id'] != 14]
# get unique filenames in order of first appearance; list(set(...)) would order
# them differently from run to run because string hashing is randomized
filenames = disease_data['image_id'].unique().tolist()
# print a few filenames
print(filenames[:10])

In [None]:
# show every annotation row that belongs to one diseased xray image
disease_data.loc[disease_data['image_id'] == filenames[10]]

In [None]:
# One distinct colour per class_id 0-14, indexed by class_id. Every entry must
# be unique so boxes of different classes can be told apart in the plots.
_CLASS_COLORS = (
    (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255),
    (0, 255, 255), (255, 100, 100), (100, 255, 100), (100, 100, 255),
    (255, 100, 0), (255, 0, 100), (100, 0, 255), (0, 100, 255),
    (100, 255, 0), (0, 255, 100),
)


def class_to_color(class_id):
    """Return the colour used to draw bounding boxes of ``class_id``.

    Args:
        class_id: Integer class index into the 15-entry colour table.

    Returns:
        A 3-tuple of channel values in the range 0-255.

    Raises:
        IndexError: If ``class_id`` is 15 or greater. Negative ids index
            from the end of the table, as with any Python sequence.
    """
    return _CLASS_COLORS[class_id]

# draw one annotation's rectangle onto an image array
def draw_bounding_box(img, annotation):
    """Draw the box described by ``annotation`` onto ``img``.

    Rows that contain any null value are skipped without drawing.

    Args:
        img: Image array passed to ``cv2.rectangle``.
        annotation: Row with ``x_min``, ``y_min``, ``x_max``, ``y_max`` and
            ``class_id`` entries.

    Returns:
        None.
    """
    has_missing_value = annotation.isnull().values.any()
    if not has_missing_value:
        top_left = (int(annotation['x_min']), int(annotation['y_min']))
        bottom_right = (int(annotation['x_max']), int(annotation['y_max']))

        # The box colour is looked up from the annotation's class.
        box_color = class_to_color(int(annotation['class_id']))

        cv2.rectangle(img, top_left, bottom_right, box_color, 2)

# draw every bounding box recorded for one image
def annotate_image(img, name, all_annotations):
    """Draw all annotations whose ``image_id`` equals ``name`` onto ``img``.

    Args:
        img: Image array handed to ``draw_bounding_box`` for each row.
        name: Image id used to select rows.
        all_annotations: DataFrame with an ``image_id`` column plus the
            columns ``draw_bounding_box`` reads.
    """
    belongs_to_image = all_annotations['image_id'] == name
    for _, box_row in all_annotations[belongs_to_image].iterrows():
        draw_bounding_box(img, box_row)

In [None]:
# Plot a single sample with all its bounding boxes
name = 'b75bba1e9dfb84fe1bd84c88c638c339'  # hard-coded image_id of the sample to show
img = load_image(name, IMG_ROOT)
# Draw the boxes onto img before displaying it; the full table is passed and
# annotate_image selects the rows whose image_id matches name.
annotate_image(img, name, data)
plot_image(img)

In [None]:
# Show several xray images in a grid, each with its bounding boxes drawn
def plot_annotated_grid(img_names, img_root, all_annotations, rows=5, cols=5):
    """Load, annotate and plot each named image in its own subplot.

    Args:
        img_names: Iterable of image names (without extension); the n-th
            name is placed in subplot position n, counting from 1.
        img_root: Directory prefix handed to ``load_image``.
        all_annotations: Annotation table handed to ``annotate_image``.
        rows: Number of subplot rows in the grid.
        cols: Number of subplot columns in the grid.
    """
    canvas = plt.figure(figsize=(16, 16))

    # Subplot positions are 1-based, so start the counter at 1.
    for slot, image_name in enumerate(img_names, start=1):
        canvas.add_subplot(rows, cols, slot)
        picture = load_image(image_name, img_root)
        annotate_image(picture, image_name, all_annotations)
        plot_image(picture)

    plt.show()

In [None]:
# filter out healthy samples (class_id 14)
disease_data = data[data['class_id'] != 14]
# unique image ids in order of first appearance, so the same images are plotted
# on every run (list(set(...)) order changes between runs)
filenames = disease_data['image_id'].unique().tolist()

# Plot 25 xray images with their bounding boxes
plot_annotated_grid(filenames[:25], IMG_ROOT, data, rows=5, cols=5)

In [None]:
# filter out healthy samples (class_id 14)
disease_data = data[data['class_id'] != 14]
# unique image ids in order of first appearance, so the same images are plotted
# on every run (list(set(...)) order changes between runs)
filenames = disease_data['image_id'].unique().tolist()

# Plot 25 more xray images (entries 25-49) with their bounding boxes
plot_annotated_grid(filenames[25:50], IMG_ROOT, data, rows=5, cols=5)