In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from pandas.core.frame import DataFrame
import pandas as pd
from joblib import Parallel, delayed
import shutil

In [None]:
def load_labels(label_file):
    """Load image filenames, class ids, and bounding boxes from a label file.

    Each non-blank line must contain six whitespace-separated fields:
    ``filename class xmin ymin xmax ymax``. Blank lines are skipped.

    Args:
        label_file: Path to the text label file.

    Returns:
        A tuple ``(fnames, classes, bboxes)`` of three parallel lists:
        filenames (str), class ids (int), and ``(xmin, ymin, xmax, ymax)``
        tuples of ints.

    Raises:
        ValueError: If a non-blank line does not have exactly six fields, or
            one of its numeric fields is not an integer.
    """
    fnames, classes, bboxes = [], [], []
    with open(label_file, 'r') as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for line_no, line in enumerate(f, start=1):
            fields = line.split()  # split() already discards the trailing newline
            if not fields:
                # A blank line (e.g. a trailing newline at EOF) carries no label.
                continue
            if len(fields) != 6:
                raise ValueError(
                    f'{label_file}:{line_no}: expected 6 fields, got {len(fields)}')
            fname, cls, xmin, ymin, xmax, ymax = fields
            fnames.append(fname)
            classes.append(int(cls))
            bboxes.append((int(xmin), int(ymin), int(xmax), int(ymax)))
    return fnames, classes, bboxes

Let's try loading some images from the dataset and displaying their class labels and bounding boxes:

In [None]:
# Dataset locations inside the Kaggle input mount
label_file = '/kaggle/input/covidxct/train_COVIDx_CT-3A.txt'
image_dir = '/kaggle/input/covidxct/3A_images'

# Parse the label file into three parallel lists
fnames, classes, bboxes = load_labels(label_file)

In [None]:
# Number of label entries returned by load_labels (one filename per parsed line).
len(fnames)

In [None]:
# Wrap each label field in its own frame, then join them side by side so
# every row describes one image: filename, class id, four bbox coordinates.
df_fnames = pd.DataFrame(fnames)
df_classes = pd.DataFrame(classes)
df_bboxes = pd.DataFrame(bboxes)
label_frames = [df_fnames, df_classes, df_bboxes]
data = pd.concat(label_frames, axis=1)

In [None]:
# Name the six columns: image filename, class id, then the bounding-box
# corners in the order they were stored (xmin, ymin, xmax, ymax).
data.columns = ['image_id', 'class', 'xmin','ymin','xmax','ymax']
# Display the assembled label table as the cell output.
data

In [None]:
# Report the shape of three consecutive row ranges of the label table;
# a stop of None means "through the last row".
for chunk_start, chunk_stop in ((0, 115000), (115000, 230000), (230000, None)):
    print(data.iloc[chunk_start:chunk_stop].shape)

In [None]:
# Create the scratch output directories for the cropped images.
# os.makedirs(..., exist_ok=True) is the portable equivalent of `mkdir -p`
# and keeps the cell safe to re-run.
for split_dir in ('/tmp/train_images', '/tmp/test_images', '/tmp/val_images'):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
def crop_image_val(row, image_dir='/kaggle/input/covidxct/3A_images',
                   save_dir='/tmp/train_images'):
    """Crop one image to its bounding box and save it under the same filename.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys
            ``image_id``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        image_dir: Directory holding the source images.
        save_dir: Directory the cropped image is written to; it must already
            exist.

    Returns:
        None. The cropped image is written to ``save_dir`` as a side effect.

    Raises:
        FileNotFoundError: If the source image cannot be read.
        OSError: If the cropped image cannot be written.
    """
    image_id = row['image_id']
    image_path = os.path.join(image_dir, image_id)
    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of a TypeError on slicing.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    xmin, ymin, xmax, ymax = (int(row[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax'))
    img = image[ymin:ymax, xmin:xmax]  # rows are y, columns are x

    out_path = os.path.join(save_dir, image_id)
    if not cv2.imwrite(out_path, img):
        # cv2.imwrite reports failure through its boolean return value;
        # surface it so a missing crop does not go unnoticed.
        raise OSError(f'Could not write cropped image: {out_path}')
    return

In [None]:
# Crop every labelled image using a thread pool across all cores.
# The progress bar is labelled 'train' because the crops are written to
# /tmp/train_images. The results are discarded (crop_image_val returns None).
_ = Parallel(n_jobs=-1, backend='threading')(
    delayed(crop_image_val)(row)
    for _, row in tqdm(data.iterrows(), total=len(data), desc='train')
)

In [None]:
# Zip /tmp/train_images into /kaggle/working/train_images.zip; paths inside
# the archive are relative to /tmp/, so they start with "train_images/".
# The archive path returned by make_archive is the cell's output.
shutil.make_archive(
    '/kaggle/working/train_images',
    'zip',
    root_dir='/tmp/',
    base_dir='train_images',
)

In [None]:
# Draw a reproducible random sample of nine label entries
np.random.seed(27)
indices = np.random.choice(np.arange(len(fnames)), 9)

# Plot the sample on a 3x3 grid with each bounding box outlined
class_names = ('Normal', 'Pneumonia', 'COVID-19')
fig, axes = plt.subplots(3, 3, figsize=(16, 16))
for ax, index in zip(axes.ravel(), indices):
    cls, bbox = classes[index], bboxes[index]
    image_file = os.path.join(image_dir, fnames[index])
    image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)
    image = np.stack((image, image, image), axis=-1)  # replicate into 3 channels
    # bbox[:2] is the (xmin, ymin) corner, bbox[2:] the (xmax, ymax) corner
    cv2.rectangle(image, bbox[:2], bbox[2:], color=(255, 0, 0), thickness=3)
    ax.imshow(image)
    ax.set_title(f'Class: {class_names[cls]} ({cls})')
plt.show()

Cropping preview: show each sampled image before and after cropping to its bounding box.

In [None]:
# Show the current working directory (bare `pwd` presumably relies on IPython automagic).
pwd

In [None]:
# Re-draw the same reproducible sample of nine label entries
np.random.seed(27)
indices = np.random.choice(np.arange(len(fnames)), 9)

# For each sampled case show the full image with its bounding box ("Before")
# next to the region that cropping keeps ("After").
class_names = ('Normal', 'Pneumonia', 'COVID-19')
for index in indices:  # no dependency on `axes` left over from an earlier cell
    image_file = os.path.join(image_dir, fnames[index])
    ID = os.path.splitext(fnames[index])[0]  # drop only the file extension
    image = cv2.imread(image_file, cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread returns None on failure instead of raising
        raise FileNotFoundError(f'Could not read image: {image_file}')
    image = np.stack([image]*3, axis=-1)  # make image 3-channel
    bbox = bboxes[index]
    cls = classes[index]
    xmin, ymin, xmax, ymax = bbox

    # Take the crop BEFORE drawing the rectangle so the "After" panel shows
    # the raw pixels, without the box outline bleeding into its edges.
    cropped = image[ymin:ymax, xmin:xmax].copy()

    # Scale the outline with image size (2 px at 512x512), never below 1 px
    dim = np.sqrt(np.prod(image.shape[:2]))
    line_thickness = max(1, int(2/512*dim))
    cv2.rectangle(image, bbox[:2], bbox[2:], color=(255, 0, 0), thickness=line_thickness)

    # Display
    pair_fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(10, 5))
    ax_before.imshow(image)
    ax_before.set_title('Before;Class: {} ({})'.format(class_names[cls], cls))
    ax_before.axis('off')
    ax_after.imshow(cropped)
    ax_after.set_title('After;Class: {} ({})'.format(class_names[cls], cls))
    ax_after.axis('off')
    pair_fig.suptitle(f'id: {ID}', y=1.04)
    pair_fig.tight_layout()
    plt.show()