In [None]:
# Enable IPython's autoreload extension in mode 2 so edited modules are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
from glob import glob
import os
import pandas as pd
import numpy as np

from pathlib import Path
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

In [None]:
# Coarse lesion types 1~8: bone, abdomen, mediastinum, liver, lung, kidney, soft tissue, pelvis
# Only lesions in the val and test sets were annotated; all other lesions are labelled -1.

In [None]:
# Offset subtracted from the stored PNG pixel values to obtain HU values (see `img_hu`).
HU_FACTOR = 32768

# Root folder of the dataset. Set the DEEPLESION_DIR environment variable to run the
# notebook on another machine; the default is the original location.
data_dir = Path(os.environ.get("DEEPLESION_DIR", "/home/liushifeng/Desktop/DeepLesion Dataset/"))

# Lesion-level metadata: one row per annotated key slice.
df = pd.read_csv(data_dir / "DL_info.csv")

In [None]:
# `File_name` is "<scan_name>_<slice file>": everything before the last underscore
# identifies the scan folder, the final token is the slice's file name.
name_parts = df['File_name'].str.split("_")
df['scan_name'] = name_parts.str[:-1].str.join("_")
df['file_name'] = name_parts.str[-1]
# Relative path of the slice inside an `Images_png` folder.
df['image_path'] = df['scan_name'].str.cat(df['file_name'], sep="/")

1. Unzip the PNG zip archives
2. Extract the relevant slices
3. Save them in a compressed format

In [None]:
# Check how many comma-separated values each coordinate string holds.
# Both columns are summarised in one table so that both results are displayed
# (a cell only renders its last expression).
pd.DataFrame({
    col: df[col].str.split(",").str.len().value_counts()
    for col in ("Measurement_coordinates", "Bounding_boxes")
})

In [None]:
# Preview the first three rows, including the derived scan_name / file_name / image_path columns.
df.head(3)

* filter out `noisy` lesions
* check bounding boxes

In [None]:
# Folder holding the extracted `Images_png_*` directories. It is the same dataset root
# as `data_dir`, so reuse it instead of repeating the hard-coded path.
proj_folder = data_dir

In [None]:
# Sample one key slice per scan and collect the raw pixel values of the full slice
# (`imgs`) and of the lesion bounding-box crop (`crops`) for the histograms below.
# NOTE: `process_bbox_str` and `crop_lesion` are defined in a later cell of this
# notebook; that cell must be run before this one.
MAX_IMAGES = 1000  # stop after this many scans have been sampled

imgs = []
crops = []
n = 0

# Sorted so that the sampled subset is reproducible (os.listdir order is arbitrary).
data_folders = sorted(
    proj_folder / f
    for f in os.listdir(proj_folder)
    if f.startswith("Images_png_") and ".zip" not in f
)

for data_folder in data_folders:
    # The inner `break` only leaves the scan loop, so the cap is re-checked here.
    if n >= MAX_IMAGES:
        break
    print(data_folder)
    images_root = data_folder / "Images_png"
    scan_folders = [images_root / s for s in sorted(os.listdir(images_root))]

    for scan_folder in scan_folders:
        # `.name` (not `.stem`) so a folder name is never truncated at a dot.
        df_scan = df[df['scan_name'].eq(scan_folder.name)]
        if df_scan.empty:
            continue

        # Only the first annotated key slice of each scan is sampled.
        df_slice = df_scan.iloc[0]
        key_slice_path = scan_folder / df_slice['file_name']
        bbox = process_bbox_str(df_slice['Bounding_boxes'])

        # Context manager closes the file handle once the pixels have been copied out.
        with Image.open(key_slice_path) as img:
            imgs.append(np.array(img, dtype=int).flatten())
            crop = crop_lesion(img, bbox)
            crops.append(np.array(crop, dtype=int).flatten())

        n += 1
        if n >= MAX_IMAGES:
            print(f"{MAX_IMAGES} images")
            break

len(crops)

In [None]:
# Path of the last key slice opened by the sampling loop (the loop variable persists after the loop).
key_slice_path

In [None]:
# Open one example slice. The path is built from `proj_folder` so the dataset root is
# not hard-coded a third time.
img = Image.open(proj_folder / "Images_png_55" / "Images_png" / "004263_01_01" / "236.png")

In [None]:
# Convert the stored pixel values to HU by removing the fixed offset.
raw_pixels = np.array(img, dtype=int)
img_hu = raw_pixels - HU_FACTOR

In [None]:
# Lung window: rescale -1024..-300 to 0..1; anything above the window is set to 0.
# (`clip_normalize` is defined in a later cell of this notebook.)
lung_window = (-1024, -300)
lung = clip_normalize(img_hu, lung_window[0], lung_window[1], remove_high=True)
plt.imshow(lung, cmap="gray")
plt.axis("off");

In [None]:
# Soft-tissue window: rescale -300..400 to 0..1; anything above the window is set to 0.
# (`clip_normalize` is defined in a later cell of this notebook.)
soft_window = (-300, 400)
soft = clip_normalize(img_hu, soft_window[0], soft_window[1], remove_high=True)
plt.imshow(soft, cmap="gray")
plt.axis("off");

In [None]:
# Bone window: rescale 400..1000 to 0..1; values above the window saturate at 1
# because remove_high is off. (`clip_normalize` is defined in a later cell.)
bone_window = (400, 1000)
bone = clip_normalize(img_hu, bone_window[0], bone_window[1], remove_high=False)
plt.imshow(bone, cmap="gray")
plt.axis("off");

In [None]:
# Three-channel composite built from the lung window: channel 0 is modulated by the
# soft-tissue window, channel 1 is the plain lung window, channel 2 is modulated by
# the bone window.
base = np.dstack([lung * soft, lung, lung * bone])
plt.imshow(base)

In [None]:
# Pack the three windows into one image: soft tissue, bone and lung as channels 0, 1 and 2.
img_rgb = np.dstack([soft, bone, lung])
plt.imshow(img_rgb)
plt.axis("off");

In [None]:
# Inspect the offset-corrected pixel array (stored values minus HU_FACTOR).
img_hu

In [None]:
# Path of the last key slice opened by the sampling loop; `bbox` comes from that same iteration.
key_slice_path

In [None]:
# Render the three-window composite with a bounding box outlined on top.
# (`display_img` is defined in a later cell of this notebook.)
# NOTE(review): `bbox` is whatever the sampling loop assigned last, while `img_rgb`
# derives from the slice opened separately via a hard-coded path — confirm that both
# refer to the same slice, otherwise the box is drawn on the wrong image.
display_img(img_rgb, bbox)

In [None]:
# Quantise the 0..1 composite to 8 bits per channel and write it as a JPEG
# (quality 95, subsampling setting 0).
rgb_bytes = (img_rgb * 255).astype(np.uint8)
img_uint8 = Image.fromarray(rgb_bytes)
img_uint8.save("rgb.jpg", quality=95, subsampling=0)

In [None]:
def clip_normalize(img_arr, min_value, max_value, remove_high=True):
    """Window an intensity array to [min_value, max_value] and rescale it to [0, 1].

    Parameters
    ----------
    img_arr : array-like
        Intensity values. The input is never modified; a float copy is used.
    min_value, max_value : number
        Lower and upper bound of the window. ``max_value`` must be greater
        than ``min_value``.
    remove_high : bool, default True
        If True, values greater than ``max_value`` are set to ``min_value``
        (i.e. 0 after normalization) instead of saturating at 1.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape with values in [0, 1].

    Raises
    ------
    ValueError
        If ``max_value <= min_value`` (the window would have zero or negative width).
    """
    if max_value <= min_value:
        raise ValueError(
            f"max_value ({max_value}) must be greater than min_value ({min_value})"
        )
    # Work on a float copy: an integer array would silently truncate a fractional
    # `min_value` in the assignment below and shift the normalized result.
    img_arr = np.array(img_arr, dtype=float)
    if remove_high:
        img_arr[img_arr > max_value] = min_value
    img_arr = img_arr.clip(min_value, max_value)
    return (img_arr - min_value) / (max_value - min_value)

def process_bbox_str(bbox):
    """Parse a comma-separated bounding-box string into a list of floats.

    Splits on "," alone (``float`` ignores surrounding whitespace), so both
    "x1, y1, x2, y2" and "x1,y1,x2,y2" are accepted.

    Raises
    ------
    ValueError
        If any token is not a valid number.
    """
    return [float(token) for token in bbox.split(",")]

def crop_lesion(img, bbox):
    """Return the region of `img` delimited by `bbox`, as produced by `img.crop`."""
    lesion_patch = img.crop(bbox)
    return lesion_patch

def display_img(img, bbox=None):
    """Convert a normalized image to an 8-bit RGB PIL image, optionally outlining a box.

    Parameters
    ----------
    img : array-like
        Pixel intensities, expected in [0, 1]. Values outside that range are
        clipped before scaling to 0..255.
    bbox : sequence, optional
        Rectangle passed to ``ImageDraw.rectangle`` and drawn with a white outline.

    Returns
    -------
    PIL.Image.Image
        The image in "RGB" mode.
    """
    img_arr = np.asarray(img, dtype=float)
    # Clip first: out-of-range values would otherwise wrap around in the uint8 cast.
    img_arr_uint8 = (np.clip(img_arr, 0.0, 1.0) * 255).astype(np.uint8)
    canvas = Image.fromarray(img_arr_uint8).convert("RGB")
    if bbox is not None:
        ImageDraw.Draw(canvas).rectangle(bbox, outline="white")
    return canvas

In [None]:
# Pool the pixels of every sampled slice, shift them by HU_FACTOR, and plot the
# distribution restricted to the -1200..1000 range.
slice_values = np.concatenate(imgs) - HU_FACTOR
df_imgs = pd.Series(slice_values)
in_plot_range = df_imgs.between(-1200, 1000)
df_imgs[in_plot_range].hist(bins=200);


In [None]:
# Same histogram for the lesion crops only, restricted to the -2000..500 range.
crop_values = np.concatenate(crops) - HU_FACTOR
df_crops = pd.Series(crop_values)
in_plot_range = df_crops.between(-2000, 500)
df_crops[in_plot_range].hist(bins=200);

In [None]:
# Hard-coded path of a single slice PNG to inspect.
img_path = "/home/liushifeng/Desktop/DeepLesion Dataset/Images_png_56/Images_png/004459_01_01/021.png"

In [None]:
# `img_arr` was never defined at notebook scope, so this cell failed on a fresh
# kernel; load the slice referenced by `img_path` before plotting its histogram.
with Image.open(img_path) as slice_img:
    img_arr = np.array(slice_img, dtype=int)
pd.Series(img_arr.flatten() - HU_FACTOR).hist(bins=100);