<span style="background-color:#f6f8fa">Last updated: 2022-Oct-24 / Yisak Kim (yisakk@snu.ac.kr)</span>

# 01_Resize_CheXpert

In [1]:
import os
import random
import shutil

from PIL import Image
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Manifests (train/valid CSVs) of the "small" CheXpert release.
small_dir = "CheXpert-v1.0-small"
train_df, valid_df = (
    pd.read_csv(os.path.join(small_dir, csv_name))
    for csv_name in ("train.csv", "valid.csv")
)

# Same two manifests, read from the "CheXpert-v1.0" directory.
org_dir = "CheXpert-v1.0"
org_train_df, org_valid_df = (
    pd.read_csv(os.path.join(org_dir, csv_name))
    for csv_name in ("train.csv", "valid.csv")
)

In [3]:
def rand_idx(df=train_df):
    """Draw a random integer between 0 and ``len(df) - 1`` inclusive.

    Args:
        df: Any sized object. Defaults to the ``train_df`` object that was
            bound when this function was defined.

    Returns:
        int: A random integer in ``[0, len(df) - 1]``.

    Raises:
        ValueError: If ``df`` is empty (there is nothing to draw from).
    """
    n_rows = len(df)
    return random.randrange(n_rows)

def idx_to_img(img_idx, df=train_df):
    """Open the image whose file path is stored at ``df["Path"][img_idx]``.

    Args:
        img_idx: Key used to index the ``Path`` column with ``[]``.
        df: DataFrame that has a ``Path`` column. Defaults to the ``train_df``
            object bound when this function was defined.

    Returns:
        The image object returned by ``Image.open`` for that path.
    """
    img_path = df["Path"][img_idx]
    return Image.open(img_path)

def multiple_plot(img_idxs, rows, cols, figsize=(30, 10)):
    """Show images on a ``rows`` x ``cols`` grid, one image per panel.

    Images are loaded with ``idx_to_img(img_idx)``, i.e. from that helper's
    default DataFrame. Panels are filled in row-major order and each panel is
    titled with its image index.

    Args:
        img_idxs: Sequence of image indices to display. If it holds fewer
            than ``rows * cols`` entries the surplus panels are left blank;
            extra entries are ignored.
        rows: Number of grid rows.
        cols: Number of grid columns.
        figsize: Figure size passed to ``plt.subplots``.
    """
    # squeeze=False keeps `axs` a 2-D array even when rows == 1 or cols == 1;
    # with the default, a single row/column collapses to 1-D (or a bare Axes)
    # and two-index addressing fails.
    fig, axs = plt.subplots(rows, cols, figsize=figsize, squeeze=False)

    # axs.flat walks the grid row by row, matching (idx // cols, idx % cols).
    for plot_idx, ax in enumerate(axs.flat):
        if plot_idx < len(img_idxs):
            img_idx = img_idxs[plot_idx]
            ax.imshow(np.array(idx_to_img(img_idx)), cmap='gray')
            ax.set_title(f'{img_idx}', fontsize=15)
        # Hide ticks/frame on every panel, including unused ones.
        ax.axis("off")

In [4]:
# Preview the training manifest; the rendered table is truncated to the first and last rows.
train_df

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,CheXpert-v1.0-small/train/patient00001/study1/...,Female,68,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0
1,CheXpert-v1.0-small/train/patient00002/study2/...,Female,87,Frontal,AP,,,-1.0,1.0,,-1.0,-1.0,,-1.0,,-1.0,,1.0,
2,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Frontal,AP,,,,1.0,,,-1.0,,,,,,1.0,
3,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Lateral,,,,,1.0,,,-1.0,,,,,,1.0,
4,CheXpert-v1.0-small/train/patient00003/study1/...,Male,41,Frontal,AP,,,,,,1.0,,,,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223409,CheXpert-v1.0-small/train/patient64537/study2/...,Male,59,Frontal,AP,,,,-1.0,,,,,-1.0,0.0,1.0,,,
223410,CheXpert-v1.0-small/train/patient64537/study1/...,Male,59,Frontal,AP,,,,-1.0,,,,0.0,-1.0,,-1.0,,,
223411,CheXpert-v1.0-small/train/patient64538/study1/...,Female,0,Frontal,AP,,,,,,-1.0,,,,,,,,
223412,CheXpert-v1.0-small/train/patient64539/study1/...,Female,0,Frontal,AP,,,1.0,1.0,,,,-1.0,1.0,0.0,,,,0.0


In [5]:
def padding(pil_img):
    """Pad an image to a square canvas, keeping the image centred.

    The canvas side equals the longer edge of ``pil_img``. The image is
    pasted so that the shorter dimension is centred (integer division, so any
    odd leftover pixel goes to the bottom/right).

    Args:
        pil_img: Image object exposing ``size`` and ``mode``.

    Returns:
        ``pil_img`` itself when it is already square; otherwise a new image
        created with ``Image.new`` in the same mode with ``pil_img`` pasted in.
    """
    w, h = pil_img.size
    if w == h:
        # Already square: hand back the very same object, no copy.
        return pil_img

    side = max(w, h)
    canvas = Image.new(pil_img.mode, (side, side))
    # Exactly one of the two offsets is non-zero: the one along the short edge.
    canvas.paste(pil_img, ((side - w) // 2, (side - h) // 2))
    return canvas
    
def resize_all(df, img_size, org_root, root_dir):
    """Shrink, square-pad and re-save every image listed in ``df``.

    For each row the image is opened via ``idx_to_img``, reduced in place
    with ``Image.thumbnail`` so that it fits inside ``img_size`` x
    ``img_size`` (aspect ratio kept), padded to a square with ``padding``,
    and written to the row's ``Path`` with ``org_root`` replaced by
    ``root_dir``. Missing output directories are created on the fly.

    Note: ``thumbnail`` yields an image no larger than the requested size, so
    the saved square may be smaller than ``img_size``.

    Args:
        df: DataFrame with a ``Path`` column of image file paths.
        img_size: Maximum edge length, in pixels, passed to ``thumbnail``.
        org_root: Substring of each path to be replaced.
        root_dir: Replacement for ``org_root``; also created as a directory.
    """
    os.makedirs(root_dir, exist_ok=True)

    # iterrows() has no length, so pass total= to get a real progress bar/ETA.
    for i, row in tqdm(df.iterrows(), total=len(df)):
        new_path = row["Path"].replace(org_root, root_dir)
        # os.path.dirname instead of replacing the filename text inside the
        # path: the latter also strips matching text from directory names and
        # produces "" (which makedirs rejects) for a path with no separator.
        dir_path = os.path.dirname(new_path)

        # Context manager guarantees the source file handle is released.
        with idx_to_img(i, df=df) as pil_img:
            pil_img.thumbnail((img_size, img_size))
            pad_img = padding(pil_img)

            if dir_path:
                os.makedirs(dir_path, exist_ok=True)
            pad_img.save(new_path)

In [6]:
# Maximum edge length handed to resize_all, and the path substitution
# (org_root -> root_dir) that decides where the resized copies are written.
img_size = 224
org_root = 'CheXpert-v1.0-small'
root_dir = 'CheXpert-v1.0-pad224'

# Training split (the recorded run below took roughly 49 minutes).
resize_all(df=train_df, img_size=img_size, org_root=org_root, root_dir=root_dir)

223414it [49:29, 75.23it/s] 


In [7]:
# Validation split, same settings as the training run.
resize_all(df=valid_df, img_size=img_size, org_root=org_root, root_dir=root_dir)

234it [00:01, 127.90it/s]


In [9]:
# Point both manifests at the resized copies by replacing org_root with
# root_dir in every stored path.
for manifest in (train_df, valid_df):
    manifest['Path'] = manifest['Path'].map(lambda p: p.replace(org_root, root_dir))

In [10]:
# Display the manifest again to check that the Path column now uses the new root directory.
train_df

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,CheXpert-v1.0-pad224/train/patient00001/study1...,Female,68,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0
1,CheXpert-v1.0-pad224/train/patient00002/study2...,Female,87,Frontal,AP,,,-1.0,1.0,,-1.0,-1.0,,-1.0,,-1.0,,1.0,
2,CheXpert-v1.0-pad224/train/patient00002/study1...,Female,83,Frontal,AP,,,,1.0,,,-1.0,,,,,,1.0,
3,CheXpert-v1.0-pad224/train/patient00002/study1...,Female,83,Lateral,,,,,1.0,,,-1.0,,,,,,1.0,
4,CheXpert-v1.0-pad224/train/patient00003/study1...,Male,41,Frontal,AP,,,,,,1.0,,,,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223409,CheXpert-v1.0-pad224/train/patient64537/study2...,Male,59,Frontal,AP,,,,-1.0,,,,,-1.0,0.0,1.0,,,
223410,CheXpert-v1.0-pad224/train/patient64537/study1...,Male,59,Frontal,AP,,,,-1.0,,,,0.0,-1.0,,-1.0,,,
223411,CheXpert-v1.0-pad224/train/patient64538/study1...,Female,0,Frontal,AP,,,,,,-1.0,,,,,,,,
223412,CheXpert-v1.0-pad224/train/patient64539/study1...,Female,0,Frontal,AP,,,1.0,1.0,,,,-1.0,1.0,0.0,,,,0.0


In [11]:
# Write the updated manifests next to the resized images, without the index column.
for csv_name, manifest in (("train.csv", train_df), ("valid.csv", valid_df)):
    manifest.to_csv(os.path.join(root_dir, csv_name), index=False, encoding='cp949')