In [76]:
SEED = 9

import sys
sys.path.append('../..')

import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
import glob
import shutil
from utils.fun import reverseVector, drawBinary
import matplotlib.pyplot as plt

# PADCHEST dataset

## Split into Train (70%), Val (15%), Test (15%) -> Quick Preprocess -> Save to Set Dirs

In [4]:
all_examples = [f.split('/')[-1] for f in glob.glob('All_Images/*.png')]
all_examples[:5], len(all_examples)

(['1256842362861431725328351539259305635_u1qifz.png',
  '10155709300728342918543955138521808206_f7cj92.png',
  '10287653421930576798556842610982533460_vpbhw6.png',
  '10383960670432673238945376919735423432_hd3moq.png',
  '10996416492353037588312781035930080694_8rstz0.png'],
 137)

In [5]:
train, val_test = train_test_split(all_examples, test_size=0.3, random_state=SEED)
val, test = train_test_split(val_test, test_size=0.5, random_state=SEED)

len(train), len(val), len(test)

(95, 21, 21)

In [6]:
assert bool(set(train) & set(val)) == False
assert bool(set(train) & set(test)) == False

In [7]:
with open('train_files.txt', 'w') as f:
    for line in train:
        f.write(f"{line}\n")

with open('val_files.txt', 'w') as f:
    for line in val:
        f.write(f"{line}\n")
        
with open('test_files.txt', 'w') as f:
    for line in test:
        f.write(f"{line}\n")

In [102]:
def preprocess(set_, flist):
    try:
        os.mkdir(set_+'/Images')
        os.mkdir(set_+'/Masks')
        os.mkdir(set_+'/Landmarks')
    except:
        pass
    
    i = 1

    for file in flist:
        print('\r',i,'of', len(flist),end='')
        
        # preprocess images

        img = cv2.imread('All_Images/'+file, 0)

        gray = 255*(img > 1) # To invert the text to white
        coords = cv2.findNonZero(gray) # Find all non-zero points (text)

        x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box
        cropimg = img[y:y+h, x:x+w] # Crop the image - note we do this on the original image

        shape = cropimg.shape

        if shape[0] < shape[1]:
            pad = (shape[1] - shape[0])    

            if pad % 2 == 1:
                pad = pad // 2
                pad_y = [pad, pad+1]
            else:
                pad = pad // 2
                pad_y = [pad, pad]

            pad_x = [0, 0]
        elif shape[1] < shape[0]:
            pad = (shape[0] - shape[1]) 

            if pad % 2 == 1:
                pad = pad // 2
                pad_x = [pad, pad+1]
            else:
                pad = pad // 2
                pad_x = [pad, pad]

            pad_y = [0, 0]

        img = np.pad(cropimg, pad_width = [pad_y, pad_x])    

        if img.shape[0] != img.shape[1]:
            print('Error padding image')
            break

        img_ = cv2.resize(img, [1024, 1024])

        cv2.imwrite(set_+'/Images/'+file, img_)
        
        # preprocess landmarks
        
        RL = np.load('../Chest-xray-landmark-dataset-main/landmarks/RL/'+file.replace('.png', '.npy'))
        LL = np.load('../Chest-xray-landmark-dataset-main/landmarks/LL/'+file.replace('.png', '.npy'))
        H = np.load('../Chest-xray-landmark-dataset-main/landmarks/H/'+file.replace('.png', '.npy'))
        
        landmarks = np.concatenate([RL, LL, H], axis=0).reshape(-1)
        assert landmarks.shape == (240,)
        np.save(set_+'/Landmarks/'+file.replace('.png', '.npy'), landmarks)
        np.save('All_Landmarks/'+file.replace('.png', '.npy'), landmarks)
        
        # preprocess masks
        
        p1, p2, h, c1, c2 = reverseVector(landmarks)
        lungs = drawBinary(np.zeros([1024,1024]), p1)
        lungs = drawBinary(lungs, p2)
        heart = drawBinary(np.zeros([1024,1024]), h)

        mask = np.zeros([1024, 1024])
        mask[heart == 255] = 2
        mask[lungs == 255] = 1
        np.save(set_+'/Masks/'+file.replace('.png', '.npy'), mask)
        
        np.save('All_Masks/'+file.replace('.png', '.npy'), mask)
        
        plt.imshow(img_)
        plt.imshow(mask, alpha=0.5)
        plt.scatter(*landmarks.reshape(-1,2).T, c='r', s=1)
        plt.savefig('Visualise_Data/'+file, dpi=200)
        plt.close()

        i = i+1

In [103]:
trainlist = open('train_files.txt','r').read().splitlines()
preprocess('Train', trainlist)

 95 of 95

In [104]:
vallist = open('val_files.txt','r').read().splitlines()
preprocess('Val', vallist)

 21 of 21

In [105]:
testlist = open('test_files.txt','r').read().splitlines()
preprocess('Test', testlist)

 21 of 21