In [None]:
import os
import csv
import numpy as np
from collections import namedtuple
import shutil
import cv2
import imageio
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
from matplotlib.pyplot import imread, imshow, subplots, show
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
# Collect (patient, fid, zone, clinsig) for every finding listed in the two
# ProstateX findings CSVs. The file without labels has no ClinSig column, so
# its findings are recorded with clinsig 'UNKNOWN'.
findings_files = (
    ('data/ProstateX-Findings-withlabel.csv', True),
    ('data/ProstateX-Findings-wolabels.csv', False),
)

csv_info = []
for findings_path, has_clinsig in findings_files:
    with open(findings_path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=",", quotechar='"')
        _ = next(reader)  # skip the header row
        for row in reader:
            if has_clinsig:
                patient, fid, pos, zone, clinsig = row
            else:
                patient, fid, pos, zone = row
                clinsig = 'UNKNOWN'
            # Keep only the text after the last '-' of the patient id.
            patient = patient.split('-')[-1]
            csv_info.append((patient, fid, zone, clinsig))

print(len(csv_info))

In [None]:
# Load the preprocessed lesion crops from lesions_cropped/t2_npy and
# lesions_cropped/adc_npy, resizing each to a 32 by 32 array.
# Every T2 crop is paired with its ADC crop and stored together with the
# patient / finding identifiers and the original side length.

NamedLesion = namedtuple(
    'NamedLesion',
    ['patient', 't2_image', 't2_size', 'adc_image', 'adc_size', 'fid', 'zone', 'clinsig'],
)
lesions = []

# Sorted listings of both directories are paired position by position below.
t2_cropped = sorted(os.listdir('lesions_cropped/t2_npy'))
adc_cropped = sorted(os.listdir('lesions_cropped/adc_npy'))

if len(t2_cropped) != len(adc_cropped):
    raise ValueError("There's supposed to be the same number of T2 and ADC images")
input_size = len(t2_cropped)

def find_in_csv(patient, fid):
    """Look up the zone and clinical significance of one finding.

    Scans the module-level ``csv_info`` list for the first entry whose patient
    and finding id match. A finding id of 'Unnamed' is looked up as '1'.

    Returns:
        ``(zone, clinsig)`` of the first matching entry, or ``None`` when no
        entry matches.
    """
    wanted_fid = '1' if fid == 'Unnamed' else fid
    match = next(
        (entry for entry in csv_info
         if entry[0] == patient and entry[1] == wanted_fid),
        None,
    )
    if match is None:
        return None
    return (match[2], match[3])  # zone, clinsig

def _parse_lesion_filename(filename):
    """Return ``(patient, fid)`` parsed from a cropped-lesion filename.

    The filename stem must consist of exactly three '+'-separated fields: the
    last four characters of the first field are used as the patient id and the
    third field as the finding id.

    Raises:
        ValueError: if the stem does not split into exactly three fields.
    """
    stem, _ext = os.path.splitext(os.path.basename(filename))
    patient_field, _middle, fid = stem.split('+')
    return patient_field[-4:], fid


def _resize_lesion(img, side=32):
    """Rescale a lesion crop to ``side`` x ``side`` with bilinear interpolation.

    ``np.resize`` must not be used for this: it only repeats or truncates the
    flattened data and therefore scrambles the picture instead of scaling it.
    The crop is cast to float32, a dtype ``cv2.resize`` accepts.
    Assumes ``img`` is a 2-D array -- TODO confirm against the cropping step.
    """
    return cv2.resize(np.asarray(img, dtype=np.float32), (side, side),
                      interpolation=cv2.INTER_LINEAR)


# Pair the i-th T2 file with the i-th ADC file (both listings are sorted and
# have equal length) and keep the pair only when the filenames agree and the
# finding is present in the CSV data.
for t2_name, adc_name in zip(t2_cropped, adc_cropped):
    t2_patient, t2_fid = _parse_lesion_filename(t2_name)
    adc_patient, adc_fid = _parse_lesion_filename(adc_name)

    # Check the pairing before touching the arrays so mismatched files are
    # never loaded.
    if t2_patient != adc_patient or t2_fid != adc_fid:
        print("Patient ({} vs {}) or FID ({} vs {}) different between T2 and ADC images"
              .format(t2_patient, adc_patient, t2_fid, adc_fid))
        continue
    patient = t2_patient
    fid = t2_fid

    obj = find_in_csv(patient, fid)
    if obj is None:
        print("Patient {} lesion FID {} cannot be found in CSV".format(patient, fid))
        continue
    zone, clinsig = obj

    # NOTE(review): allow_pickle=True lets a crafted .npy file run arbitrary
    # code on load; only use it on files produced by our own preprocessing.
    t2_img = np.load('lesions_cropped/t2_npy/' + t2_name, allow_pickle=True)
    adc_img = np.load('lesions_cropped/adc_npy/' + adc_name, allow_pickle=True)

    lesion = NamedLesion(patient=patient,
                         t2_image=_resize_lesion(t2_img),
                         t2_size=t2_img.shape[0],
                         adc_image=_resize_lesion(adc_img),
                         adc_size=adc_img.shape[0],
                         fid=fid,
                         zone=zone,
                         clinsig=clinsig)
    # Without this append every later cell would iterate over an empty list.
    lesions.append(lesion)
    

In [None]:
# Preview every lesion: resized T2 crop on the left, matching ADC crop on the
# right, so a mispaired or badly resized image is easy to spot.
for lesion in lesions:
    fig, (t2_ax, adc_ax) = plt.subplots(1, 2, figsize=(6, 3))

    t2_ax.imshow(lesion.t2_image, cmap='gray')
    t2_ax.set_title('T2  patient {} / FID {}'.format(lesion.patient, lesion.fid))
    t2_ax.axis('off')

    adc_ax.imshow(lesion.adc_image, cmap='gray')
    adc_ax.set_title('ADC  zone {} / ClinSig {}'.format(lesion.zone, lesion.clinsig))
    adc_ax.axis('off')

    plt.show()
    # Release the figure; one figure per lesion adds up quickly otherwise.
    plt.close(fig)

In [68]:
# Apply random rotation, shifting, shearing, zooming and horizontal flipping to
# each T2 / ADC image pair to increase training data variety and model
# robustness. Both images of a pair are augmented with the same seed.

# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
# https://github.com/keras-team/keras/issues/3059

AUGMENTED_ROOT = 'preview'     # parent directory of the per-lesion output folders
AUGMENTATIONS_PER_IMAGE = 25   # augmented files written per T2 / ADC image

datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')


def _save_augmented(image, seed, out_dir, prefix, count=AUGMENTATIONS_PER_IMAGE):
    """Write ``count`` augmented JPEG variants of ``image`` into ``out_dir``.

    ``image`` is a single 2-D array; it is expanded to a one-image,
    one-channel batch as required by ``ImageDataGenerator.flow``. The files
    are written by the iterator itself as a side effect of drawing batches.
    """
    batch = image[np.newaxis, ..., np.newaxis]
    flow = datagen.flow(batch, batch_size=1, seed=seed, save_to_dir=out_dir,
                        save_prefix=prefix, save_format='jpg')
    # Draw exactly `count` batches. zip(flow, range(count)) would pull (and
    # therefore save) one extra image before noticing the range is exhausted.
    for _ in range(count):
        next(flow)


for lesion in lesions:
    # DIRNAME = patient + fid + zone + clinsig
    dirname = os.path.join(
        AUGMENTED_ROOT,
        '+'.join((lesion.patient, lesion.fid, lesion.zone, lesion.clinsig)))
    # Start from an empty directory so reruns do not mix old and new files.
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.makedirs(dirname)

    # FILENAME = patient + t2/adc + size + fid + zone + clinsig
    t2_prefix = '+'.join((lesion.patient, 't2', str(lesion.t2_size),
                          lesion.fid, lesion.zone, lesion.clinsig))
    adc_prefix = '+'.join((lesion.patient, 'adc', str(lesion.adc_size),
                           lesion.fid, lesion.zone, lesion.clinsig))

    # One seed per lesion, shared by both modalities.
    seed = np.random.randint(100)
    _save_augmented(lesion.t2_image, seed, dirname, t2_prefix)
    _save_augmented(lesion.adc_image, seed, dirname, adc_prefix)


In [None]:
# Demonstrate each augmentation on its own: one generator per effect, each
# handed to plot() in the order listed.
# NOTE(review): plot() is not defined in the cells visible here -- confirm it
# is defined elsewhere in the notebook before running this cell.
single_effect_options = [
    {'width_shift_range': 0.3},         # Shifted horizontally
    {'height_shift_range': 0.3},        # Shift vertically
    {'brightness_range': (0.1, 0.9)},   # Brightness
    {'shear_range': 45.0},              # Shear intensity
    {'zoom_range': [0.5, 1.5]},         # Random zoom within a range (<1 means shrink, >1 means enlarge)
    {'channel_shift_range': 150.0},     # Channel shift
    {'horizontal_flip': True},          # Horizontal flip
    {'vertical_flip': True},            # Vertical flip
]

for effect_options in single_effect_options:
    data_generator = ImageDataGenerator(**effect_options)
    plot(data_generator)
