In [1]:
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageEnhance, ImageFilter, ImageOps
from skimage import exposure
from skimage.color import rgb2gray
import cv2
import math
from tqdm import tqdm

In [2]:
def preprocessing_function(edge_enhance, equalization, blur, image):
    """Blur, optionally sharpen and equalize an image, and return it with its mirror.

    Parameters
    ----------
    edge_enhance : bool
        When truthy, PIL's ``ImageFilter.EDGE_ENHANCE`` is applied after the blur.
    equalization : str or None
        ``'histogram'`` applies ``cv2.equalizeHist``, ``'adaptive'`` applies CLAHE
        (clipLimit=2.0, 8x8 tiles); any other value skips equalization.
    blur : int
        Side length of the square Gaussian kernel. OpenCV requires a positive odd
        value here. A falsy value (``0`` / ``None``) skips the blur so this step
        can be switched off like the other two.
    image : numpy.ndarray
        Image to process. The equalization steps operate on single-channel
        8-bit data (``cv2.equalizeHist`` requirement).

    Returns
    -------
    tuple of numpy.ndarray
        ``(processed, mirrored)`` where ``mirrored`` is ``processed`` flipped
        left-to-right.
    """
    # Gaussian blur (sigma=0 lets OpenCV derive sigma from the kernel size)
    if blur:
        image = cv2.GaussianBlur(image, (blur, blur), 0)

    # edge enhance using PIL; round-trip through a PIL image and back to numpy
    if edge_enhance:
        image = np.array(Image.fromarray(image).filter(ImageFilter.EDGE_ENHANCE))

    # equalization
    if equalization == 'histogram':
        image = cv2.equalizeHist(image)
    elif equalization == 'adaptive':
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        image = clahe.apply(image)

    # augmentation: horizontal mirror (flip code 1), not a rotation
    image1 = cv2.flip(image, 1)

    return image, image1

In [3]:
def _split_padding(lo, hi, extent, t):
    """Split a total padding of ``2*t`` pixels around the interval ``[lo, hi]``.

    The padding is centred (floor(t) before, ceil(t) after) when that fits inside
    ``[0, extent]``; otherwise the side that has no room takes what is available
    and the remainder goes to the other side.
    """
    before, after = math.floor(t), math.ceil(t)
    if lo - before >= 0 and hi + after <= extent:
        return before, after
    if lo - before >= 0:
        # no room after `hi`: use everything up to the border, rest goes in front
        after = extent - hi
        return (2 * t) - after, after
    # no room before `lo`: use everything down to 0, rest goes behind
    before = lo
    return before, (2 * t) - before


def crop_image(img):
    """Crop a 3-channel image to a square window around its bright foreground.

    The image is converted to gray, binarised with Otsu's threshold, and the
    bounding box of the white pixels is padded on its shorter axis until it is
    square (plus a 2 px margin when the box does not touch the border).

    Parameters
    ----------
    img : numpy.ndarray
        Array of shape (H, W, C) that ``cv2.cvtColor(..., COLOR_BGR2GRAY)`` accepts.

    Returns
    -------
    numpy.ndarray
        2-D slice of channel 0 of ``img``: the square crop, or the whole channel
        when no usable square crop exists (no foreground, degenerate or oversized
        bounding box, window leaving the image, non-square result).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # threshold
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    height, width = thresh.shape
    full = img[:, :, 0]

    # blank the bottom 3 rows so a white strip along the bottom edge does not
    # inflate the bounding box
    thresh[height - 3:height, 0:width] = 0

    # get bounds of white pixels
    rows, cols = np.where(thresh == 255)
    if rows.size == 0:
        # nothing above the threshold: np.min would raise on the empty index array
        return full
    ymin, ymax = int(rows.min()), int(rows.max())
    xmin, xmax = int(cols.min()), int(cols.max())
    hd = ymax - ymin
    wd = xmax - xmin

    # the square side is driven by the longer box side (+4 for the margin), so it
    # has to fit along the *other* image axis; degenerate boxes are not cropped
    if hd == 0 or wd == 0 or hd + 4 > width or wd + 4 > height:
        return full

    top = bottom = left = right = 0
    if hd > wd:
        # tall box: margin above/below only if it does not touch top/bottom
        margin = 0 if (ymin < 2 or ymax + 2 > height) else 2
        top = bottom = margin
        left, right = _split_padding(xmin, xmax, width, (hd + 2 * margin - wd) / 2)
    elif wd > hd:
        # wide box: margin left/right only if it does not touch the sides
        margin = 0 if (xmin < 2 or xmax + 2 > width) else 2
        left = right = margin
        top, bottom = _split_padding(ymin, ymax, height, (wd + 2 * margin - hd) / 2)
    else:
        # already square: add the margin only when there is room on all four sides
        roomy = ymin >= 2 and xmin >= 2 and ymax + 2 <= height and xmax + 2 <= width
        top = bottom = left = right = 2 if roomy else 0

    row_start, row_stop = int(ymin - top), int(ymax + bottom)
    col_start, col_stop = int(xmin - left), int(xmax + right)
    if row_start < 0 or col_start < 0:
        # a negative start would wrap around to the far edge instead of clipping
        return full

    crop = img[row_start:row_stop, col_start:col_stop, 0]
    if crop.shape[0] != crop.shape[1]:
        return full
    return crop

In [None]:
data_dir = '/dmc/ml_storage/machine_learning/Final_ML_CBS/data/age/'
target_path = '/dmc/ml_storage/machine_learning/Final_ML_CBS/data/age_prep/'


# equalization: 'histogram' or 'adaptive'; any other value skips equalization
preprocessing_args = {
    'edge_enhance': True,
    'equalization': 'histogram',
    'blur': 7
}
errors = []  # one [exception, source path] entry per image that failed
shape = []   # one [processed shape, mirrored shape] entry per image written

for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)
    if not os.path.isdir(folder_path):
        # stray files next to the class folders would make the inner listdir raise
        continue

    # cv2.imwrite does not create missing directories, it just returns False
    out_dir = os.path.join(target_path, folder)
    os.makedirs(out_dir, exist_ok=True)

    for image in tqdm(sorted(os.listdir(folder_path)), desc=folder):
        image_path = os.path.join(folder_path, image)
        try:
            # crop_image needs a 3-channel array. cv2.COLOR_BGR2GRAY is a cvtColor
            # conversion code, not an imread flag, so request colour explicitly.
            original = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if original is None:
                raise ValueError('cv2.imread could not read the file')

            # cropping; fall back to the uncropped first channel on an empty crop
            img = crop_image(original)
            if 0 in img.shape:
                img = original[:, :, 0]

            temp, temp1 = preprocessing_function(preprocessing_args['edge_enhance'],
                                                 preprocessing_args['equalization'],
                                                 preprocessing_args['blur'],
                                                 img)

            # add a _0 / _1 suffix to the image name (original / mirrored)
            stem = os.path.splitext(image)[0]
            for suffix, variant in (('_0', temp), ('_1', temp1)):
                path = os.path.join(out_dir, stem + suffix + '.png')
                if not cv2.imwrite(path, variant):
                    raise IOError('cv2.imwrite failed for ' + path)
            shape.append([temp.shape, temp1.shape])
        except Exception as e:
            # best effort: record the failure and carry on with the next image
            errors.append([e, image_path])
            tqdm.write('error: ' + image_path)

In [None]:
# Tabulate the failures collected during preprocessing (one row per image,
# exception first, source path second) and persist them without the row index.
errors = pd.DataFrame(data=errors, columns=['error', 'path'])
errors.to_csv(path_or_buf='errors.csv', index=False)

In [None]:
# get the first error (None when every image was processed successfully, so a
# clean run does not stop "Run All" with an IndexError)
errors.iloc[0, 0] if len(errors) else None

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# Enable memory growth on every GPU TensorFlow can see, so memory is allocated
# as needed rather than all at once.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [None]:
# Training configuration: square single-channel inputs, number of output
# classes, batch size, and the directory holding one sub-folder per class.
size, classes, batch_size = 224, 30, 8
input_dim = (size, size, 1)
data_dir = '/dmc/ml_storage/machine_learning/Final_ML_CBS/data/cohorts_5_aug/'

In [None]:
# Training split: the 'training' side of a seeded 80/20 split of data_dir,
# loaded as grayscale images resized to size x size with one-hot labels.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    labels='inferred',
    label_mode='categorical',
    color_mode='grayscale',
    image_size=(size, size),
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=42,
)

In [None]:
# Validation split: the 'validation' side of the same seeded 80/20 split
# (identical seed and validation_split as the training call).
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=data_dir,
    labels='inferred',
    label_mode='categorical',
    color_mode='grayscale',
    image_size=(size, size),
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=42,
)

In [None]:
# Class names as reported by the training dataset
class_names = train_ds.class_names
print(class_names)

# Shapes of one (images, labels) batch from the training dataset
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Preview one training batch. The grid is sized from the images actually in the
# batch, so changing batch_size (or a short batch) cannot overflow the layout.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    n_images = int(images.shape[0])
    n_cols = 3
    n_rows = -(-n_images // n_cols)  # ceiling division
    for i in range(n_images):
        ax = plt.subplot(n_rows, n_cols, i + 1)
        # drop the single channel axis and draw as true grayscale (0-255)
        # instead of letting matplotlib apply its default colormap
        pixels = images[i].numpy().squeeze(axis=-1).astype("uint8")
        plt.imshow(pixels, cmap="gray", vmin=0, vmax=255)
        plt.title(class_names[np.argmax(labels[i])])
        plt.axis("off")

In [None]:
from tensorflow.keras.layers import Input, Conv2D, Dropout, Flatten, BatchNormalization, MaxPooling2D, Dense
from tensorflow.keras.models import Model


# Model input; named `inputs` so the builtin input() is not shadowed for the
# rest of the notebook.
inputs = Input(shape=input_dim)

# Convolutional trunk: four blocks of two 3x3 'same' convolutions, dropout
# (0.1 -> 0.4) and max-pooling, with 64 / 128 / 256 / 512 filters.
conv1 = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(conv1)
conv1 = Dropout(0.1)(conv1)
mpool1 = MaxPooling2D()(conv1)

conv2 = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(mpool1)
conv2 = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(conv2)
conv2 = Dropout(0.2)(conv2)
mpool2 = MaxPooling2D()(conv2)

conv3 = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(mpool2)
conv3 = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(conv3)
conv3 = Dropout(0.3)(conv3)
mpool3 = MaxPooling2D()(conv3)

conv4 = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(mpool3)
conv4 = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(conv4)
conv4 = Dropout(0.4)(conv4)
mpool4 = MaxPooling2D()(conv4)

# Classification head (output layer is named 'age')
net_1_conv1 = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(mpool4)
net_1_conv2 = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(net_1_conv1)
net_1_drop1 = Dropout(0.2)(net_1_conv2)
net_1_pool1 = MaxPooling2D(pool_size=(3, 3), padding='same')(net_1_drop1)

net_1_conv3 = Conv2D(512, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same')(net_1_pool1)
net_1_drop2 = Dropout(rate=0.2)(net_1_conv3)

# the next two convolutions use the default padding, so each one shrinks the
# feature map before it is flattened
net_1_conv4 = Conv2D(1024, kernel_size=(3, 3), strides=(1, 1), activation='relu')(net_1_drop2)
net_1_drop3 = Dropout(rate=0.4)(net_1_conv4)

net_1_conv5 = Conv2D(1024, kernel_size=(3, 3), strides=(1, 1), activation='relu')(net_1_drop3)
net_1_drop4 = Dropout(rate=0.2)(net_1_conv5)

# Inference layer: batch-norm, flatten, softmax over `classes` outputs
net_1_batch = BatchNormalization()(net_1_drop4)
flatten1 = Flatten()(net_1_batch)
out1 = Dense(classes, activation='softmax', name='age')(flatten1)

model = Model(inputs=inputs, outputs=out1)
model.summary()

In [None]:
# reset the Keras session so a fresh model can be built
def delete_model(model):
    """Clear the global Keras session and run a garbage-collection pass.

    A function cannot remove the caller's variable: a ``del model`` in here would
    only unbind the local parameter. Run ``del model`` in the calling cell as
    well if the object itself should become collectable.

    Parameters
    ----------
    model : object
        Accepted for backward compatibility with existing calls; not modified.

    Returns
    -------
    None
    """
    import gc

    tf.keras.backend.clear_session()
    gc.collect()


In [None]:
# Both callbacks watch the validation loss: the checkpoint keeps only the best
# weights in 'cohorts.hdf5', and early stopping restores the best weights after
# 16 epochs without improvement.
callbacks = [
    ModelCheckpoint(
        'cohorts.hdf5',
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=True,
        verbose=1,
    ),
    EarlyStopping(
        monitor='val_loss',
        patience=16,
        restore_best_weights=True,
    ),
]

In [None]:
# Adam optimizer, categorical cross-entropy for the one-hot labels, and
# accuracy / precision / recall / AUC tracked on every epoch.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(),
    ],
)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# cache() replays exactly what it saw on the first pass, so everything upstream
# of it (including the shuffling done by image_dataset_from_directory) is frozen
# after epoch one. Shuffle again *after* the cache so the order of training
# batches still changes between epochs. Validation needs no reshuffling.
# NOTE: this cell rebinds train_ds / val_ds; re-running it without re-creating
# the datasets stacks another cache on top of the previous one.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
epochs = 32
# train_ds / val_ds are already-batched tf.data datasets, so batching and
# shuffling belong to the input pipeline. Keras documents `batch_size` as not to
# be specified and `shuffle` as ignored for dataset inputs, so neither is passed.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=callbacks,
)

In [None]:
# Training curves: loss on the left, accuracy on the right (the original 1x2
# layout only ever filled the left panel).
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20, 10))

loss_ax.plot(history.history['loss'], label='train_loss')
loss_ax.plot(history.history['val_loss'], label='val_loss')
loss_ax.set(title='Loss', xlabel='epoch', ylabel='categorical cross-entropy')
loss_ax.legend()

if 'accuracy' in history.history:
    acc_ax.plot(history.history['accuracy'], label='train_accuracy')
    if 'val_accuracy' in history.history:
        acc_ax.plot(history.history['val_accuracy'], label='val_accuracy')
    acc_ax.set(title='Accuracy', xlabel='epoch', ylabel='accuracy')
    acc_ax.legend()
else:
    # metric recorded under a different key: leave the panel blank
    acc_ax.set_axis_off()

plt.show()

In [None]:
# Create a table with accuracy, precision, recall and f1-score on the val set
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd
import json

# load the best model
model.load_weights('cohorts.hdf5')

# Collect predictions and true labels in the SAME pass over val_ds. Two separate
# passes only line up if the dataset happens to yield identical batch order
# both times, which a shuffled dataset does not guarantee.
y_pred_batches = []
y_true_batches = []
for images, labels in val_ds:
    y_pred_batches.append(np.asarray(model.predict_on_batch(images)))
    y_true_batches.append(labels.numpy())
y_pred = np.argmax(np.concatenate(y_pred_batches), axis=1)
y_true = np.argmax(np.concatenate(y_true_batches), axis=1)

# get the classification report; `labels` pins the report to every known class
# so a class missing from the validation data cannot misalign target_names
report = classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
    output_dict=True,
)
report = pd.DataFrame(report).transpose()

# Keep the three-row layout (report, train loss, val loss), but store each entry
# as a JSON string so the CSV can be parsed back. Passing the report DataFrame
# and raw lists straight to pd.DataFrame does not produce a usable table.
output = pd.DataFrame(
    {
        'value': [
            report.to_json(orient='index'),
            json.dumps([float(v) for v in history.history['loss']]),
            json.dumps([float(v) for v in history.history['val_loss']]),
        ]
    },
    index=['classification_report', 'train_loss', 'val_loss'],
)

output.to_csv('None_False_ad.csv')