In [2]:
import pandas as pd
import numpy as np


import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator 

import zipfile
import os
import shutil
import random

In [2]:
data = pd.read_csv("data.csv")
data.Fibrosis = data.Fibrosis.replace('-', np.NaN)
data.Equipment.value_counts()

In [5]:
# to upper character

# data['Equipment'] = data['Equipment'].apply(lambda x: x.upper())

In [4]:
data['Equipment'] = 'A'

In [5]:
def sort_by_equipment(equipment):
    return data[(data['Equipment'] == equipment)]['Fibrosis'].value_counts()

In [6]:
data.Fibrosis = pd.to_numeric(data.Fibrosis)

In [7]:
# divide the data by Equipment

def fill_zero(f):
    for i in range(len(f)):
        f[i] = f[i].zfill(4)
    
    return f

def sort_by_tool(equipment):
    f_neg = data[(data['Fibrosis']==0) & (data['Equipment']==equipment.upper())].number.astype(str).tolist()
    f_pos = data[(data['Fibrosis']!=0) & (data['Equipment']==equipment.upper())].number.astype(str).tolist()
    
    # fill the zeros (for foloder name)
    f_neg = fill_zero(f_neg)
    f_pos = fill_zero(f_pos)
    
    return f_neg, f_pos


def move_to_tmp(local_zip, equipment, base_loc):
    zip_ref = zipfile.ZipFile(local_zip, 'r')
    zip_ref.extractall('/tmp/Liver_'+equipment)
    zip_ref.close()

def make_folder(base_loc):
    try:
        os.mkdir(base_loc)
        os.mkdir(base_loc+'/training')
        os.mkdir(base_loc+'/training/f0')
        os.mkdir(base_loc+'/training/f1')
        os.mkdir(base_loc+'/testing')
        os.mkdir(base_loc+'/testing/f0')
        os.mkdir(base_loc+'/testing/f1')

    except OSError:
        pass

def move_to_folder(f_, folder_loc, split, base_loc):
    split = int(len(f_)*(1-split))
    
    f_train = f_[:split]
    f_test = f_[split:]
    
    print(folder_loc,':',len(f_train))
    print(folder_loc,':',len(f_test))
    
    new_name = 0
    for folder_name in f_train:
        for root, subdirs, files in os.walk(base_loc+'/'+folder_name):
            for f in files:
                if 'mask' not in f:
                    # file_name change
                    try:
                        file_to_change = os.path.join(root, f)
                        dst = str(new_name)+'.tif'
                        dst = os.path.join(root, dst)
                        # print(file_to_change, new_name)
                        
                        os.rename(file_to_change, dst)
                        new_name += 1
                        tmp = base_loc+'/training/'+folder_loc
                        # file move
                        shutil.move(dst, tmp)
                    except:
                        print('error')
    
    new_name = 0                    
    for folder_name in f_test:
        for root, subdirs, files in os.walk(base_loc+'/'+folder_name):
            for f in files:
                if 'mask' not in f:
                    # file_name change
                    try:
                        file_to_change = os.path.join(root, f)
                        dst = str(new_name)+'.tif'
                        # print(file_to_change, new_name)
                        
                        dst = os.path.join(root, dst)
                        os.rename(file_to_change, dst)
                        new_name += 1
                        tmp = base_loc+'/testing/'+folder_loc
                        # file move
                        shutil.move(dst, tmp)
                    except:
                        print('error')

In [8]:
def train_test_split(equipment, local_zip, split_size):
    equipment = equipment.upper()
    f_false, f_true = sort_by_tool(equipment)
    
    random.shuffle(f_false)
    random.shuffle(f_true)
    
    make_folder(base_loc)
    move_to_tmp(local_zip, equipment, base_loc)
    
    move_to_folder(f_false, 'f0', 0.2, base_loc)
    move_to_folder(f_true, 'f1', 0.2, base_loc)

In [9]:
def ImageGenerator(base_loc):
    TRAINING_DIR = base_loc+'/training'
    train_datagen = ImageDataGenerator(rescale = 1./255.)

    train_generator = train_datagen.flow_from_directory(TRAINING_DIR,
                                                       batch_size=64,
                                                       class_mode='binary',
                                                       target_size=(600,800),
                                                       color_mode='grayscale') # target_size = (height, width)

    TESTING_DIR = base_loc+'/testing'
    test_datagen = ImageDataGenerator(rescale = 1.0/255.)

    test_generator = train_datagen.flow_from_directory(TESTING_DIR,
                                                       batch_size=32,
                                                       class_mode='binary',
                                                       target_size=(600,800), 
                                                       color_mode='grayscale')
    
    return train_generator, test_generator

In [10]:
# change
equipment = 'A' 
local_zip = '../../../Data/Liver.zip'
base_loc = '/tmp/Liver_' + equipment

In [11]:
train_test_split(equipment, local_zip, 0.2)

f0 : 116
f0 : 30
f1 : 224
f1 : 56


In [14]:
# training

# error files:
# 1881/mask/1881-0006.tif
# 1813/mask/1813-0002.tif
# 2103/mask/2013-0001.tif

In [12]:
print("Training Fibrosis 0:", len(os.listdir(base_loc+'/training/f0')))
print("Testing Fibrosis 0:", len(os.listdir(base_loc+'/testing/f0')))
print("-"*25)
print("Training Fibrosis 1:", len(os.listdir(base_loc+'/training/f1')))
print("Testing Fibrosis 1:", len(os.listdir(base_loc+'/testing/f1')))

Training Fibrosis 0: 855
Testing Fibrosis 0: 254
-------------------------
Training Fibrosis 1: 1595
Testing Fibrosis 1: 377


In [13]:
train_generator, test_generator = ImageGenerator(base_loc)

Found 2450 images belonging to 2 classes.
Found 631 images belonging to 2 classes.


## VGG Model

In [27]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from tensorflow.keras.models import Model
in_lay = Input(60,80)
base_pretrained_model = VGG16(input_shape = (60, 80, 3),
                              include_top = False,
                              weights='imagenet')

for layer in base_pretrained_model.layers:
    layer.trainable = False

pt_features = base_pretrained_model.get_layer('block3_conv1').output

from tensorflow.keras.layers import BatchNormalization
bn_features = BatchNormalization()(pt_features)

# here we do an attention mechanism to turn pixels in the GAP on an off

attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(bn_features)
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = LocallyConnected2D(1, 
                                kernel_size = (1,1), 
                                padding = 'valid', 
                                activation = 'sigmoid')(attn_layer)

# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, 256))
up_c2 = Conv2D(256, kernel_size = (1,1), padding = 'same', 
               activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)

# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.5)(gap)
dr_steps = Dropout(0.25)(Dense(1024, activation = 'elu')(gap_dr))
out_layer = Dense(1, activation = 'linear')(dr_steps) # linear is what 16bit did

bone_age_model = Model(inputs = [in_lay], outputs = [out_layer])
from tensorflow.keras.metrics import mean_absolute_error
def mae_months(in_gt, in_pred):
    return mean_absolute_error(boneage_div*in_gt, boneage_div*in_pred)

custom_model.compile(optimizer = 'adam', loss = 'mse',
                           metrics = [mae_months])

custom_model.summary()

ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_38:0", shape=(None, 60, 80, 3), dtype=float32) at layer "block1_conv1". The following previous layers were accessed without issue: []

In [17]:
# delete the data

shutil.rmtree('/tmp/Liver_'+equipment)

In [None]:
import os, signal
train_generator, test_generator
os.kill(os.getpid(),signal.SIGKILL)