In [1]:
import pandas as pd
import numpy as np


import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator 

import zipfile
import os
import shutil
import random

In [2]:
data = pd.read_csv("data.csv")
data.Fibrosis = data.Fibrosis.replace('-', np.NaN)
data.Equipment.value_counts()

IU22        101
LOGIQE9      99
iu22         78
EUB-7500     70
eub-7500     45
S2000        28
s2000         5
Name: Equipment, dtype: int64

In [3]:
# to upper character

# data['Equipment'] = data['Equipment'].apply(lambda x: x.upper())

In [4]:
data['Equipment'] = 'A'

In [5]:
def sort_by_equipment(equipment):
    return data[(data['Equipment'] == equipment)]['Fibrosis'].value_counts()

In [6]:
data.Fibrosis = pd.to_numeric(data.Fibrosis)

In [7]:
# divide the data by Equipment

def fill_zero(f):
    for i in range(len(f)):
        f[i] = f[i].zfill(4)
    
    return f

def sort_by_tool(equipment):
    f_neg = data[(data['Fibrosis']==0) & (data['Equipment']==equipment.upper())].number.astype(str).tolist()
    f_pos = data[(data['Fibrosis']!=0) & (data['Equipment']==equipment.upper())].number.astype(str).tolist()
    
    # fill the zeros (for foloder name)
    f_neg = fill_zero(f_neg)
    f_pos = fill_zero(f_pos)
    
    return f_neg, f_pos


def move_to_tmp(local_zip, equipment, base_loc):
    zip_ref = zipfile.ZipFile(local_zip, 'r')
    zip_ref.extractall('/tmp/Liver_'+equipment)
    zip_ref.close()

def make_folder(base_loc):
    try:
        os.mkdir(base_loc)
        os.mkdir(base_loc+'/training')
        os.mkdir(base_loc+'/training/f0')
        os.mkdir(base_loc+'/training/f1')
        os.mkdir(base_loc+'/testing')
        os.mkdir(base_loc+'/testing/f0')
        os.mkdir(base_loc+'/testing/f1')

    except OSError:
        pass

def move_to_folder(f_, folder_loc, split, base_loc):
    split = int(len(f_)*(1-split))
    
    f_train = f_[:split]
    f_test = f_[split:]
    
    print(folder_loc,':',len(f_train))
    print(folder_loc,':',len(f_test))
    
    new_name = 0
    for folder_name in f_train:
        for root, subdirs, files in os.walk(base_loc+'/'+folder_name):
            for f in files:
                if 'mask' not in f:
                    # file_name change
                    try:
                        file_to_change = os.path.join(root, f)
                        dst = str(new_name)+'.tif'
                        dst = os.path.join(root, dst)
                        # print(file_to_change, new_name)
                        
                        os.rename(file_to_change, dst)
                        new_name += 1
                        tmp = base_loc+'/training/'+folder_loc
                        # file move
                        shutil.move(dst, tmp)
                    except:
                        print('error')
    
    new_name = 0                    
    for folder_name in f_test:
        for root, subdirs, files in os.walk(base_loc+'/'+folder_name):
            for f in files:
                if 'mask' not in f:
                    # file_name change
                    try:
                        file_to_change = os.path.join(root, f)
                        dst = str(new_name)+'.tif'
                        # print(file_to_change, new_name)
                        
                        dst = os.path.join(root, dst)
                        os.rename(file_to_change, dst)
                        new_name += 1
                        tmp = base_loc+'/testing/'+folder_loc
                        # file move
                        shutil.move(dst, tmp)
                    except:
                        print('error')

In [8]:
def train_test_split(equipment, local_zip, split_size):
    equipment = equipment.upper()
    f_false, f_true = sort_by_tool(equipment)
    
    random.shuffle(f_false)
    random.shuffle(f_true)
    
    make_folder(base_loc)
    move_to_tmp(local_zip, equipment, base_loc)
    
    move_to_folder(f_false, 'f0', 0.2, base_loc)
    move_to_folder(f_true, 'f1', 0.2, base_loc)

In [9]:
def ImageGenerator(base_loc):
    TRAINING_DIR = base_loc+'/training'
    train_datagen = ImageDataGenerator(rescale = 1./255.)

    train_generator = train_datagen.flow_from_directory(TRAINING_DIR,
                                                       batch_size=64,
                                                       class_mode='binary',
                                                       target_size=(600,800),
                                                       color_mode='rgb') # target_size = (height, width)

    TESTING_DIR = base_loc+'/testing'
    test_datagen = ImageDataGenerator(rescale = 1.0/255.)

    test_generator = train_datagen.flow_from_directory(TESTING_DIR,
                                                       batch_size=32,
                                                       class_mode='binary',
                                                       target_size=(600,800), 
                                                       color_mode='rgb')
    
    return train_generator, test_generator

In [10]:
# change
equipment = 'A' 
local_zip = '../../../Data/Liver10.zip'
base_loc = '/tmp/Liver_' + equipment

In [11]:
train_test_split(equipment, local_zip, 0.2)

f0 : 116
f0 : 30
f1 : 224
f1 : 56


In [12]:
# training

# error files:
# 1881/mask/1881-0006.tif
# 1813/mask/1813-0002.tif
# 2103/mask/2013-0001.tif

In [13]:
print("Training Fibrosis 0:", len(os.listdir(base_loc+'/training/f0')))
print("Testing Fibrosis 0:", len(os.listdir(base_loc+'/testing/f0')))
print("-"*25)
print("Training Fibrosis 1:", len(os.listdir(base_loc+'/training/f1')))
print("Testing Fibrosis 1:", len(os.listdir(base_loc+'/testing/f1')))

Training Fibrosis 0: 917
Testing Fibrosis 0: 192
-------------------------
Training Fibrosis 1: 1568
Testing Fibrosis 1: 404


In [14]:
train_generator, test_generator = ImageGenerator(base_loc)

Found 2485 images belonging to 2 classes.
Found 596 images belonging to 2 classes.


## VGG Model

In [15]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import layers
from tensorflow.keras.models import Model

pre_trained_model = VGG16(input_shape = (60, 80, 3),
                              include_top = False,
                              weights='imagenet')

for layer in pre_trained_model.layers:
    layer.trainable = False
    
pre_trained_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 60, 80, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 60, 80, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 60, 80, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 30, 40, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 30, 40, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 30, 40, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 15, 20, 128)       0     

In [20]:
last_layer = pre_trained_model.get_layer('block2_pool')  # block5_pool
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output


last layer output shape:  (None, 15, 20, 128)


In [21]:
# add DNN 
"""
x = tf.keras.layers.Dropout(0.5)(last_output)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)"""
x = tf.keras.layers.Flatten()(last_output) 
x = tf.keras.layers.Dropout(0.4)(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dense(1, activation='linear')(x)

model = Model(pre_trained_model.input, x)

In [22]:
model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 60, 80, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 60, 80, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 60, 80, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 30, 40, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 30, 40, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 30, 40, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 15, 20, 128)      

In [23]:
history = model.fit_generator(train_generator,
                              epochs=50,
                              validation_data=test_generator)

Epoch 1/50


ResourceExhaustedError:  OOM when allocating tensor with shape[64,64,600,800] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node functional_3/block1_conv1/Relu (defined at <ipython-input-23-bb0f23c2cc7f>:3) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_1986]

Function call stack:
train_function


In [23]:
# delete the data

shutil.rmtree('/tmp/Liver_'+equipment)

In [None]:
import os, signal
train_generator, test_generator
os.kill(os.getpid(),signal.SIGKILL)