# unet.py

In [1]:
# U-Netを操作するクラス,関数群
from keras.models import Model
from keras.layers import Input, LeakyReLU, BatchNormalization, Activation, Dropout
from keras.layers.convolutional import Conv2D, ZeroPadding2D, Conv2DTranspose
from keras.layers.merge import concatenate
from keras.optimizers import Adam
import os

# imageは(256, 256, 1)で読み込み
IMAGE_SIZE = 256
# Number of filters in the first convolution layer (64)
FIRST_LAYER_FILTER_COUNT = 64

# convolution後のshape_size=(size_old-filter_size)/stride+1


# U-Netのネットワークを構築するクラス
class UNet(object):
    """Build a U-Net style encoder/decoder as a Keras functional ``Model``.

    The encoder halves the spatial size five times with stride-2 convolutions
    and the decoder doubles it five times with stride-2 transposed
    convolutions. Each decoder stage except the last is concatenated
    channel-wise with the encoder output of the same spatial size. The final
    activation is a sigmoid.

    The input spatial size is read from the module-level ``IMAGE_SIZE``; the
    shape comments below assume ``IMAGE_SIZE == 256``.
    """

    def __init__(self, input_channel_count, output_channel_count, first_layer_filter_count):
        """Assemble the network and store it in ``self.UNet``.

        Args:
            input_channel_count: number of channels of the input image.
            output_channel_count: number of channels of the predicted map.
            first_layer_filter_count: number of filters of the first
                convolution; deeper layers use multiples of this value.
        """
        # In the shape comments below, first_layer_filter_count is written as N.
        self.INPUT_IMAGE_SIZE = IMAGE_SIZE
        self.CONCATENATE_AXIS = -1
        # Skip connections are concatenated along the channel (last) axis.
        self.CONV_FILTER_SIZE = 4
        self.CONV_STRIDE = 2
        self.CONV_PADDING = (1, 1)
        self.DECONV_FILTER_SIZE = 2
        self.DECONV_STRIDE = 2

        # build NN with functional API
        input_img = Input((self.INPUT_IMAGE_SIZE, self.INPUT_IMAGE_SIZE, input_channel_count))
        # (256, 256, input_channel_count)

        enc1 = ZeroPadding2D(self.CONV_PADDING)(input_img)
        # (258, 258, input_channel_count)

        # First encoder stage: plain convolution, no activation or batch norm before it.
        enc1 = Conv2D(first_layer_filter_count, self.CONV_FILTER_SIZE, strides=self.CONV_STRIDE)(enc1)
        # (128, 128, N)

        filter_count = first_layer_filter_count*2
        enc2 = self._add_encoding_layer(filter_count, enc1)
        # (64, 64, 2N)

        filter_count = first_layer_filter_count*4
        enc3 = self._add_encoding_layer(filter_count, enc2)
        # (32, 32, 4N)

        filter_count = first_layer_filter_count*8
        enc4 = self._add_encoding_layer(filter_count, enc3)
        # (16, 16, 8N)

        # The bottleneck reuses the 8N filter count of the previous stage.
        enc5 = self._add_encoding_layer(filter_count, enc4)
        # (8, 8, 8N)

        dec4 = self._add_decoding_layer(filter_count, True, enc5)
        # (16, 16, 8N)

        dec4 = concatenate([dec4, enc4], axis=self.CONCATENATE_AXIS)
        # (16, 16, 16N)

        filter_count = first_layer_filter_count*4
        dec5 = self._add_decoding_layer(filter_count, True, dec4)
        # (32, 32, 4N)

        dec5 = concatenate([dec5, enc3], axis=self.CONCATENATE_AXIS)
        # (32, 32, 8N)

        filter_count = first_layer_filter_count*2
        dec6 = self._add_decoding_layer(filter_count, True, dec5)
        # (64, 64, 2N)

        dec6 = concatenate([dec6, enc2], axis=self.CONCATENATE_AXIS)
        # (64, 64, 4N)

        filter_count = first_layer_filter_count
        dec7 = self._add_decoding_layer(filter_count, True, dec6)
        # (128, 128, N)

        dec7 = concatenate([dec7, enc1], axis=self.CONCATENATE_AXIS)
        # (128, 128 ,2N)

        # Last decoder stage: no batch norm or dropout, maps to the output channels.
        dec8 = Activation(activation='relu')(dec7)
        dec8 = Conv2DTranspose(output_channel_count, self.DECONV_FILTER_SIZE, strides=self.DECONV_STRIDE)(dec8)
        # (256, 256, output_channel_count)

        # Sigmoid keeps every output value in the range 0..1.
        dec8 = Activation(activation='sigmoid')(dec8)

        self.UNet = Model(inputs=input_img, outputs=dec8)

        # self.UNet.summary()

    def _add_encoding_layer(self, filter_count, sequence):
        """Append LeakyReLU -> zero padding -> stride-2 Conv2D -> BatchNormalization.

        Args:
            filter_count: number of filters of the convolution.
            sequence: tensor the layers are applied to.

        Returns:
            The output tensor of the batch normalization layer.
        """
        new_sequence = LeakyReLU(0.2)(sequence)
        new_sequence = ZeroPadding2D(self.CONV_PADDING)(new_sequence)
        new_sequence = Conv2D(filter_count, self.CONV_FILTER_SIZE, strides=self.CONV_STRIDE)(new_sequence)
        new_sequence = BatchNormalization()(new_sequence)
        return new_sequence

    def _add_decoding_layer(self, filter_count, add_drop_layer, sequence):
        """Append ReLU -> stride-2 Conv2DTranspose -> BatchNormalization [-> Dropout].

        Args:
            filter_count: number of filters of the transposed convolution.
            add_drop_layer: when true, a ``Dropout(0.5)`` layer is appended.
            sequence: tensor the layers are applied to.

        Returns:
            The output tensor of the last layer that was added.
        """
        new_sequence = Activation(activation='relu')(sequence)
        new_sequence = Conv2DTranspose(filter_count, self.DECONV_FILTER_SIZE, strides=self.DECONV_STRIDE,
                                       kernel_initializer='he_uniform')(new_sequence)
        new_sequence = BatchNormalization()(new_sequence)
        if add_drop_layer:
            new_sequence = Dropout(0.5)(new_sequence)
        return new_sequence

    def get_model(self):
        """Return the Keras ``Model`` built in ``__init__``."""
        return self.UNet


# U-Netをtrainingする関数
def train_unet():
    """Train the U-Net on the images under ``Dataset`` and return the fit history.

    Images and labels are read from ``Dataset/training`` and the validation
    pair from ``Dataset/test``. The model is compiled with the Dice loss and
    the trained weights are written to ``unet_weights.hdf5``.

    Returns:
        The history object returned by ``model.fit``.
    """
    training_dir = os.path.join('Dataset', 'training')
    validation_dir = os.path.join('Dataset', 'test')

    # Training images and labels (the file names are not needed here).
    x_train, _ = load_x(os.path.join(training_dir, 'image'))
    y_train = load_y(os.path.join(training_dir, 'label'))
    # Validation images and labels.
    x_validation, _ = load_x(os.path.join(validation_dir, 'image'))
    y_validation = load_y(os.path.join(validation_dir, 'label'))

    # Both the input and the output are single-channel grayscale images.
    network = UNet(1, 1, FIRST_LAYER_FILTER_COUNT)
    model = network.get_model()
    model.compile(
        loss=dice_coefficient_loss,
        optimizer=Adam(lr=1e-4),
        metrics=[dice_coefficient, 'accuracy'],
    )

    batch_size = 5
    # NOTE(review): the original comment said about 20 epochs are enough,
    # yet 1000 epochs are configured here - confirm the intended value.
    num_epoch = 1000
    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=num_epoch,
        verbose=1,
        validation_data=(x_validation, y_validation),
    )
    model.save_weights('unet_weights.hdf5')

    return history


Using TensorFlow backend.


# plot.py

In [2]:
# 学習推移をプロットする関数群
import matplotlib.pyplot as plt


# historyを受け取りloss,accuracyの推移グラフを出力する関数
def plot_loss_accuracy(history):
    """Plot the training/validation loss and accuracy curves side by side.

    The figure is written to ``./loss_accuracy.png`` and then shown.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict must
            hold ``loss``, ``val_loss`` and the accuracy metric, recorded
            either as ``acc``/``val_acc`` or as ``accuracy``/``val_accuracy``.
    """
    metrics = history.history
    # Older Keras releases record accuracy as 'acc', newer ones as 'accuracy'.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    fig, (axL, axR) = plt.subplots(ncols=2, figsize=(10, 4))

    # Left panel: loss per epoch.
    axL.plot(metrics['loss'], label="loss for training")
    axL.plot(metrics['val_loss'], label="loss for validation")
    axL.set_title('model loss')
    axL.set_xlabel('epoch')
    axL.set_ylabel('loss')
    axL.legend(loc='upper right')

    # Right panel: accuracy per epoch (the legend previously said "loss").
    axR.plot(metrics[acc_key], label="accuracy for training")
    axR.plot(metrics['val_' + acc_key], label="accuracy for validation")
    axR.set_title('model accuracy')
    axR.set_xlabel('epoch')
    axR.set_ylabel('accuracy')
    axR.legend(loc='lower right')

    # Save before showing so the file is written even if the window is
    # closed by the user while it is displayed.
    fig.savefig('./loss_accuracy.png')
    plt.show()
    plt.close(fig)


# metrics.py

In [3]:
# lossの計算関数群
import keras.backend as K


# ダイス係数を計算する関数
def dice_coefficient(y_true, y_pred):
    """Return the Dice coefficient of two tensors, smoothed in the denominator.

    Both tensors are flattened; the result is
    ``2 * sum(t * p) / (sum(t) + sum(p) + 1)``.

    NOTE(review): the ``+ 1`` appears only in the denominator, so two all-zero
    inputs score 0 rather than 1 - confirm this is intended.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    total = K.sum(truth) + K.sum(prediction) + 1
    return 2.0 * overlap / total


# ロス関数
def dice_coefficient_loss(y_true, y_pred):
    """Return ``1 - dice_coefficient(y_true, y_pred)`` for use as a loss."""
    dice = dice_coefficient(y_true, y_pred)
    return 1.0 - dice


# load.py

In [4]:
# 画像をロードする関数群
import numpy as np


# 受け取ったパス下のファイル/ディレクトリのうち,'.DS_Store'以外のファイルのlistを返す関数
def load_file(folder_path):
    """Return the names of the regular, non-hidden files directly in *folder_path*.

    Directories and entries whose name starts with ``.`` (for example
    ``.DS_Store``) are skipped. Names are returned in ``os.listdir`` order,
    without the folder prefix.
    """
    import os

    return [
        entry
        for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry)) and not entry.startswith('.')
    ]


# 受け取ったパス下の静脈画像をグレースケールで読み込み,ファイル名とセットで返す関数
def load_x(folder_path):
    """Load every image in *folder_path* as a normalized grayscale batch.

    Files are processed in sorted name order. Each image is read as
    grayscale, resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and passed through
    ``normalize_x``.

    Args:
        folder_path: directory that holds the input images.

    Returns:
        A tuple ``(images, file_names)`` where ``images`` is a float32 array
        of shape ``(len(file_names), IMAGE_SIZE, IMAGE_SIZE, 1)`` and
        ``file_names`` is the sorted list of file names.

    Raises:
        IOError: if a file cannot be decoded as an image.
    """
    import os
    import cv2

    file_names = sorted(load_file(folder_path))
    images = np.zeros((len(file_names), IMAGE_SIZE, IMAGE_SIZE, 1), np.float32)
    for i, image_file in enumerate(file_names):
        image_path = os.path.join(folder_path, image_file)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError('Could not read image: ' + image_path)
        image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
        # Add the trailing channel axis expected by the network input.
        images[i] = normalize_x(image[:, :, np.newaxis])
    return images, file_names


# ラベル画像をグレースケールで読み込んで返す関数
def load_y(folder_path):
    """Load every label image in *folder_path* as a normalized grayscale batch.

    Files are processed in sorted name order. Each image is read as
    grayscale, resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and passed through
    ``normalize_y``.

    Args:
        folder_path: directory that holds the label images.

    Returns:
        A float32 array of shape ``(file count, IMAGE_SIZE, IMAGE_SIZE, 1)``.

    Raises:
        IOError: if a file cannot be decoded as an image.
    """
    import os
    import cv2

    image_files = sorted(load_file(folder_path))
    images = np.zeros((len(image_files), IMAGE_SIZE, IMAGE_SIZE, 1), np.float32)
    for i, image_file in enumerate(image_files):
        image_path = os.path.join(folder_path, image_file)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError('Could not read image: ' + image_path)
        image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
        # Add the trailing channel axis so labels match the network output.
        images[i] = normalize_y(image[:, :, np.newaxis])
    return images


# normalize.py

In [5]:
# 画素値を正規化/非正規化する関数群


# 値を-1から1に正規化する関数
def normalize_x(image):
    """Map pixel values from 0..255 to -1..1 (``image / 127.5 - 1``)."""
    return image / 127.5 - 1


# 値を0から1に正規化する関数
def normalize_y(image):
    """Map pixel values from 0..255 to 0..1 (``image / 255``)."""
    return image / 255


# 値を0から255に戻す関数
def denormalize_y(image):
    """Map values from 0..1 back to 0..255 (``image * 255``)."""
    return image * 255


# masking.py

In [6]:
# masking関連の関数
import cv2
import os
import numpy as np

# タイマーを定義したクラス
import time


class Timer:
    """Minimal stopwatch based on ``time.time()``; starts when constructed."""

    def __init__(self):
        self.reset()

    def time_elapsed(self):
        """Return the seconds elapsed since construction or the last reset."""
        now = time.time()
        return now - self.start

    def reset(self):
        """Restart the measurement from the current time."""
        self.start = time.time()

        
# 林さんの従来手法でmaskingを行って返す関数
def previous_masking(image):
    """Mask *image* with the previous method: morphological opening + Otsu threshold.

    Args:
        image: three-channel image.

    Returns:
        A tuple ``(masked, seconds)``: the input with the binary mask applied
        and the time spent on the opening, thresholding and masking. The
        grayscale conversion happens before the timer starts.

    NOTE(review): the conversion uses ``COLOR_RGB2GRAY`` while the caller in
    this file passes frames from ``cv2.VideoCapture`` and converts them with
    ``COLOR_BGR2GRAY`` elsewhere - confirm which channel order is intended.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    stopwatch = Timer()
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 13))
    opened = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel, iterations=8)

    # A threshold of 0 is ignored because THRESH_OTSU picks the level itself.
    _, mask = cv2.threshold(opened, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    masked = cv2.bitwise_and(image, image, mask=mask)

    return masked, stopwatch.time_elapsed()
        

# 動画でU-Netによるmaskingを行う関数
def movie_masking(name):
    """Mask a registered clip with the U-Net and with the previous method.

    Every frame is masked by both methods; the two results are placed side by
    side, written to ``<name>.mov`` and shown in a window. Pressing ESC stops
    early. The average per-frame time of each method is printed at the end.

    Args:
        name: key of the clip in the internal ``user_movies`` table.

    Returns:
        True when at least one frame was processed, False when no frame could
        be read from the clip.

    Raises:
        KeyError: if *name* is not a registered clip.

    NOTE(review): a later cell in this file defines another ``movie_masking``
    that takes no arguments and replaces this one once it has been executed.
    """
    user_movies = {'hayashi':'./movie/hayashi_n4.avi',
                   'kikuchi':'./movie/kikuchi_n2.avi',
                   'kurose':'./movie/kurose_n1.avi',
                   'okazawa':'./movie/okazawa8.avi',
                   'hayashi2':'./movie/hayashi/hayashi_%04d.png'}
    # Fail fast on an unknown name, before the network is built.
    movie_path = user_movies[name]

    fps = 15
    width = 1024
    height = 768
    output_size = (width, int(height/2))
    threshold = 0.5
    BATCH_SIZE = 1

    input_channel_count = 1
    output_channel_count = 1
    network = UNet(input_channel_count, output_channel_count, FIRST_LAYER_FILTER_COUNT)
    model = network.get_model()
    model.load_weights('unet_weights.hdf5')

    previous_whole_time = 0.0
    unet_whole_time = 0.0
    frame_number = 0

    cap = cv2.VideoCapture(movie_path)
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    writer = cv2.VideoWriter(name+'.mov', fourcc, fps, output_size)

    try:
        while cap.isOpened():
            ret, img = cap.read()
            if not ret:
                break

            frame_number += 1
            previous_masked, previous_lap_time = previous_masking(img)
            previous_whole_time += previous_lap_time

            timer1 = Timer()

            images = np.zeros((1, IMAGE_SIZE, IMAGE_SIZE, 1), np.float32)
            image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
            images[0] = normalize_x(image[:, :, np.newaxis])
            predicted = model.predict(images, BATCH_SIZE)

            # Resize the mask to the actual frame size so the multiplication
            # below works for clips that are not 1024x768.
            frame_height, frame_width = img.shape[:2]
            predicted_mask = cv2.resize(predicted[0], (frame_width, frame_height))
            # Binarize in one step; values equal to the threshold count as
            # foreground instead of being left at 0.5.
            binarized_mask = (predicted_mask >= threshold).astype(np.float32)
            masked_img = img * binarized_mask[:, :, np.newaxis]
            # The product is float; convert back to uint8 for display/writing.
            masked_img = masked_img.astype(np.uint8)

            unet_whole_time += timer1.time_elapsed()

            compare_img2 = cv2.hconcat([previous_masked, masked_img])
            compare_img2 = cv2.resize(compare_img2, output_size)
            cv2.putText(compare_img2, 'Previous method', (10, 350), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 200), thickness=2)
            cv2.putText(compare_img2, 'U-Net prediction', (522, 350), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 200), thickness=2)
            writer.write(compare_img2)
            cv2.imshow('Result', compare_img2)

            # 27 is the key code of ESC.
            if cv2.waitKey(1) == 27:
                break
    finally:
        # Release the capture, writer and window even if a frame fails.
        cap.release()
        writer.release()
        cv2.waitKey(1)
        cv2.destroyAllWindows()
        cv2.waitKey(1)

    if frame_number == 0:
        # Nothing was read (missing file, unsupported codec, ...): avoid
        # dividing by zero when averaging.
        print('No frames could be read from ' + movie_path)
        return False

    print('Previous : ', end='')
    print(previous_whole_time/frame_number)
    print('U-Net : ', end='')
    print(unet_whole_time/frame_number)

    return True

# Run the comparison on the 'okazawa' clip when this cell is executed.
movie_masking('okazawa')


Previous : 0.04752335765145042
U-Net : 0.05934313210574063


True

# main.py

In [7]:
import os


# 学習後のU-Netによる予測を行う関数
def predict():
    """Predict a mask for every test image and write it to ``prediction/``.

    Images are loaded from ``Dataset/test/image``, the weights from
    ``unet_weights.hdf5``. Each prediction is resized to the size of its
    source image, scaled back to 0..255 and saved under the same file name.

    Raises:
        IOError: if a source image cannot be read or a prediction cannot be
            written.
    """
    import cv2

    image_dir = os.path.join('Dataset', 'test', 'image')
    output_dir = 'prediction'

    X_test, file_names = load_x(image_dir)

    input_channel_count = 1
    output_channel_count = 1
    # Use the same filter count as training so the saved weights fit.
    network = UNet(input_channel_count, output_channel_count, FIRST_LAYER_FILTER_COUNT)
    model = network.get_model()
    model.load_weights('unet_weights.hdf5')
    BATCH_SIZE = 12
    Y_pred = model.predict(X_test, BATCH_SIZE)

    # cv2.imwrite does not create missing directories.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for file_name, y in zip(file_names, Y_pred):
        image_path = os.path.join(image_dir, file_name)
        # The source image is read again only to recover its original size.
        img = cv2.imread(image_path, 0)
        if img is None:
            raise IOError('Could not read image: ' + image_path)

        y = cv2.resize(y, (img.shape[1], img.shape[0]))
        y_dn = denormalize_y(y)

        output_path = os.path.join(output_dir, file_name)
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(output_path, y_dn):
            raise IOError('Could not write prediction: ' + output_path)


def masking(path, image_path="000340.tif", mask_path="mask.png"):
    """Apply a mask image to a grayscale image and save the result to *path*.

    Args:
        path: destination file of the masked image.
        image_path: image to be masked; read as grayscale.
        mask_path: mask image, read as grayscale; pixels where the mask is
            zero are cleared in the result.

    Raises:
        IOError: if either input image cannot be read.
    """
    import cv2

    img1 = cv2.imread(image_path, 0)
    if img1 is None:
        raise IOError('Could not read image: ' + image_path)

    img2 = cv2.imread(mask_path, 0)
    if img2 is None:
        raise IOError('Could not read mask: ' + mask_path)

    masked = cv2.bitwise_and(img1, img1, mask=img2)

    cv2.imwrite(path, masked)


def create_mask(image):
    """Return a binary mask of *image* via morphological closing and Otsu thresholding.

    Args:
        image: input image; a three-channel result of the closing is
            converted to grayscale before thresholding.

    Returns:
        The thresholded image with values 0 and 255.
    """
    import cv2
    import numpy as np

    # Ten iterations of closing with a 9x9 square kernel.
    kernel = np.ones((9, 9), np.uint8)
    tmp_image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=10)

    if len(tmp_image.shape) == 3:
        tmp_image = cv2.cvtColor(tmp_image, cv2.COLOR_RGB2GRAY)

    # The threshold value 0 is ignored because THRESH_OTSU selects the level.
    _, tmp_image = cv2.threshold(tmp_image, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    return tmp_image


def create_difference(image1, image2, show=False):
    """Return the absolute difference image and the mean squared error of two images.

    Args:
        image1: first image.
        image2: second image, compatible with *image1* for ``cv2.absdiff``.
        show: when true, display the difference image and wait for a key
            press; pressing ``q`` closes the window.

    Returns:
        A tuple ``(difference, mse)``.
    """
    import cv2
    from sklearn.metrics import mean_squared_error

    difference = cv2.absdiff(image1, image2)

    # Only wait for a key when a window is actually shown.
    if show:
        cv2.imshow('difference', difference)
        # waitKey returns an integer key code, so compare with ord('q').
        if cv2.waitKey(0) & 0xFF == ord('q'):
            cv2.destroyAllWindows()

    mse = mean_squared_error(image1, image2)

    return difference, mse


# 動画でmaskingを行う関数
def movie_masking():
    """Predict and display a mask for every frame image in ``user_movie/hayashi``.

    The predictions are resized to the size of the first frame and shown one
    after another; pressing ``q`` stops the playback.

    Returns:
        True after playback, False when the folder holds no frame images.

    Raises:
        IOError: if the first frame cannot be read.

    NOTE(review): this definition has the same name as the earlier
    ``movie_masking(name)`` and replaces it once this cell is executed.
    """
    import cv2, os
    import numpy as np

    frame_dir = os.path.join('user_movie', 'hayashi')
    X_test, file_names = load_x(frame_dir)
    if not file_names:
        return False

    input_channel_count = 1
    output_channel_count = 1
    # Use the same filter count as training so the saved weights fit.
    network = UNet(input_channel_count, output_channel_count, FIRST_LAYER_FILTER_COUNT)
    model = network.get_model()
    model.load_weights('unet_weights.hdf5')
    BATCH_SIZE = 8
    Y_pred = model.predict(X_test, BATCH_SIZE)

    # load_x returns bare file names, so the folder must be prepended.
    first_frame_path = os.path.join(frame_dir, file_names[0])
    img = cv2.imread(first_frame_path, 0)
    if img is None:
        raise IOError('Could not read image: ' + first_frame_path)
    display_size = (img.shape[1], img.shape[0])

    for y in Y_pred:
        y = cv2.resize(y, display_size)
        # imshow scales floating-point images by 255, so convert the
        # 0..255 values to uint8 before displaying them.
        y_dn = denormalize_y(y).astype(np.uint8)

        cv2.imshow('frame', y_dn)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()

    return True


# Train the network, plot the learning curves, then write predictions for
# the test images.
history = train_unet()
plot_loss_accuracy(history)
predict()


Train on 37 samples, validate on 10 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000

KeyboardInterrupt: 

# DCGAN

In [None]:
from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam

import matplotlib.pyplot as plt

import sys

import numpy as np

class DCGAN():
    """Deep convolutional GAN for 28x28 single-channel images, trained on MNIST.

    Holds three Keras models: ``discriminator`` (compiled on its own),
    ``generator`` and ``combined`` (generator followed by the discriminator,
    used to train the generator).
    """

    def __init__(self):
        """Build and compile the discriminator and the combined model."""
        # Input shape
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        # Length of the noise vector fed to the generator.
        self.latent_dim = 1000

        # The same optimizer instance is used for both compiled models.
        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates imgs
        z = Input(shape=(self.latent_dim,))
        img = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines validity
        valid = self.discriminator(img)

        # The combined model  (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self.combined = Model(z, valid)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_generator(self):
        """Return a model that maps a noise vector to an image.

        A dense layer is reshaped to 7x7x128, followed by two
        upsampling + convolution stages and a final convolution with
        ``self.channels`` filters and a tanh activation.
        """

        model = Sequential()

        model.add(Dense(128 * 7 * 7, activation="relu", input_dim=self.latent_dim))
        model.add(Reshape((7, 7, 128)))
        model.add(UpSampling2D())
        model.add(Conv2D(128, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        model.add(UpSampling2D())
        model.add(Conv2D(64, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        model.add(Conv2D(self.channels, kernel_size=3, padding="same"))
        # tanh keeps the generated pixel values in the range -1..1.
        model.add(Activation("tanh"))

        model.summary()

        noise = Input(shape=(self.latent_dim,))
        img = model(noise)

        return Model(noise, img)

    def build_discriminator(self):
        """Return a model that maps an image to a single sigmoid validity score.

        Four convolution stages (32, 64, 128, 256 filters) with LeakyReLU and
        dropout are followed by a flatten and a one-unit dense layer.
        """

        model = Sequential()

        model.add(Conv2D(32, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(Conv2D(64, kernel_size=3, strides=2, padding="same"))
        # Pad one row at the bottom and one column on the right.
        model.add(ZeroPadding2D(padding=((0,1),(0,1))))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(Conv2D(256, kernel_size=3, strides=1, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(1, activation='sigmoid'))

        model.summary()

        img = Input(shape=self.img_shape)
        validity = model(img)

        return Model(img, validity)

    def train(self, epochs, batch_size=128, save_interval=50):
        """Alternate discriminator and generator updates on MNIST.

        Args:
            epochs: number of training iterations; each iteration processes
                one random batch, not a full pass over the dataset.
            batch_size: number of real and of generated images per iteration.
            save_interval: every this many iterations the losses are printed
                and ``save_imgs`` is called.
        """

        # Load the dataset
        (X_train, _), (_, _) = mnist.load_data()

        # Rescale -1 to 1
        X_train = X_train / 127.5 - 1.
        # Add the trailing channel axis.
        X_train = np.expand_dims(X_train, axis=3)

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images (indices drawn with replacement)
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]

            # Sample noise and generate a batch of new images
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator (real classified as ones and generated as zeros)
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            # Average the [loss, accuracy] pairs of the two updates.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # Train the generator (wants discriminator to mistake images as real)
            g_loss = self.combined.train_on_batch(noise, valid)

            

            # If at save interval => save generated image samples
            if epoch % save_interval == 0:
                # Plot the progress
                print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
                self.save_imgs(epoch)

    def save_imgs(self, epoch):
        """Show a 5x5 grid of freshly generated images.

        Args:
            epoch: current iteration; not used while the ``savefig`` call
                below stays commented out.
        """
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i,j].imshow(gen_imgs[cnt, :,:,0], cmap='gray')
                axs[i,j].axis('off')
                cnt += 1
        plt.show()
        # fig.savefig('./out.png')


#if __name__ == '__main__':
    #dcgan = DCGAN()
    #dcgan.train(epochs=5000, batch_size=32, save_interval=100)
