# 충남대학교 세포 분류 및 성장 측정기
## 연구: 충남대학교 유기재료공학과 [생기능성 고분자 연구실](https://sites.google.com/view/cnu-polymer-bfpl/home) (양성윤 교수, sungyun@cnu.ac.kr)
### 연구 담당: 고은서 석사 (qhfma1029@naver.com)
## 코드 기술 지원: 충남대학교 컴퓨터공학과 [데이터 네트워크 연구실](https://dnlab.cs-cnu.org/) (이영석 교수, lee@cnu.ac.kr)
### 개발 담당: 문현수 박사과정 (munhyunsu@cnu.ac.kr)

---
### 1. 필수 사전 작업: LabelMe 활용 Object Labeling
#### [LabelMe Github](https://github.com/wkentaro/labelme)

---
### 2. 필요 라이브러리 설치
- **Google Colab 에서는 수행하지 않아도 됨**
- install Python3 required libraries
```bash
pip3 install --upgrade pip wheel
pip3 install --upgrade -r requirements.txt
```

---
### 3. 라이브러리 설치 확인 및 필요 함수 선언
- pix2pix.upsample 출처 [Github](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/pix2pix/pix2pix.py)

In [None]:
import sys 
import os
import shutil

import tensorflow as tf
import tensorflow_hub as hub

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
from skimage import feature
from skimage import filters
from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split

In [None]:
# Report the interpreter and core library versions used by this notebook
py_version = '.'.join(str(part) for part in sys.version_info[:3])
print(f'Python3 version: {py_version}')
for lib_label, lib_module in (('Tensorflow', tf), ('Pandas', pd), ('Numpy', np)):
    print(f'{lib_label} version: {lib_module.__version__}')

In [None]:
### CAUTION: This code was not written by Hyunsu Mun, it is from the tensorflow example for the pix2pix model
class InstanceNormalization(tf.keras.layers.Layer):
    """Instance Normalization layer (https://arxiv.org/abs/1607.08022).

    Normalizes every sample over axes 1 and 2 and then applies a learned
    per-channel scale and offset.

    Args:
        epsilon: small constant added to the variance for numerical stability.
    """

    def __init__(self, epsilon=1e-5, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        # One scale/offset value per channel (last axis of the input)
        self.scale = self.add_weight(
            name='scale',
            shape=input_shape[-1:],
            initializer=tf.random_normal_initializer(1., 0.02),
            trainable=True)
        self.offset = self.add_weight(
            name='offset',
            shape=input_shape[-1:],
            initializer='zeros',
            trainable=True)

    def call(self, x):
        mean, variance = tf.nn.moments(x, axes=[1, 2], keepdims=True)
        inv = tf.math.rsqrt(variance + self.epsilon)
        normalized = (x - mean) * inv
        return self.scale * normalized + self.offset

    def get_config(self):
        # Needed so that a model containing this layer can be serialized
        config = super().get_config()
        config.update({'epsilon': self.epsilon})
        return config


def upsample(filters, size, norm_type='batchnorm', apply_dropout=False):
    """Build a block that upsamples its input by a factor of two.

    Conv2DTranspose => Batchnorm / Instancenorm => Dropout => Relu

    Args:
        filters: number of filters of the transposed convolution.
        size: kernel size of the transposed convolution.
        norm_type: normalization type; either 'batchnorm' or 'instancenorm'
            (case-insensitive). Any other value adds no normalization layer.
        apply_dropout: if True, adds a Dropout(0.5) layer after normalization.

    Returns:
        A tf.keras.Sequential model implementing the block.
    """

    initializer = tf.random_normal_initializer(0., 0.02)

    result = tf.keras.Sequential()
    # strides=2 with 'same' padding doubles the spatial resolution
    result.add(tf.keras.layers.Conv2DTranspose(filters, size, strides=2,
                                               padding='same',
                                               kernel_initializer=initializer,
                                               use_bias=False))

    normalization = norm_type.lower()
    if normalization == 'batchnorm':
        result.add(tf.keras.layers.BatchNormalization())
    elif normalization == 'instancenorm':
        result.add(InstanceNormalization())

    if apply_dropout:
        result.add(tf.keras.layers.Dropout(0.5))

    result.add(tf.keras.layers.ReLU())

    return result

---
### 4. Google Colab 연결 또는 데이터셋 경로 설정
- `data_path` 에 LabelMe 결과에 대한 데이터셋 압축 파일 경로가 입력되어야 함
- `ls /tmp/dataset` 이 수행되었을 때 SegmentationClass, JPEGImages 디렉터리가 안 보이면 `path_root` 를 알맞게 설정해야 함!

In [None]:
# Choose the dataset archive location: Google Drive on Colab, a local folder otherwise
try:
    from google.colab import drive
    print('Google Colab 환경입니다.')
    drive.mount('/content/gdrive')
    data_path = '/content/gdrive/My Drive/Colab Notebooks/CellDataset.tar.gz'
except ModuleNotFoundError:
    # google.colab is not importable, so this is not a Colab runtime
    print('Google colab 환경이 아닙니다.')
    data_path = './Dataset/CellDataset.tar.gz'

# Normalize to an absolute path ("~" expanded)
data_path = os.path.abspath(os.path.expanduser(data_path))
print('data_path=' + repr(data_path))

In [None]:
!rm -r /tmp/dataset &> /dev/null
!mkdir /tmp/dataset
!tar --directory /tmp/dataset -xvf $data_path &> /dev/null
!ls /tmp/dataset

In [None]:
# Root directory of the extracted dataset; later cells read its
# 'JPEGImages' and 'SegmentationClass' sub-directories.
path_root = '/tmp/dataset/'
path_root = os.path.abspath(os.path.expanduser(path_root))
print(f'{path_root=}')

---
### 5. 이미지 분류 모델 학습

In [None]:
# Move every image into a per-class folder for classification.
# The class name is the part of the file name before the first underscore.
jpg_root = os.path.join(path_root, 'JPEGImages')
dataset_root = os.path.join(path_root, 'Classification')

count = 0
for entry in os.scandir(jpg_root):
    # Only regular, non-hidden files are moved
    if entry.is_file() and not entry.name.startswith('.'):
        class_dir = os.path.join(dataset_root, entry.name.split('_')[0])
        os.makedirs(class_dir, exist_ok=True)
        shutil.move(entry.path, os.path.join(class_dir, entry.name))
        count += 1
print(f'{count} files moved')

In [None]:
# Build the training / validation image generators
dataset_root = os.path.abspath(os.path.expanduser(dataset_root))
print(f'Dataset root: {dataset_root}')

IMG_SHAPE = (224, 224)

# Pixel values are multiplied by 1/255; 20% of the images are reserved for validation
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255,
    validation_split=0.2,
)
train_data = image_generator.flow_from_directory(
    dataset_root,
    target_size=IMG_SHAPE,
    follow_links=True,
    subset='training',
)
# shuffle=False keeps the validation order fixed
validation_data = image_generator.flow_from_directory(
    dataset_root,
    target_size=IMG_SHAPE,
    follow_links=True,
    shuffle=False,
    subset='validation',
)

# Peek at the first validation batch; image_batch / label_batch are reused by later cells
for image_batch, label_batch in validation_data:
    print(f'Image batch shape: {image_batch.shape}')
    print(f'Label batch shape: {label_batch.shape}')
    break

In [None]:
# Transfer-learning setup
## Headless (no top classification layer) feature extractor from TF Hub
## Alternative: 'https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5'
feature_extractor_url = 'https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/2'
feature_extractor_layer = hub.KerasLayer(
    feature_extractor_url,
    input_shape=IMG_SHAPE+(3, ),
)
feature_batch = feature_extractor_layer(image_batch)

## Freeze the extractor so that only the new classification head is trained
feature_extractor_layer.trainable = False

## Classification model: frozen extractor followed by a softmax layer with one unit per class
model = tf.keras.Sequential()
model.add(feature_extractor_layer)
model.add(tf.keras.layers.Dense(train_data.num_classes, activation='softmax'))
model.summary()

In [None]:
# Train build
## Compile model for train
base_learning_rate = 0.001 # default
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'])

## Log class
### https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback
class CollectBatchStats(tf.keras.callbacks.Callback):
    """Keras callback that records training/validation loss and accuracy.

    Despite the "batch" prefix of the attribute names, one value is appended
    per epoch (in on_epoch_end). The lists keep growing across successive
    fit() calls that reuse the same instance.
    """

    def __init__(self):
        # Let the base Callback initialize its own state first
        super().__init__()
        self.batch_losses = []
        self.batch_val_losses = []
        self.batch_acc = []
        self.batch_val_acc = []

    def on_epoch_end(self, epoch, logs=None):
        """Append the epoch's metrics from `logs`, then reset the model metrics.

        The 'val_*' keys are read unconditionally, so a KeyError is raised
        when they are missing from `logs`.
        """
        self.batch_losses.append(logs['loss'])
        self.batch_acc.append(logs['accuracy'])
        self.batch_val_losses.append(logs['val_loss'])
        self.batch_val_acc.append(logs['val_accuracy'])
        self.model.reset_metrics()

In [None]:
# Number of batches needed to go through the whole training set once per epoch.
# np.ceil returns a float, so cast to int to pass an integer step count to fit().
steps_per_epoch = int(np.ceil(train_data.samples / train_data.batch_size))
initial_epoch = 10  # number of epochs trained with the frozen feature extractor
batch_stats_callback = CollectBatchStats()

history = model.fit(train_data,
                    epochs=initial_epoch,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=validation_data,
                    callbacks=[batch_stats_callback])

In [None]:
# Learning curves of the first training stage (one point per epoch)
acc = batch_stats_callback.batch_acc
val_acc = batch_stats_callback.batch_val_acc
loss = batch_stats_callback.batch_losses
val_loss = batch_stats_callback.batch_val_losses

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 8))

# Upper panel: accuracy
ax1.plot(acc, label='Training Accuracy')
ax1.plot(val_acc, label='Validation Accuracy')
ax1.set_title('Training and Validation Accuracy')
ax1.set_ylabel('Accuracy')
ax1.set_ylim([0, 1])
ax1.legend(loc='lower right')

# Lower panel: loss
ax2.plot(loss, label='Training Loss')
ax2.plot(val_loss, label='Validation Loss')
ax2.set_title('Training and Validation Loss')
ax2.set_xlabel('epoch')
ax2.set_ylabel('Cross Entropy')
ax2.set_ylim([0, 1])
ax2.legend(loc='upper right')

fig.show()

In [None]:
## Unfreeze the feature extraction layer for fine-tuning
feature_extractor_layer.trainable = True

# Recompile after changing `trainable`, using a 10x smaller learning rate
# than in the first training stage
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate/10),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'])
model.summary()

In [None]:
# Number of additional epochs trained with the unfrozen feature extractor
finetune_epoch = 10

# Continue the epoch numbering where the first stage stopped; the same
# callback instance is reused, so its lists keep growing with the new epochs
history_fine = model.fit(train_data,
                        epochs=initial_epoch+finetune_epoch,
                        initial_epoch=initial_epoch, # == history.epoch[-1]+1
                        steps_per_epoch=steps_per_epoch,
                        validation_data=validation_data,
                        callbacks = [batch_stats_callback])

In [None]:
# Draw learning curves chart (both training stages)
# The callback lists hold every epoch recorded so far, i.e. the frozen stage
# followed by the fine-tuning stage.
fine_acc = batch_stats_callback.batch_acc
fine_val_acc = batch_stats_callback.batch_val_acc
fine_loss = batch_stats_callback.batch_losses
fine_val_loss = batch_stats_callback.batch_val_losses

fig = plt.figure(figsize=(8, 8))
ax1 = fig.add_subplot(2, 1, 1)
ax1.plot(fine_acc, label='Training Accuracy')
ax1.plot(fine_val_acc, label='Validation Accuracy')
ax1.set_ylabel('Accuracy')
ax1.set_ylim([0, 1])
# Vertical line marking the epoch at which fine-tuning started
ax1.plot([initial_epoch,initial_epoch],
         ax1.get_ylim(), label='Start Fine Tuning')
ax1.legend(loc='lower right')
ax1.set_title('Training and Validation Accuracy')

ax2 = fig.add_subplot(2, 1, 2)
ax2.plot(fine_loss, label='Training Loss')
ax2.plot(fine_val_loss, label='Validation Loss')
ax2.set_ylabel('Cross Entropy')
ax2.set_ylim([0, 1])
ax2.plot([initial_epoch,initial_epoch],
         ax2.get_ylim(), label='Start Fine Tuning')
ax2.legend(loc='upper right')
ax2.set_title('Training and Validation Loss')
ax2.set_xlabel('epoch')
fig.show()

In [None]:
# Model export: save the classifier under /tmp/model,
# the directory that the last cell of the notebook archives
export_path = '/tmp/model/classification'
model.save(export_path)

print(f'Export the model to {export_path}')

In [None]:
# Sanity-check the export: reload the saved model and compare its predictions
# with those of the in-memory model on the same image batch
reloaded = tf.keras.models.load_model(export_path)

result_batch = model.predict(image_batch)
reloaded_result_batch = reloaded.predict(image_batch)

# Largest absolute difference between the two prediction arrays
max_abs_diff = abs(reloaded_result_batch - result_batch).max()
print(f'Comparison between own model and exported model {max_abs_diff}')

In [None]:
# Plot results
# Class names ordered by their class index, title-cased for display
class_names = sorted(validation_data.class_indices.items(), key=lambda pair:pair[1])
class_names = np.array([key.title() for key, value in class_names])
print(f'Classes: {class_names}')

## get result labels
predicted_batch = model.predict(image_batch)
predicted_id = np.argmax(predicted_batch, axis=-1)
predicted_label_batch = class_names[predicted_id]

label_id = np.argmax(label_batch, axis=-1)

## plot
fig = plt.figure(figsize=(10,9))
fig.subplots_adjust(hspace=0.5)
# Show at most 30 images (6x5 grid); the batch may hold fewer than 30 samples
num_shown = min(30, len(image_batch))
for n in range(num_shown):
    ax = fig.add_subplot(6, 5, n+1)
    ax.imshow(image_batch[n])
    color = 'green' if predicted_id[n] == label_id[n] else 'red'
    ax.set_title(predicted_label_batch[n].title(), color=color)
    ax.axis('off')
_ = fig.suptitle('Model predictions (green: correct, red: incorrect)')

In [None]:
# validation data classification result
validation_data.reset()

predicted_batch = model.predict(validation_data)
predicted_id = np.argmax(predicted_batch, axis=-1)

label_id = validation_data.classes

# Fix the matrix size to the number of classes; otherwise it is inferred from
# the largest label/prediction seen and may not match `class_names`
con_mat = tf.math.confusion_matrix(label_id, predicted_id,
                                   num_classes=len(class_names))

result_df = pd.DataFrame(con_mat.numpy(), index=class_names, columns=class_names, dtype=int)

print('-- Validation result (Row: Actual Class, Column: Predicted Class) --')
print(result_df)

In [None]:
# Restore the files to their original location once training is done
queue = [dataset_root]
count = 0

while queue:
    ptr = queue.pop()
    # Snapshot the listing first: entries are moved out of this directory below
    with os.scandir(ptr) as listing:
        entries = list(listing)
    for entry in entries:
        if entry.is_dir():
            if not entry.name.startswith('.'):
                queue.append(entry.path)
        elif entry.is_file():
            # Move back every regular file, not only '*.jpg', so that nothing
            # is lost when the Classification tree is removed below
            shutil.move(entry.path, jpg_root)
            count = count + 1
shutil.rmtree(dataset_root)
print(f'{count} files moved')

---
### 6. 이미지 세그멘테이션 모델 학습

In [None]:
# Collect the label arrays (*.npy), sorted by path
npy_root = os.path.join(path_root, 'SegmentationClass')

npy_items = sorted(
    entry.path
    for entry in os.scandir(npy_root)
    if not entry.name.startswith('.')   # skip hidden entries
    and entry.is_file()
    and entry.name.endswith('.npy')
)
print(f'Loaded: {len(npy_items)} files')

In [None]:
# Collect the input images (*.jpg), sorted by path
jpg_root = os.path.join(path_root, 'JPEGImages')

jpg_items = sorted(
    entry.path
    for entry in os.scandir(jpg_root)
    if not entry.name.startswith('.')   # skip hidden entries
    and entry.is_file()
    and entry.name.endswith('.jpg')
)
print(f'Loaded: {len(jpg_items)} files')

In [None]:
IMG_SHAPE = (224, 224)
def preprocess(path):
    """Load a label array and convert it into a segmentation mask.

    The array stored at `path` is resized to IMG_SHAPE, normalized by its
    maximum and rounded. Pixels that are positive afterwards get the value 2,
    and pixels with a non-zero Sobel response get the value 1. Assuming
    non-negative label values, the result therefore contains
    0 (background), 1 (edge) and 2 (object).

    Args:
        path: path of the '.npy' file to load.

    Returns:
        Array of shape IMG_SHAPE holding the mask values.
    """
    data = np.load(path)
    data = resize(data, IMG_SHAPE, anti_aliasing=False)
    peak = np.max(data)
    if peak == 0:
        # Nothing is labelled: avoid dividing by zero, which would turn the
        # whole mask into NaN; the mask is background only
        return np.zeros_like(data)
    data = np.round(data/peak)
    data[data > 0] = data[data > 0] + 1 # Class label + 1
    edge = filters.sobel(data) # Find edge
    data[edge > 0] = 1 # Set class 1 to edge
    return data

In [None]:
# Images and labels are paired by position in the two sorted lists, so a
# count mismatch would silently pair images with the wrong masks
if len(jpg_items) != len(npy_items):
    raise ValueError(f'Image/label count mismatch: '
                     f'{len(jpg_items)} images vs {len(npy_items)} labels')

X = []
y = []
for image_path, label_path in zip(jpg_items, npy_items):
    # Input image resized to the network input size
    image = resize(imread(image_path), IMG_SHAPE, anti_aliasing=True)
    X.append(image)
    # Mask with a trailing channel axis added
    y.append(np.expand_dims(preprocess(label_path), axis=-1))

In [None]:
# Hold out 20% of the samples for testing.
# random_state=None: the split differs between runs; set an integer for a reproducible split
test_size = 0.2
random_state = None
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

In [None]:
# Wrap the train/test splits as tf.data datasets of (image, mask) pairs
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

In [None]:
# Shuffle with a buffer of SHUFFLE_BUFFER_SIZE elements, then group into batches of BATCH_SIZE
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 100
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
test_dataset = test_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

In [None]:
# Encoder: MobileNetV2 without its classification head
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE+(3, ), include_top=False)

# Use the activations of these layers
# (sizes are for the 224x224 input used here, i.e. 1/2 ... 1/32 of the input resolution)
layer_names = [
    'block_1_expand_relu',   # 112x112
    'block_3_expand_relu',   # 56x56
    'block_6_expand_relu',   # 28x28
    'block_13_expand_relu',  # 14x14
    'block_16_project',      # 7x7
]
layers = [base_model.get_layer(name).output for name in layer_names]

# Create the feature extraction model
down_stack = tf.keras.Model(inputs=base_model.input, outputs=layers)

# The encoder weights are frozen; only the decoder is trained
down_stack.trainable = False

In [None]:
# Decoder: each upsample block doubles the spatial resolution
# (sizes below assume a 224x224 network input)
up_stack = [
    upsample(512, 3),  # 7x7 -> 14x14
    upsample(256, 3),  # 14x14 -> 28x28
    upsample(128, 3),  # 28x28 -> 56x56
    upsample(64, 3),   # 56x56 -> 112x112
]

In [None]:
def display(display_list):
    """Show the given arrays side by side in a single row.

    Args:
        display_list: up to three arrays, titled in order
            'Input Image', 'True Mask' and 'Predicted Mask'.
    """
    plt.figure(figsize=(15, 15))

    title = ['Input Image', 'True Mask', 'Predicted Mask']
    total = len(display_list)

    for position, item in enumerate(display_list, start=1):
        plt.subplot(1, total, position)
        plt.title(title[position - 1])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(item))
        plt.axis('off')
    plt.show()

In [None]:
# Take one training batch and display its first image next to its mask
for image, mask in train_dataset.take(1):
    sample_image, sample_mask = image[0], mask[0]
display([sample_image, sample_mask])

In [None]:
OUTPUT_CHANNELS = 3
def unet_model(output_channels):
    """Build a U-Net style segmentation model on top of `down_stack`.

    Args:
        output_channels: number of channels (classes) of the output layer.

    Returns:
        tf.keras.Model mapping an IMG_SHAPE RGB image to per-pixel logits.
    """
    inputs = tf.keras.layers.Input(shape=IMG_SHAPE+(3, ))

    # Encoder: the last output is the bottleneck, the others feed the skip connections
    *skip_features, bottleneck = down_stack(inputs)
    x = bottleneck

    # Decoder: upsample, then concatenate with the matching encoder output
    # (deepest encoder output first)
    for up, skip in zip(up_stack, reversed(skip_features)):
        x = up(x)
        x = tf.keras.layers.Concatenate()([x, skip])

    # Final transposed convolution doubles the resolution once more
    # (112x112 -> 224x224 for a 224x224 input); no activation, so it outputs logits
    outputs = tf.keras.layers.Conv2DTranspose(
        output_channels, 3, strides=2,
        padding='same')(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Build the segmentation network (this rebinds `model`).
# The last layer of unet_model has no activation, hence from_logits=True.
model = unet_model(OUTPUT_CHANNELS)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.summary()

In [None]:
def create_mask(pred_mask):
    """Turn a batch of per-class scores into the mask of its first element.

    Args:
        pred_mask: batch of predictions whose last axis holds the class scores.

    Returns:
        Class indices (argmax over the last axis) of the first batch element,
        with a trailing axis of size 1 appended.
    """
    class_ids = tf.argmax(pred_mask, axis=-1)
    first_mask = class_ids[0]
    return first_mask[..., tf.newaxis]

In [None]:
def show_predictions(dataset=None, num=1):
    """Display input image, true mask and predicted mask.

    Args:
        dataset: optional batched dataset of (image, mask) pairs. When given,
            the first element of each of the first `num` batches is shown.
            When None, the first test sample (X_test[0], y_test[0]) is shown.
        num: number of batches to take from `dataset`.
    """
    if dataset is not None:
        for image, mask in dataset.take(num):
            pred_mask = model.predict(image)
            display([image[0], mask[0], create_mask(pred_mask)])
    else:
        # Add a batch axis to the single sample before predicting
        display([X_test[0], y_test[0],
                 create_mask(model.predict(X_test[0][tf.newaxis, ...]))])

In [None]:
CALLBACKTERM = 10
class DisplayCallback(tf.keras.callbacks.Callback):
    """Show a sample prediction every CALLBACKTERM epochs, starting with the first one."""

    def on_epoch_end(self, epoch, logs=None):
        # `epoch` is zero-based; only every CALLBACKTERM-th epoch is displayed
        if epoch % CALLBACKTERM != 0:
            return
        show_predictions()
        print('\nSample Prediction after epoch {}\n'.format(epoch+1))

In [None]:
# Train the segmentation model; the test split is used as validation data
# and DisplayCallback periodically shows a sample prediction
EPOCHS = 20

model_history = model.fit(train_dataset, epochs=EPOCHS,
                          validation_data=test_dataset,
                          callbacks=[DisplayCallback()])

In [None]:
# Model export: save the segmentation model under /tmp/model,
# the directory that the last cell of the notebook archives
export_path = '/tmp/model/argumentation'
model.save(export_path)

print(f'Export the model to {export_path}')

In [None]:
# Check the exported model too
# IDX must be defined here: it was previously used before its first assignment
IDX = 1  # index of the test sample used for the comparison
reloaded = tf.keras.models.load_model(export_path)

# Single test sample with a batch axis added
sample = np.expand_dims(X_test[IDX], axis=0)
r1 = model.predict(sample)
r2 = reloaded.predict(sample)

print(f'Comparison between own model and exported model {abs(r1 - r2).max()}')

In [None]:
# Predict a single test sample (batch axis added) and show it next to its true mask
IDX = 1
r = model.predict(np.expand_dims(X_test[IDX], axis=0))
display([X_test[IDX], y_test[IDX], create_mask(r)])

In [None]:
# Choose where the model archive is written: Google Drive on Colab, a local folder otherwise
try:
    from google.colab import drive
    print('Google Colab 환경입니다.')
    drive.mount('/content/gdrive')
    model_path = '/content/gdrive/My Drive/Colab Notebooks/Model.tar.gz'
except ModuleNotFoundError:
    # google.colab is not importable, so this is not a Colab runtime
    print('Google colab 환경이 아닙니다.')
    model_path = './Dataset/Model.tar.gz'

# Normalize to an absolute path ("~" expanded)
model_path = os.path.abspath(os.path.expanduser(model_path))
print('model_path=' + repr(model_path))

In [None]:
!tar -cvf $model_path /tmp/model &> /dev/null