In [None]:
import glob
import os
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import tensorflow as tf
from tensorflow.keras import backend as K

# Physical GPU ids exposed to TensorFlow. NUM_GPU is derived from this list so the
# replica count used for the global batch size cannot drift from the devices
# that are actually visible.
VISIBLE_GPUS = ('1', '2')
NUM_GPU = len(VISIBLE_GPUS)
# CUDA_VISIBLE_DEVICES is documented as a comma-separated list; the previous value
# '1, 2' contained a space, which may not be parsed as intended.
# NOTE(review): this runs after `import tensorflow`; it presumably still takes
# effect because no GPU op has executed yet - confirm on the target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(VISIBLE_GPUS)

# Show full file paths in DataFrame output instead of truncating them.
pd.set_option('display.max_colwidth', None)
print('TensorFlow version:', tf.__version__)

# Collect every PNG under the data root. Each file path has the form
# "<front> wv <channel>).png", where <front> identifies one imaged field.
img_path_temp = Path('your/path').rglob('*.png')
img_path_pd = pd.DataFrame(img_path_temp, columns = ['path']).astype('str')
if img_path_pd.empty:
    # Fail with a clear message instead of an obscure column-length error below.
    raise FileNotFoundError("No '*.png' files found under 'your/path'")
img_path_pd[['front', 'channel']] = img_path_pd['path'].str.split(' wv ', expand=True)

# Keep only the fields that were captured in exactly 5 channels.
channels_per_front = img_path_pd.groupby('front')['channel'].count()
incomplete_fronts = channels_per_front.index[channels_per_front != 5]
# .copy() makes the filtered frame independent, so the column assignment below
# does not raise SettingWithCopyWarning.
img_path_pd = img_path_pd.loc[~img_path_pd['front'].isin(incomplete_fronts)].copy()
img_path_pd['channel'] = img_path_pd['channel'].str[:-5] # remove ").png"

# One row per field, taken from the brightfield channel. It is printed because a
# bare expression in the middle of a cell is never displayed.
img_label_pd = img_path_pd[img_path_pd['channel']=='TL-Brightfield - dsRed']
img_label_pd = img_label_pd.drop_duplicates(subset=['front'])
print('img_label_pd.shape =', img_label_pd.shape)

# Wells dropped from the analysis.
# NOTE(review): the reason for excluding these two wells is not recorded here.
EXCLUDED_WELLS = ['B - 3', 'G - 10']

# Plate layout: dose -> wells that received it.
DOSE_TO_WELLS = {
    0.:  ['B - 10', 'C - 5', 'C - 9', 'C - 10', 'D - 5', 'E - 2', 'E - 7', 'F - 7', 'G - 7'],
    0.1: ['C - 2', 'C - 11', 'D - 6', 'D - 8', 'D - 10', 'E - 4', 'E - 8', 'F - 3', 'G - 9'],
    0.3: ['B - 5', 'D - 7', 'D - 9', 'D - 11', 'E - 6', 'E - 11', 'F - 6', 'F - 10', 'F - 11'],
    1.:  ['B - 4', 'B - 7', 'C - 4', 'E - 5', 'E - 9', 'F - 4', 'F - 8', 'G - 5', 'G - 6'],
    3.:  ['B - 6', 'D - 2', 'D - 3', 'D - 4', 'E - 3', 'F - 5', 'F - 9', 'G - 3', 'G - 4'],
    30.: ['B - 8', 'B - 9', 'C - 3', 'C - 6', 'C - 7', 'C - 8', 'E - 10', 'F - 2', 'G - 8'],
}
# Inverted lookup: well -> dose.
WELL_TO_DOSE = {well: dose for dose, wells in DOSE_TO_WELLS.items() for well in wells}

# Work on an independent copy so adding columns does not raise
# SettingWithCopyWarning on the previously filtered frame.
img_path_pd = img_path_pd.copy()

# "<...>/<folder>/<well>(<field>" -> folder, well and field columns.
front_parts = img_path_pd['front'].str.split('/')
img_path_pd['folder'] = front_parts.str[-2]
img_path_pd[['well', 'field']] = front_parts.str[-1].str.split('(', expand=True)

img_path_pd = img_path_pd.loc[~img_path_pd['well'].isin(EXCLUDED_WELLS)].copy()

# Wells missing from the layout get NaN, exactly as with the per-dose
# `.loc[...] = dose` assignments this replaces.
img_path_pd['dose'] = img_path_pd['well'].map(WELL_TO_DOSE)
img_path_pd.head()

In [None]:
# Sorted unique values, kept for inspection.
ch_np = np.sort(img_path_pd['channel'].unique())
folder_np = np.sort(img_path_pd['folder'].unique())
dose_np = np.sort(img_path_pd['dose'].unique())

# subset of data
dose_class = [0, 0.1, 0.3, 1, 3, 30] # [0., 0.1, 0.3, 1., 3., 30.]
# Rows whose dose is NaN (wells absent from the plate layout) are dropped here.
# .copy() keeps the label assignment below from raising SettingWithCopyWarning.
path_label_pd = img_path_pd.loc[img_path_pd['dose'].isin(dose_class)].copy()

# Replace the numeric dose by an integer class id (position in le.classes_).
le = LabelEncoder()
path_label_pd['dose'] = le.fit_transform(path_label_pd['dose'])
print('label [0, 1, 2, 3, 4, 5]:', le.classes_)

# Collapse the per-channel rows into one (front, label) row per field.
path_label_pd = path_label_pd[['front', 'dose']].drop_duplicates()
assert path_label_pd['front'].is_unique, 'a field is mapped to more than one dose'
print('path_label_pd.shape =', path_label_pd.shape)

# Stratified 90/10 split with a fixed seed so the split is reproducible.
pl_train_pd, pl_val_pd = train_test_split(path_label_pd, test_size=0.1, random_state=1, stratify=path_label_pd['dose']) # path_label_train_pandas
print('Training size:', pl_train_pd.shape)
print('Validation size:', pl_val_pd.shape)

# Sanity check on a single image: shape, raw intensity range, min-max preview.
img_test = plt.imread('your/image/path')
print(img_test.shape)
# NOTE(review): the 65535 factor assumes imread returned floats in [0, 1]
# decoded from a 16-bit PNG - confirm for your files.
print(np.amax(img_test)*65535)
print(np.amin(img_test)*65535)

# Min-max normalise to [0, 255]. `np.float` was removed in NumPy 1.24;
# np.float64 is the type that alias used to resolve to.
pixel_min = img_test.min()
pixel_range = img_test.max() - pixel_min
if pixel_range == 0:
    # Constant image: avoid dividing by zero; the result is all zeros.
    pixel_range = 1
img_norm = (img_test.astype(np.float64)-pixel_min)*255.0/pixel_range
print(np.amax(img_norm))
print(np.amin(img_norm))
plt.imshow(img_norm)

In [None]:
# Hyper parameters
BATCH_SIZE_PER_REPLICA = 16
# One global batch is split evenly across the replicas.
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * NUM_GPU
channel = np.sort(pd.unique(img_path_pd['channel']))

# Datasets of (path prefix, class id) pairs; images are decoded later in a map.
train_pairs = (pl_train_pd['front'].to_numpy(), pl_train_pd['dose'].to_numpy())
val_pairs = (pl_val_pd['front'].to_numpy(), pl_val_pd['dose'].to_numpy())

# The shuffle buffer spans the whole training set, giving a full shuffle of paths.
list_train_ds = (
    tf.data.Dataset.from_tensor_slices(train_pairs)
    .shuffle(buffer_size=len(pl_train_pd))
)
list_val_ds = tf.data.Dataset.from_tensor_slices(val_pairs)

def stack_img(front, label, ch='TL-Brightfield - dsRed'):
    """Load one channel image of a field and replicate it into three planes.

    The file read is ``front + ' wv ' + ch + ').png'``.

    Args:
        front: string tensor holding the path prefix of the field (the part of
            the file path before ``' wv '``).
        label: class id of the field; returned unchanged.
        ch: name of the channel to load. It used to be read from an undefined
            global, which raised NameError on a fresh kernel.
            NOTE(review): the default is the only channel name that appears in
            this notebook - confirm it is the channel you intend to train on.

    Returns:
        Tuple ``(img_stack, label)`` where ``img_stack`` is the decoded uint16
        image concatenated with itself three times along axis 2.
    """
    file_path = tf.strings.join([front, ' wv ', ch, ').png']) # for png
    img_temp = tf.io.decode_png(tf.io.read_file(file_path), dtype=tf.dtypes.uint16)
    # The same image fills all three planes so the result can feed a
    # 3-channel network input.
    img_stack = tf.concat([img_temp, img_temp, img_temp], axis=2)
    return img_stack, label
# Decode the images lazily and in parallel for both splits.
train = list_train_ds.map(stack_img, num_parallel_calls=tf.data.AUTOTUNE)
val = list_val_ds.map(stack_img, num_parallel_calls=tf.data.AUTOTUNE)

# Visual check of one training sample: show the last plane and its statistics.
for image, label in train.take(1):
    pixels = image.numpy()
    last_plane = pixels[:, :, 2]
    plt.figure()
    plt.imshow(last_plane)
    plt.title(label.numpy())
    print('Pixel max:', last_plane.max())
    print('Pixel min:', last_plane.min())
    print('Image shape:', pixels.shape)

def configure_for_performance(ds, shuffle=True):
    """Batch and prefetch a dataset, optionally shuffling it first.

    Args:
        ds: dataset of (image, label) elements.
        shuffle: when True (the default, matching the previous behaviour),
            elements pass through a 2000-element shuffle buffer that is
            reshuffled on every iteration. Pass False for evaluation data,
            where order does not matter and buffering decoded images only
            costs memory and start-up time.

    Returns:
        The dataset batched to GLOBAL_BATCH_SIZE with a prefetch buffer of 2.
    """
    if shuffle:
        ds = ds.shuffle(buffer_size=2000, reshuffle_each_iteration=True) # pl_train_pd.shape[0]
    ds = ds.batch(GLOBAL_BATCH_SIZE)
    ds = ds.prefetch(buffer_size=2)
    return ds

train_batches = configure_for_performance(train)
# Validation metrics do not depend on element order, so skip the shuffle buffer.
val_batches = configure_for_performance(val, shuffle=False)

In [None]:
# Build model
# Experiment index embedded in the log and checkpoint file names below.
# NOTE(review): NUM_EXP was used but never defined in this notebook (NameError on
# a fresh kernel); 1 is a placeholder - set it to your run number.
NUM_EXP = 1
IMG_HEIGHT = 1024
IMG_WIDTH = 1024
EPOCH_INITIAL = 30
BASE_LEARNING_RATE = 0.002

# Build and compile under the strategy scope so the variables are mirrored
# across the visible GPUs.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # The stride-2 'same' convolution in front of the backbone halves the spatial
    # size (rounding up), so the backbone input is derived from IMG_HEIGHT and
    # IMG_WIDTH instead of a hard-coded 512 that could silently go out of sync.
    base_model = tf.keras.applications.MobileNetV2(input_shape=((IMG_HEIGHT + 1) // 2, (IMG_WIDTH + 1) // 2, 3),
                                                   include_top=False,
                                                   weights='imagenet')
    # Freeze the backbone; only the layers added below are trained.
    base_model.trainable = False
    print("Number of layers in the base model: ", len(base_model.layers))

    # Intermediate tensors are named `x` rather than `_`, which IPython uses for
    # the last cell output.
    inputs = tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
    # Map 16-bit intensities [0, 65535] to [-1, 1].
    x = tf.keras.layers.Rescaling(1./32767.5, offset=-1)(inputs)
    # Augmentation layers.
    x = tf.keras.layers.RandomFlip('horizontal_and_vertical')(x)
    x = tf.keras.layers.RandomRotation(0.2)(x)
    # Strided convolution that downsamples the input by 2 before the backbone.
    x = tf.keras.layers.Conv2D(3, (3, 3), strides=2, padding='same', activation='tanh')(x) # 1024
    # base_model
    x = base_model(x)
    x = tf.keras.layers.MaxPool2D(pool_size=2)(x) # 1024
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    # One logit per dose class; the loss applies the softmax (from_logits=True).
    outputs = tf.keras.layers.Dense(len(dose_class))(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=BASE_LEARNING_RATE),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

# Stop once validation accuracy has not improved for 2 consecutive epochs and
# roll the model back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max',
    patience=2, restore_best_weights=True,
    verbose=1,
)

# Per-epoch metrics go to a CSV named after the experiment number; an existing
# file is overwritten (append=False).
history_log_path = 'train_v2.1_{:02d}_01_history_log_initial.csv'.format(NUM_EXP)
csv_logger = tf.keras.callbacks.CSVLogger(history_log_path, append=False)

class save_per_epoch(tf.keras.callbacks.Callback):
    """Keras callback that saves the full model to an .h5 file after every epoch.

    Files are named 'train_v2.1_<NUM_EXP>_temp_<epoch>.h5', where <epoch> is the
    epoch index passed to ``on_epoch_end``.
    """

    def __init__(self, verbose=0):
        """Store `verbose`; it is kept for API symmetry and not used when saving."""
        super().__init__()
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Save the current model; `logs` is accepted for the Keras API and ignored."""
        # `logs` defaults to None (as in the Keras base class) rather than a
        # shared mutable dict.
        self.model.save('train_v2.1_{:02d}_temp_{}.h5'.format(NUM_EXP, epoch))

class validate_all_val(tf.keras.callbacks.Callback):
    """Keras callback that evaluates the model on `val_batches` after every epoch.

    The latest results are kept on the instance as `self.loss` and `self.acc`,
    and the accuracy is printed.
    """

    def __init__(self, verbose=0):
        """Store the verbosity level forwarded to ``model.evaluate``."""
        super().__init__()
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Run a full evaluation pass; `logs` is accepted for the Keras API and ignored."""
        # `logs` defaults to None (as in the Keras base class) rather than a
        # shared mutable dict.
        self.loss, self.acc = self.model.evaluate(val_batches, verbose=self.verbose)
        print(' - all_val_accuracy: {0:.4f}'.format(self.acc))

In [None]:
# Initial training phase: early stopping, CSV logging, and a checkpoint per epoch.
training_callbacks = [early_stopping, csv_logger, save_per_epoch()]
history = model.fit(
    train_batches,
    validation_data=val_batches,
    epochs=EPOCH_INITIAL,
    callbacks=training_callbacks,
    verbose=1,
)

# Persist the model as it stands after fit() returns.
final_model_path = 'my_model_{}.h5'.format(NUM_EXP)
model.save(final_model_path)
print('Model saved.')