In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import image
from matplotlib import pyplot
import os
import cv2
import random
import concurrent.futures
import time
import sklearn
# Print the TensorFlow version in use so the run's environment is visible in the output.
print(tf.__version__)

In [None]:
# Build a cluster resolver for the TPU. NOTE(review): tpu='' presumably lets the
# hosting environment supply the TPU address — confirm against the
# TPUClusterResolver documentation for the platform this runs on.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
# Connect this runtime to the cluster described by the resolver.
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
# List the logical TPU devices TensorFlow can see after initialization.
print("All devices: ", tf.config.list_logical_devices('TPU'))

In [None]:
# Load the training metadata; dtype=str reads every column (including the
# '|'-separated Label column) as text.
train = pd.read_csv("../input/hpa-single-cell-image-classification/train.csv", dtype=str)
# File name of each sample's green-channel PNG, built with a vectorised string
# concatenation instead of a Python-level loop over the column values.
train['image_name'] = train['ID'] + "_green.png"
train.head()

In [None]:
# Class names; the position of a name in this list is the integer class id
# used when the multi-hot label vectors are built.
label_list = [
    "Nucleoplasm",
    "Nuclear membrane",
    "Nucleoli",
    "Nucleoli fibrillar center",
    "Nuclear speckles",
    "Nuclear bodies",
    "Endoplasmic reticulum",
    "Golgi apparatus",
    "Intermediate filaments",
    "Actin filaments",
    "Microtubules",
    "Mitotic spindle",
    "Centrosome",
    "Plasma membrane",
    "Mitochondria",
    "Aggresome",
    "Cytosol",
    "Vesicles and punctate cytosolic patterns",
    "Negative",
]
# Per-sample image file names, taken from the `image_name` column of `train`.
img_names = train["image_name"]
def cvt_multi_labels(labellist, splitter='|', num_classes=None):
    """Convert separator-delimited class-id strings into multi-hot vectors.

    Each entry of `labellist` is a string such as "16|0". It is split on
    `splitter` and every resulting token is parsed as an integer class id.
    (Iterating the string character by character would turn "16" into the
    two ids 1 and 6, which is why the string is split first.)

    Args:
        labellist: iterable of label strings.
        splitter: separator between class ids inside one label string.
        num_classes: length of each output vector. Defaults to
            ``len(label_list)`` when omitted.

    Returns:
        A list with one entry per input label; each entry is a list of
        `num_classes` floats that are 1 at the listed class ids and 0 elsewhere.

    Raises:
        ValueError: if a token is not an integer.
        IndexError: if a class id is outside ``[-num_classes, num_classes)``.
    """
    if num_classes is None:
        num_classes = len(label_list)
    labels = []
    for label in labellist:
        l = np.zeros(num_classes)
        for token in label.split(splitter):
            token = token.strip()
            # Skip empty tokens produced by an empty label or a stray separator.
            if token:
                l[int(token)] = 1
        labels.append(list(l))
    return labels

# Multi-hot encode the label strings of every training row.
labels = cvt_multi_labels(train['Label'])


def myfunc():
    """Return the constant 0.5.

    Kept so that existing references to this name keep working; it is no
    longer passed to ``random.shuffle``.
    """
    return 0.5


# Seed for the sample shuffle (same value as the SEED used for augmentation
# later in the notebook).
SHUFFLE_SEED = 100

# Pair every file name with its label vector so both are permuted together.
c = list(zip(img_names, labels))
# The optional `random` argument of random.shuffle was removed in Python 3.11,
# and a function that always returns 0.5 yields one fixed permutation rather
# than a random one. A dedicated seeded generator gives a real shuffle that is
# still reproducible and does not touch the global random state.
random.Random(SHUFFLE_SEED).shuffle(c)
img_names, labels = zip(*c)

In [None]:
# Side length, in pixels, that every image is resized to when loaded.
IMSIZE = 256


def read_img(image):
    """Load one image file resized to IMSIZE x IMSIZE in 'rgb' colour mode.

    Args:
        image: path of the image file to load.

    Returns:
        The object returned by ``tf.keras.preprocessing.image.load_img``.
    """
    return tf.keras.preprocessing.image.load_img(
        image,
        target_size=(IMSIZE, IMSIZE),
        color_mode='rgb',
    )
def prepare_dataset(namelist, labels, path):
    """Load the named images from `path` and return image and label tensors.

    Images are read by `read_img` on a pool of 16 worker threads; a progress
    counter is printed for each loaded image and the elapsed time at the end.

    Args:
        namelist: sequence of image file names relative to `path`.
        labels: sequence of label vectors, one per file name.
        path: directory containing the image files.

    Returns:
        ``(imgs, labels)``: the stacked images as a tensor and the labels as
        an int8 tensor.
    """
    started_at = time.time()
    label_tensor = tf.cast(tf.convert_to_tensor(np.array(labels)), tf.int8)
    full_paths = [os.path.join(path, name) for name in namelist]
    total = len(full_paths)
    loaded = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as pool:
        # pool.map yields results in input order, so images stay aligned with labels.
        for count, picture in enumerate(pool.map(read_img, full_paths), start=1):
            print("\rFetching: [{}/{}]".format(count, total), end="", flush=True)
            loaded.append(picture)
        image_tensor = tf.convert_to_tensor(np.stack(loaded))
    print("\nExecution time: ",time.time() - started_at, "s")
    return image_tensor, label_tensor
 

In [None]:
# Load the image tensors on the host CPU.
with tf.device('/cpu:0'):
    path = '../input/hpa-single-cell-image-classification/train'
    TRAIN_SIZE = 10000
    VAL_SIZE = 2000
    # Slicing past the end would silently give a short (or empty) split, so
    # check up front that enough samples are available.
    if len(img_names) < TRAIN_SIZE + VAL_SIZE:
        raise ValueError(
            "Need at least {} samples for the train/validation split, found {}".format(
                TRAIN_SIZE + VAL_SIZE, len(img_names)))
    # The first TRAIN_SIZE shuffled samples train the model, the next VAL_SIZE validate it.
    train_images, train_labels = prepare_dataset(img_names[:TRAIN_SIZE], labels[:TRAIN_SIZE], path)
    val_images, val_labels = prepare_dataset(
        img_names[TRAIN_SIZE:TRAIN_SIZE + VAL_SIZE],
        labels[TRAIN_SIZE:TRAIN_SIZE + VAL_SIZE],
        path,
    )
print("Training Image tensor shape", train_images.shape)
print("Training Labels tensor shape", train_labels.shape)
# These tensors are the validation split, so they are reported as such.
print("Validation Image tensor shape", val_images.shape)
print("Validation Labels tensor shape", val_labels.shape)


In [None]:
# Seed shared by all random augmentation layers.
SEED = 100
# RandomRotation's factor is a fraction of a full turn (2*pi), not an angle in
# radians: 0.25 samples rotations in [-90, +90] degrees. The previous value of
# 3.142/2 (pi/2 radians) was read as 1.571 full turns.
random_rotation = tf.keras.layers.experimental.preprocessing.RandomRotation(0.25, seed=SEED)
random_flip = tf.keras.layers.experimental.preprocessing.RandomFlip(mode="horizontal_and_vertical", seed=SEED)
# Zoom and translation factors are (lower, upper) fractions of the image size.
random_zoom = tf.keras.layers.experimental.preprocessing.RandomZoom((0, 0.25), seed=SEED)
random_translate = tf.keras.layers.experimental.preprocessing.RandomTranslation((-0, 0.25), (-0, 0.25), seed=SEED)

with tf.device('/cpu:0'):
    BATCH_SIZE = 128
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(train_images, tf.uint8), tf.cast(train_labels, tf.uint8)))
    # Shuffle individual examples *before* batching so each epoch gets new
    # batch compositions; shuffling after batch() only reorders whole batches.
    train_dataset = train_dataset.shuffle(TRAIN_SIZE).batch(BATCH_SIZE)
    # Free the in-memory copies (this makes the cell non-re-runnable without
    # re-running the loading cell first).
    del train_images, train_labels
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(val_images, tf.uint8), tf.cast(val_labels, tf.uint8)))
    # Build the validation pipeline from the validation tensors (it used to be
    # derived from train_dataset, so validation ran on training data). It is
    # batched with BATCH_SIZE because compute_loss scales by BATCH_SIZE, and no
    # shuffle is applied since evaluation does not depend on order.
    val_dataset = val_dataset.batch(BATCH_SIZE)
    del val_images, val_labels


def normalize_train(imgs, label):
    """Augment a batch of uint8 images and rescale it to float16 in [0, 1].

    The random rotation, flip, zoom and translation layers are applied in
    that order through their `call` methods. Labels pass through unchanged.

    Args:
        imgs: batch of images (the layers are applied after batching).
        label: labels belonging to `imgs`.

    Returns:
        ``(imgs, label)`` with the images augmented and divided by 255.
    """
    imgs = random_rotation.call(imgs)
    imgs = random_flip.call(imgs)
    imgs = random_zoom.call(imgs)
    imgs = random_translate.call(imgs)
    return tf.cast(imgs, tf.float16)/255, label


def normalize_val(imgs, label):
    """Rescale a batch of uint8 images to float16 in [0, 1] without augmentation.

    Args:
        imgs: batch of images.
        label: labels belonging to `imgs`.

    Returns:
        ``(imgs, label)`` with the images divided by 255.
    """
    return tf.cast(imgs, tf.float16)/255, label


# Prefetch is the last stage so preprocessing overlaps with the training step.
train_dataset = train_dataset.map(normalize_train, num_parallel_calls=4).prefetch(buffer_size=tf.data.AUTOTUNE)
val_dataset = val_dataset.map(normalize_val, num_parallel_calls=4).prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Non-experimental TPUStrategy; the tf.distribute.experimental alias is deprecated.
strategy = tf.distribute.TPUStrategy(resolver)
# Everything that owns variables (model, optimizer, metric) is created inside
# the strategy scope so its variables are managed by the strategy.
with strategy.scope():
    base_model = tf.keras.applications.MobileNetV2(include_top=False, input_shape=(256,256,3),
                                                   weights='imagenet', pooling = 'max')
    model = tf.keras.Sequential([
        base_model,
        # One sigmoid unit per class: the model outputs independent probabilities.
        tf.keras.layers.Dense(19, activation='sigmoid')
    ])
    optimizer = tf.keras.optimizers.Adam(0.0001)
    epoch_auc = tf.keras.metrics.AUC(num_thresholds=200, multi_label=True)
# The model already applies a sigmoid, so the loss must treat its output as
# probabilities (from_logits=False); from_logits=True would apply the sigmoid a
# second time. Reduction is NONE because compute_loss does the averaging.
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=False, reduction=tf.keras.losses.Reduction.NONE)
# Module-level lists intended to collect per-step loss values.
train_loss_history = []
val_loss_history = []
# Wrap the datasets so each global batch is split across the replicas.
dist_train_dataset = strategy.experimental_distribute_dataset(train_dataset)
dist_val_dataset = strategy.experimental_distribute_dataset(val_dataset)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
from sklearn.metrics import accuracy_score
def compute_loss(labels, predictions, global_batch_size=None):
    """Return the per-replica share of the batch-averaged loss.

    `loss_object` is evaluated per example and the result is summed and
    divided by the global batch size, so that summing the returned values
    across replicas gives the mean loss of the whole batch.

    Args:
        labels: ground-truth multi-hot labels.
        predictions: model outputs for the same examples.
        global_batch_size: number of examples in the full (all-replica) batch.
            Defaults to ``BATCH_SIZE`` when omitted.

    Returns:
        A scalar tensor with this replica's contribution to the average loss.
    """
    if global_batch_size is None:
        global_batch_size = BATCH_SIZE
    per_example_loss = loss_object(labels, predictions)
    return tf.nn.compute_average_loss(per_example_loss, global_batch_size=global_batch_size)
def compute_acc(labels, predictions, threshold=None):
    """Return sklearn's `accuracy_score` for the given labels and predictions.

    Args:
        labels: ground-truth labels.
        predictions: predicted labels, or scores when `threshold` is given.
        threshold: optional cut-off. When provided, `predictions` is first
            binarised as ``predictions >= threshold`` so that probability
            outputs can be scored. When omitted, `predictions` is passed
            through unchanged.

    Returns:
        The value returned by ``accuracy_score(labels, predictions)``.
    """
    if threshold is not None:
        predictions = (np.asarray(predictions) >= threshold).astype(int)
    return accuracy_score(labels, predictions)
def train_step(inputs):
    """Run one optimisation step on a per-replica batch.

    Computes the model output and loss under a gradient tape, applies the
    gradients with `optimizer`, then updates `epoch_auc`.

    The loss is deliberately not appended to `train_loss_history` here: this
    function is traced by `tf.function` (through `distributed_train_step`), so
    a Python ``list.append`` would run only during tracing and store a symbolic
    tensor instead of per-step values. Record the reduced loss returned by the
    distributed step instead.

    Args:
        inputs: ``(images, labels)`` tuple for this replica.

    Returns:
        ``(loss_value, auc)``: this replica's loss contribution and the
        current reading of `epoch_auc`.
    """
    images, labels = inputs
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss_value = compute_loss(labels, predictions)
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # The metric does not take part in the gradient, so it is updated outside the tape.
    epoch_auc.update_state(labels, predictions)
    auc = epoch_auc.result()
    return loss_value, auc
@tf.function
def distributed_train_step(dist_inputs):
    """Run `train_step` on every replica and combine the results.

    Args:
        dist_inputs: one distributed batch from `dist_train_dataset`.

    Returns:
        ``(loss, auc)``: the batch loss and the AUC reading as scalar tensors.
    """
    per_replica_losses, per_replica_auc = strategy.run(train_step, args=(dist_inputs,))
    # Each replica's loss is already divided by the global batch size, so the
    # per-replica values add up to the batch mean.
    loss = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
                           axis=None)
    # AUC is a ratio-type metric reported by every replica; adding the readings
    # would scale it by the number of replicas, so they are averaged instead.
    auc = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_auc,
                          axis=None)
    return loss, auc
def val_step(inputs):
    """Evaluate the model on a per-replica batch without updating weights.

    No gradient tape is opened because nothing is differentiated here. The loss
    is not appended to `val_loss_history` inside this function: it is traced by
    `tf.function` (through `distributed_val_step`), so a Python ``list.append``
    would run only during tracing and store a symbolic tensor.

    Args:
        inputs: ``(images, labels)`` tuple for this replica.

    Returns:
        ``(loss_value, auc)``: this replica's loss contribution and the
        current reading of `epoch_auc`.
    """
    images, labels = inputs
    predictions = model(images, training=False)
    loss_value = compute_loss(labels, predictions)
    epoch_auc.update_state(labels, predictions)
    auc = epoch_auc.result()
    return loss_value, auc
@tf.function
def distributed_val_step(dist_inputs):
    """Run `val_step` on every replica and combine the results.

    Args:
        dist_inputs: one distributed batch from `dist_val_dataset`.

    Returns:
        ``(loss, auc)``: the batch loss and the AUC reading as scalar tensors.
    """
    per_replica_losses, per_replica_auc = strategy.run(val_step, args=(dist_inputs,))
    # Per-replica losses are already scaled by the global batch size, so they are summed.
    loss = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
                           axis=None)
    # AUC readings are averaged; summing would scale the value by the replica count.
    auc = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_auc,
                          axis=None)
    return loss, auc
def _run_phase(dist_dataset, num_batches, step_fn, history, line_format):
    """Run one pass over `dist_dataset`, printing a progress line per batch.

    Args:
        dist_dataset: distributed dataset to iterate.
        num_batches: number of batches in the pass, used for the percentage.
        step_fn: callable taking one distributed batch, returning ``(loss, auc)``.
        history: list that receives the loss of every batch as a float.
        line_format: '%'-style template taking (arrow, spaces, percent, loss, auc).
    """
    # Start every pass from a clean metric so that training and validation
    # readings (and successive epochs) do not accumulate into one another.
    epoch_auc.reset_states()
    start = time.time()
    for i, data in enumerate(dist_dataset):
        loss, auc = step_fn(data)
        history.append(float(loss))
        percent = float(i + 1) * 100 / num_batches
        arrow = '-' * int(percent / 100 * 10 - 1) + '>'
        spaces = ' ' * (10 - len(arrow))
        print(line_format % (arrow, spaces, percent, loss, auc), end='', flush=True)
    print(" -", int(time.time() - start), "s")


def train(epochs):
    """Train for `epochs` epochs, running a validation pass after each one.

    Every epoch iterates `dist_train_dataset` with `distributed_train_step`
    and then `dist_val_dataset` with `distributed_val_step`, printing progress,
    loss, AUC and elapsed time. Batch losses are appended to
    `train_loss_history` and `val_loss_history`.

    NOTE: defining this function rebinds the module-level name `train`, which
    earlier in the notebook refers to the metadata DataFrame.

    Args:
        epochs: number of epochs to run.
    """
    for epoch in range(epochs):
        print ('\nEpoch {}/{} '.format(epoch+1, epochs))
        _run_phase(
            dist_train_dataset, len(train_dataset), distributed_train_step, train_loss_history,
            '\rTraining: [%s%s] %d %% - Training Loss: %f - Training AUC: %f')
        _run_phase(
            dist_val_dataset, len(val_dataset), distributed_val_step, val_loss_history,
            '\rValidate: [%s%s] %d %% - Validation Loss: %f - Validation AUC: %f')

In [None]:
# Run the training loop for 15 epochs (each followed by a validation pass).
train(15)

In [None]:
# Save the trained model to the working directory; the .h5 suffix presumably
# selects Keras' HDF5 format — confirm for the installed TensorFlow version.
model.save('./mobilenetv2.h5')