In [None]:
import pandas as pd
import numpy as np

import tensorflow as tf
import time
import glob
import os
import PIL
import matplotlib.pyplot as plt
import tensorflow_hub as hub
from kaggle_datasets import KaggleDatasets

In [None]:
# Detect and initialise the TPU.
# The resolver is created without arguments, so no TPU address is given here.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
# Connect this runtime to the resolved cluster before initialising the TPU system.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

In [None]:
# Instantiate the distribution strategy; the model is later built inside
# `tpu_strategy.scope()` and the batch size is scaled by its replica count.
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
# GCS location of the attached Kaggle dataset; the per-sample image paths
# consumed by the tf.data pipeline are built from this prefix.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

In [None]:
# Local (mounted) dataset directory and the training metadata table it contains.
train_path = '../input/hpa-single-cell-image-classification'
train_data = pd.read_csv(f'{train_path}/train.csv')

train_data.head()

In [None]:
# Per-sample GCS path prefix; the channel suffix ('_red.png', ...) is appended at load time.
train_data['path'] = f'{GCS_DS_PATH}/train/' + train_data['ID']

In [None]:
# Turn the '|'-separated label string into a space-separated one.
# regex=False is explicit so that '|' is always a literal character and never
# regex alternation, independent of the pandas version's default for `regex`.
train_data['Label'] = train_data['Label'].str.replace('|', ' ', regex=False)
# Local PNG files found in the training directory.
train_image = glob.glob(train_path + '/train/*.png')

def format_column(train_data, num_classes=19):
    """Add one zero-filled indicator column per class to ``train_data``.

    The columns are named with the integers ``0 .. num_classes - 1`` and are
    added in that order. An existing column with the same name is reset to 0.

    Args:
        train_data: DataFrame to extend. It is modified in place.
        num_classes: Number of indicator columns to create (default 19).

    Returns:
        The same DataFrame object that was passed in.
    """
    for class_id in range(num_classes):
        train_data[class_id] = 0
    return train_data

train_data = format_column(train_data)

# One list of class-id tokens per row, taken from the space-separated label string.
val = [labels.split() for labels in train_data.Label]
for ind, num in enumerate(val):
    for each in num:
        # Single-step label-based assignment. The previous chained form
        # `train_data[col][ind] = 1` may write to a temporary copy and is a
        # silent no-op when pandas copy-on-write is active.
        train_data.at[ind, int(each)] = 1
        

In [None]:
# Everything after the first three columns is kept as the target frame.
label_columns = train_data.columns[3:]
train_label = train_data.loc[:, label_columns]
train_label

In [None]:
# Display one sample (the second matching row) whose indicator column 0 is set.
path = train_data.loc[train_data[0] == 1, 'ID'].iloc[1]
image_stem = train_path + '/train/' + str(path)
red = PIL.Image.open(image_stem + '_red.png')
green = PIL.Image.open(image_stem + '_green.png')
blue = PIL.Image.open(image_stem + '_blue.png')
# Stack the three channel images along a new last axis to form one colour image.
im = np.stack((red, green, blue), -1)
plt.imshow(im)
plt.title('Sample with label 0')
# No plt.clf() / time.sleep() afterwards: clearing after show() only creates
# an extra empty figure and the sleep merely delays a full notebook run.
plt.show()

In [None]:
# Display one sample (the second matching row) whose indicator column 18 is set.
path = train_data.loc[train_data[18] == 1, 'ID'].iloc[1]
image_stem = train_path + '/train/' + str(path)
red = PIL.Image.open(image_stem + '_red.png')
green = PIL.Image.open(image_stem + '_green.png')
blue = PIL.Image.open(image_stem + '_blue.png')
# Stack the three channel images along a new last axis to form one colour image.
im = np.stack((red, green, blue), -1)
plt.imshow(im)
plt.title('Sample with label 18')
# No plt.clf() / time.sleep() afterwards: clearing after show() only creates
# an extra empty figure and the sleep merely delays a full notebook run.
plt.show()

In [None]:
# Display one sample (the second matching row) that carries both label 0 and label 15.
has_both = (train_data[0] == 1) & (train_data[15] == 1)
path = train_data.loc[has_both, 'ID'].iloc[1]
image_stem = train_path + '/train/' + str(path)
red = PIL.Image.open(image_stem + '_red.png')
green = PIL.Image.open(image_stem + '_green.png')
blue = PIL.Image.open(image_stem + '_blue.png')
# Stack the three channel images along a new last axis to form one colour image.
im = np.stack((red, green, blue), -1)
plt.imshow(im)
plt.title('Sample with labels 0 and 15')
# No plt.clf() / time.sleep() afterwards: clearing after show() only creates
# an extra empty figure and the sleep merely delays a full notebook run.
plt.show()

In [None]:
# Display one sample (the second matching row) that carries both label 10 and label 3.
has_both = (train_data[10] == 1) & (train_data[3] == 1)
path = train_data.loc[has_both, 'ID'].iloc[1]
image_stem = train_path + '/train/' + str(path)
red = PIL.Image.open(image_stem + '_red.png')
green = PIL.Image.open(image_stem + '_green.png')
blue = PIL.Image.open(image_stem + '_blue.png')
# Stack the three channel images along a new last axis to form one colour image.
im = np.stack((red, green, blue), -1)
plt.imshow(im)
plt.title('Sample with labels 10 and 3')
# No plt.clf() / time.sleep() afterwards: clearing after show() only creates
# an extra empty figure and the sleep merely delays a full notebook run.
plt.show()

In [None]:
# Model inputs are the per-sample GCS path prefixes.
train_inputs = train_data.loc[:, 'path']

In [None]:
# Display the target frame (the columns of train_data from position 3 onwards).
train_label

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation, with a fixed seed.
# NOTE: this rebinds `train_path`, which earlier cells use as the dataset
# directory string, to the array of training path prefixes. Cells above that
# build file names from `train_path` cannot be re-run after this cell.
split_arrays = train_test_split(
    train_inputs.values,
    train_label.values,
    test_size=0.2,
    random_state=42,
)
train_path, val_path, train_target, val_target = split_arrays

In [None]:
# Display the training-split inputs (train_path is the first array returned by train_test_split).
train_path

In [None]:
# Report the size of each split. The second line previously printed the
# training-target shape under the "val" caption.
print(f'Shape of train: {train_path.shape}')
print(f'Shape of val: {val_path.shape}')

In [None]:
def load_data(train_path, target):
    """Read the red, green and blue channel PNGs of one sample.

    Args:
        train_path: Path prefix of the sample; '_red.png', '_blue.png' and
            '_green.png' are appended to it to locate the channel files.
        target: Value passed through unchanged as the second return element.

    Returns:
        ``(img, target)`` where ``img`` stacks the red, green and blue
        channels (in that order) along axis 2.
    """
    def _read_channel(suffix):
        # Decode as a single-channel image and drop that channel axis.
        raw_png = tf.io.read_file(train_path + suffix)
        decoded = tf.image.decode_png(raw_png, channels=1)
        return tf.squeeze(decoded, [2])

    red = _read_channel('_red.png')
    blue = _read_channel('_blue.png')
    green = _read_channel('_green.png')
    img = tf.stack((red, green, blue), axis=2)
    return img, target


AUTO = tf.data.experimental.AUTOTUNE

# Training pipeline: shuffle the (path, target) pairs before any image is
# decoded, so the shuffle buffer only holds small tensors. The buffer covers
# the whole training split and, by default, is reshuffled on every epoch;
# previously the sample order was identical in every epoch.
train = (
    tf.data.Dataset.from_tensor_slices((train_path, train_target))
    .shuffle(len(train_path))
    .map(load_data, num_parallel_calls=AUTO)
)
# Validation pipeline: kept in a fixed order.
val = (
    tf.data.Dataset.from_tensor_slices((val_path, val_target))
    .map(load_data, num_parallel_calls=AUTO)
)

In [None]:
IMG_SIZE = 512  # All images will be resized to 512*512


def aug_format_example(image, label=None, augment=True):
    """Scale, resize and (optionally) randomly augment one image.

    The image is cast to float32, divided by 255 (so 8-bit input maps to
    [0, 1], not [-1, 1]) and resized to ``IMG_SIZE`` x ``IMG_SIZE``. With
    ``augment=True`` random contrast, horizontal/vertical flips and random
    brightness are applied afterwards; the result is not clipped here.

    Args:
        image: Image tensor to process.
        label: Optional target passed through unchanged.
        augment: Apply the random augmentations (default ``True``, which is
            the previous behaviour). Pass ``False`` for deterministic
            preprocessing, e.g. for validation or inference data.

    Returns:
        ``image`` alone when ``label`` is ``None``, otherwise ``(image, label)``.
    """
    image = tf.cast(image, tf.float32)
    image = image / 255
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))

    if augment:
        image = tf.image.random_contrast(image, lower=0.3, upper=1.2)
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        image = tf.image.random_brightness(image, max_delta=0.5)

    if label is None:
        return image
    return image, label

In [None]:
# Global batch size: 16 samples per replica.
BATCH_SIZE = 16 * tpu_strategy.num_replicas_in_sync


def format_example(image, label):
    """Deterministic preprocessing: scale by 1/255 and resize, no random augmentation."""
    image = tf.cast(image, tf.float32) / 255
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label


# NOTE: `train` and `val` are rebound to their processed versions, so this cell
# must be run only once after the datasets are (re)created.
train = train.map(aug_format_example, num_parallel_calls=AUTO).batch(BATCH_SIZE).prefetch(buffer_size=AUTO)
# Validation data must not be randomly augmented, otherwise the validation
# metrics change from run to run and do not measure the unmodified images.
val = val.map(format_example, num_parallel_calls=AUTO).batch(BATCH_SIZE).prefetch(buffer_size=AUTO)

In [None]:
# Pull a single batch from the training pipeline and look at its image shape.
first_batch = iter(train.take(1))
image_batch, label_batch = next(first_batch)

image_batch.shape

In [None]:
# Define the checkpoint directory to store the checkpoints
# (a path relative to the notebook's working directory).

checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; this template is passed as `filepath` to the
# ModelCheckpoint callback, with "{epoch}" left as a placeholder in the name.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

def build_lrfn(lr_start=0.00001, lr_max=0.00005,
               lr_min=0.00001, lr_rampup_epochs=5,
               lr_sustain_epochs=0, lr_exp_decay=.8,
               num_replicas=None):
    """Build an epoch -> learning-rate function (ramp-up, sustain, exponential decay).

    Args:
        lr_start: Learning rate at epoch 0.
        lr_max: Peak learning rate per replica; it is multiplied by the
            replica count before use.
        lr_min: Value the decay phase approaches.
        lr_rampup_epochs: Number of epochs of linear ramp from ``lr_start``
            to the scaled ``lr_max``.
        lr_sustain_epochs: Number of epochs held at the scaled ``lr_max``.
        lr_exp_decay: Per-epoch decay factor applied after the sustain phase.
        num_replicas: Replica count used to scale ``lr_max``. Defaults to
            ``tpu_strategy.num_replicas_in_sync`` (the previous behaviour),
            so the schedule can also be built without a TPU strategy.

    Returns:
        A function ``lrfn(epoch)`` returning the learning rate for that epoch.
    """
    if num_replicas is None:
        num_replicas = tpu_strategy.num_replicas_in_sync
    lr_max = lr_max * num_replicas

    def lrfn(epoch):
        if epoch < lr_rampup_epochs:
            # Linear ramp from lr_start up to the scaled lr_max.
            lr = (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
        elif epoch < lr_rampup_epochs + lr_sustain_epochs:
            lr = lr_max
        else:
            # Exponential decay of the distance to lr_min.
            epochs_decayed = epoch - lr_rampup_epochs - lr_sustain_epochs
            lr = (lr_max - lr_min) * lr_exp_decay ** epochs_decayed + lr_min
        return lr
    return lrfn

lrfn = build_lrfn()
# Count steps from the training split only; `train_label` still contains the
# rows that were held out for validation, which overstated the step count.
STEPS_PER_EPOCH = train_target.shape[0] // BATCH_SIZE

In [None]:
# Callback for printing the LR at the end of each epoch.
class PrintLR(tf.keras.callbacks.Callback):
    """Print the optimizer's current learning rate after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Read the model this callback is attached to (`self.model`) instead of
        # a notebook-level global named `model`, which may be undefined or
        # refer to a different model when the callback runs.
        current_lr = self.model.optimizer.lr.numpy()
        print('\nLearning rate for epoch {} is {}'.format(epoch + 1,
                                                          current_lr))

In [None]:
# Save weights using the checkpoint name template.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)
# Apply the epoch-based learning-rate schedule and log each change.
lr_schedule_cb = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)

callbacks = [checkpoint_cb, lr_schedule_cb, PrintLR()]

In [None]:
from tensorflow.keras.applications import DenseNet121
import tensorflow.keras.layers as L

# Build and compile inside the strategy scope so the variables are created on the TPU.
with tpu_strategy.scope():
    model = tf.keras.Sequential([
        # ImageNet-pretrained backbone without its classification head.
        DenseNet121(input_shape=(IMG_SIZE, IMG_SIZE, 3),
                    weights='imagenet',
                    include_top=False),
        L.GlobalAveragePooling2D(),
        # The targets are multi-hot (one sample can carry several labels), so
        # each class gets an independent sigmoid output. Softmax would force
        # the class probabilities to compete and sum to 1.
        L.Dense(train_target.shape[1], activation='sigmoid'),
    ])

    # Binary cross-entropy treats every class as its own yes/no decision,
    # matching the sigmoid outputs. A single metric is kept so that
    # `model.evaluate` still returns exactly (loss, metric).
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    model.summary()

In [None]:
# Training length and the number of validation batches used for the baseline check.
initial_epochs = 20
validation_steps = 20

# Baseline loss / metric of the model before any training on this data.
initial_results = model.evaluate(val, steps=validation_steps)
loss0, accuracy0 = initial_results

print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))

In [None]:


# Train for `initial_epochs` epochs, validating on `val` and running the
# checkpoint / learning-rate callbacks.
history = model.fit(
    x=train,
    validation_data=val,
    epochs=initial_epochs,
    callbacks=callbacks,
)