##### Run only if running this notebook for the first time

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [2]:
# Load the image manifest; later cells read its `path` and `target` columns.
image_info = pd.read_csv('image_classification.csv')

In [3]:
# Probe every image path and remember the rows whose file cannot be read.
# `ls` holds enumerate() positions; they coincide with index labels because
# `image_info` comes straight from `pd.read_csv` without a custom index.
ls = []
for row_pos, image_path in enumerate(image_info.path):
    try:
        # Only success/failure matters, so the file contents are not kept
        # (binding them to a name kept the last image's bytes alive).
        tf.io.read_file(image_path)
    except Exception:
        # Deliberately broad: any failure to read marks the row as unavailable.
        ls.append(row_pos)

In [4]:
# Keep only the rows whose image file could be read in the probe above.
image_available = image_info.drop(index=ls)

In [5]:
# Persist the filtered manifest as bare data rows (no index column, no header row).
image_available.to_csv('image_classification_v2.txt',index=False,header=False)

In [6]:
# Hold out 20% of the rows (stratified on `target`), then cut that hold-out in
# half (again stratified) to obtain the validation and test sets.
# The fixed random_state keeps both splits reproducible.
X_train, X_non_train = train_test_split(
    image_available,
    test_size=0.2,
    random_state=42,
    stratify=image_available['target'],
)
X_val, X_test = train_test_split(
    X_non_train,
    test_size=0.5,
    random_state=42,
    stratify=X_non_train['target'],
)

In [7]:
# Report the size of each split; each line is labelled with the split it describes
# (previously all three lines were labelled "train").
print("Shape of train: ", X_train.shape)
print("Shape of val: ", X_val.shape)
print("Shape of test: ", X_test.shape)

Shape of train:  (4744, 2)
Shape of train:  (593, 2)
Shape of train:  (593, 2)


In [9]:
# Write each split as a header-less, index-less CSV manifest.
split_manifests = {
    'train_data_image_classification.txt': X_train,
    'val_data_image_classification.txt': X_val,
    'test_data_image_classification.txt': X_test,
}
for manifest_name, split_frame in split_manifests.items():
    split_frame.to_csv(manifest_name, index=False, header=False)

In [5]:
# Upload the three split manifests to the project bucket.
# NOTE(review): covid_trainer/train.py reads `train_data_image_classification_v2.txt`
# and `val_data_image_classification_v2.txt` from this bucket, while this cell uploads
# names without the `_v2` suffix — confirm the `_v2` objects exist.
!gsutil cp train_data_image_classification.txt gs://qwiklabs-gcp-03-365bf9c0599c-kaggle
!gsutil cp val_data_image_classification.txt gs://qwiklabs-gcp-03-365bf9c0599c-kaggle
!gsutil cp test_data_image_classification.txt gs://qwiklabs-gcp-03-365bf9c0599c-kaggle

Copying file://train_data_image_classification.txt [Content-Type=text/plain]...
/ [1 files][394.8 KiB/394.8 KiB]                                                
Operation completed over 1 objects/394.8 KiB.                                    
Copying file://val_data_image_classification.txt [Content-Type=text/plain]...
/ [1 files][ 49.4 KiB/ 49.4 KiB]                                                
Operation completed over 1 objects/49.4 KiB.                                     
Copying file://test_data_image_classification.txt [Content-Type=text/plain]...
/ [1 files][ 49.4 KiB/ 49.4 KiB]                                                
Operation completed over 1 objects/49.4 KiB.                                     


### Starting Transfer Learning

In [333]:
!mkdir covid_trainer
!touch covid_trainer.__init__.py

mkdir: cannot create directory ‘covid_trainer’: File exists


In [366]:
%%writefile covid_trainer/train.py
import os
import pathlib
from PIL import Image

import IPython.display as display
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
    Conv2D, Dense, Dropout, Flatten, MaxPooling2D, Softmax)
import tensorflow_hub as hub
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import datetime

# Log the GPUs TensorFlow can see when the trainer starts.
print("GPU Available: ", tf.config.list_physical_devices('GPU'))


Overwriting covid_trainer/train.py


In [367]:
%%writefile -a covid_trainer/train.py

# Image size (height, width, channels) used for decoding/resizing and for building the model.
IMG_HEIGHT = 600
IMG_WIDTH = 600
IMG_CHANNELS = 3

BATCH_SIZE = 8
# 10 is a magic number tuned for local training of this dataset.
SHUFFLE_BUFFER = 10 * BATCH_SIZE
# Sentinel that lets tf.data choose the prefetch buffer size at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Number of validation images evaluated per epoch; VALIDATION_STEPS is the number of
# full batches that fit in it (370 // 8 = 46 batches, i.e. 368 images).
# NOTE(review): the validation split printed earlier in this notebook has 593 rows —
# confirm 370 matches the manifest actually read from `eval_path`.
VALIDATION_IMAGES = 370
VALIDATION_STEPS = VALIDATION_IMAGES // BATCH_SIZE


Appending to covid_trainer/train.py


In [368]:
%%writefile -a covid_trainer/train.py

def decode_img(img, reshape_dims):
    """Turn encoded JPEG bytes into a resized float32 image tensor.

    Args:
        img: Scalar string tensor holding the compressed JPEG contents.
        reshape_dims: Target size handed to `tf.image.resize`.

    Returns:
        The decoded image with IMG_CHANNELS channels, converted to float32
        in the [0, 1] range and resized to `reshape_dims`.
    """
    # Compressed bytes -> 3D uint8 tensor.
    decoded = tf.image.decode_jpeg(img, channels=IMG_CHANNELS)
    # uint8 -> float32 scaled into [0, 1].
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, reshape_dims)
    return resized


Appending to covid_trainer/train.py


In [369]:
# img = tf.io.read_file(
#     "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/train/000a312787f2.jpg")

# # Uncomment to see the image string.
# #print(img)
# img = decode_img(img, [IMG_WIDTH, IMG_HEIGHT])
# plt.imshow((img.numpy()));

In [370]:
%%writefile -a covid_trainer/train.py

# Class order defines the position of each class in the one-hot style label vector.
CLASS_NAMES = [
    'Typical_Appearance',
    'Negative_for_Pneumonia',
    'Indeterminate_Appearance',
    'Atypical_Appearance',
]


def decode_csv(csv_row):
    """Parse one "path,target" manifest line into (image bytes, label vector).

    Args:
        csv_row: A single CSV line with two string fields: image path and class name.

    Returns:
        A tuple of the raw file contents read from the path and a boolean
        vector that is True at the position of the row's class in CLASS_NAMES.
    """
    # Both columns are strings; the defaults only fix the column count and dtype.
    column_defaults = ["path", "target"]
    image_path, class_name = tf.io.decode_csv(csv_row, column_defaults)
    raw_bytes = tf.io.read_file(filename=image_path)
    class_mask = tf.math.equal(CLASS_NAMES, class_name)
    return raw_bytes, class_mask


Appending to covid_trainer/train.py


In [371]:
%%writefile -a covid_trainer/train.py

# Keras preprocessing layers chained into a single random-augmentation pipeline:
# rotation, translation, flip and contrast jitter.
img_augmentation = Sequential(
    [
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)

MAX_DELTA = 63.0 / 255.0  # Max brightness delta for tf.image.random_brightness: ~0.247 (about 24.7% of the [0, 1] range)
# Lower/upper contrast factors passed to tf.image.random_contrast.
CONTRAST_LOWER = 0.2
CONTRAST_UPPER = 1.8


def read_and_preprocess(image_bytes, label, random_augment=False):
    """Decode an encoded image and optionally apply random training augmentation.

    Args:
        image_bytes: Encoded JPEG contents, as returned by `decode_csv`.
        label: Label tensor for the image; passed through unchanged.
        random_augment: When True, the image is decoded slightly larger than
            the target size, randomly cropped back, randomly flipped and
            brightness/contrast jittered, then run through the
            `img_augmentation` Keras pipeline. When False, the image is only
            decoded and resized.

    Returns:
        A tuple (img, label) where img is a float32 tensor of shape
        [IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS].
    """
    if random_augment:
        # Decode 10 px larger in each dimension so the random crop has room to move.
        img = decode_img(image_bytes, [IMG_HEIGHT + 10, IMG_WIDTH + 10])
        img = tf.image.random_crop(img, [IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS])
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, MAX_DELTA)
        img = tf.image.random_contrast(img, CONTRAST_LOWER, CONTRAST_UPPER)
        # The Keras preprocessing layers expect a batch dimension, so wrap the
        # single image in a batch of one and unwrap it afterwards. The result
        # was previously discarded, so this augmentation never took effect.
        # training=True is passed explicitly because these layers act as
        # identity in inference mode.
        aug_img = img_augmentation(tf.expand_dims(img, axis=0), training=True)
        img = tf.squeeze(aug_img, axis=0)
    else:
        # tf.image.resize takes the size as (height, width).
        img = decode_img(image_bytes, [IMG_HEIGHT, IMG_WIDTH])
    return img, label


def read_and_preprocess_with_augment(image_bytes, label):
    """Two-argument adapter for `Dataset.map`: preprocess with augmentation switched on."""
    augmented_example = read_and_preprocess(
        image_bytes,
        label,
        random_augment=True,
    )
    return augmented_example


Appending to covid_trainer/train.py


In [372]:
%%writefile -a covid_trainer/train.py

def load_dataset(csv_of_filenames, batch_size, training=True):
    """Build a batched, endlessly repeating tf.data pipeline from a CSV manifest.

    Args:
        csv_of_filenames: Path (or list of paths) of text files whose lines
            are "image_path,class_name" rows.
        batch_size: Number of examples per emitted batch.
        training: When True, examples are randomly augmented and shuffled with
            a SHUFFLE_BUFFER-sized buffer; when False, they are only decoded
            and resized.

    Returns:
        A `tf.data.Dataset` yielding (image_batch, label_batch) tuples forever.
    """
    # Read and parse the manifest, then cache the encoded image bytes so the
    # files are fetched only once. The map stages run in parallel (element
    # order is preserved by default) so file reads and JPEG decoding do not
    # serialize the input pipeline.
    dataset = tf.data.TextLineDataset(filenames=csv_of_filenames) \
        .map(decode_csv, num_parallel_calls=AUTOTUNE).cache()

    if training:
        # Augmentation sits after the cache so each pass sees fresh random variants.
        dataset = dataset \
            .map(read_and_preprocess_with_augment, num_parallel_calls=AUTOTUNE) \
            .shuffle(SHUFFLE_BUFFER) \
            .repeat(count=None)  # Indefinitely.
    else:
        dataset = dataset \
            .map(read_and_preprocess, num_parallel_calls=AUTOTUNE) \
            .repeat()

    # Prefetch prepares the next set of batches while current batch is in use.
    return dataset.batch(batch_size=batch_size).prefetch(buffer_size=AUTOTUNE)


Appending to covid_trainer/train.py


In [373]:
# train_path = "/home/jupyter/train_data_image_classification.txt"
# train_data = load_dataset(train_path, 1)
# itr = iter(train_data)

In [374]:
# image_batch, label_batch = next(itr)
# img = image_batch[0]
# plt.imshow(img)
# print(label_batch[0])

In [375]:
%%writefile -a covid_trainer/train.py

# GCS locations of the training and validation manifests read by load_dataset.
# NOTE(review): the upload cell in this notebook copies manifests named
# `*_data_image_classification.txt` (no `_v2` suffix) — confirm these `_v2`
# objects actually exist in the bucket.
train_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/train_data_image_classification_v2.txt"
eval_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/val_data_image_classification_v2.txt"
# Size of the model's output layer: one unit per class.
nclasses = len(CLASS_NAMES)
# In the visible code, the hyperparameters below are referenced only by the
# commented-out baseline CNN that follows.
hidden_layer_1_neurons = 400
hidden_layer_2_neurons = 100
dropout_rate = 0.25
num_filters_1 = 64
kernel_size_1 = 3
pooling_size_1 = 2
num_filters_2 = 32
kernel_size_2 = 3
pooling_size_2 = 2

# layers = [
#     Conv2D(num_filters_1, kernel_size=kernel_size_1,
#            activation='relu',
#            input_shape=(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)),
#     MaxPooling2D(pooling_size_1),
#     Conv2D(num_filters_2, kernel_size=kernel_size_2,
#            activation='relu'),
#     MaxPooling2D(pooling_size_2),
#     Flatten(),
#     Dense(hidden_layer_1_neurons, activation='relu'),
#     Dense(hidden_layer_2_neurons, activation='relu'),
#     Dropout(dropout_rate),
#     Dense(nclasses),
#     Softmax()
# ]

# old_model = Sequential(layers)
# old_model.compile(
#     optimizer='adam',
#     loss='categorical_crossentropy',
#     metrics=['accuracy'])

# Build the input pipelines: augmented + shuffled for training, plain for evaluation.
train_ds = load_dataset(train_path, BATCH_SIZE)
eval_ds = load_dataset(eval_path, BATCH_SIZE, training=False)

Appending to covid_trainer/train.py


In [376]:
# old_model.fit_generator(
#     train_ds,
#     epochs=5,
#     steps_per_epoch=5,
#     validation_data=eval_ds,
#     validation_steps=VALIDATION_STEPS
# )

In [377]:
%%writefile -a covid_trainer/train.py

# Distribution strategy under whose scope the model is created and compiled below.
strategy = tf.distribute.MirroredStrategy()

# module_selection = "mobilenet_v2_100_224"
# NOTE(review): this handle points at the `classification` variant of the TF-Hub
# EfficientNet-B7 module, so the Dense head below is stacked on that module's own
# output — confirm this is intended rather than a feature-vector module.
module_handle = "https://tfhub.dev/tensorflow/efficientnet/b7/classification/1"

# Timestamp embedded in every output path so each run writes to its own folder.
NOW = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

checkpoint_path_1 = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/covid_classifier/{}/model.ckpt".format(NOW)
# Not referenced again in the visible code.
checkpoint_dir_1 = os.path.dirname(checkpoint_path_1)

# Create a callback that saves the model's weights
checkpoint_callback_1 = tf.keras.callbacks.ModelCheckpoint(
   checkpoint_path_1, verbose=1, save_weights_only=True,
   # Save the weights (not the full model) at the end of every epoch.
   save_freq='epoch')

# TensorBoard logs go next to the checkpoints; histograms are recorded every epoch.
tensorboard_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/covid_classifier/{}/tensorboard".format(NOW)
tensorboard_cb = tf.keras.callbacks.TensorBoard(tensorboard_path,
                                       histogram_freq=1)

# Model construction and compilation happen inside the strategy scope.
with strategy.scope():
    # Hub module (fine-tuned, since trainable=True) -> dropout -> softmax head
    # with one unit per class and light L2 regularization.
    transfer_model = tf.keras.Sequential([
        hub.KerasLayer(module_handle, trainable=True),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(
            nclasses,
            activation='softmax',
            kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    ])
    # Input is a batch of IMG_HEIGHT x IMG_WIDTH x IMG_CHANNELS images.
    transfer_model.build((None,)+(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    # Besides accuracy and AUC, track precision and recall at several low
    # decision thresholds (0.1 through 0.3).
    transfer_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy',tf.keras.metrics.AUC(),tf.keras.metrics.Precision(thresholds=[0.1,0.15,0.2,0.25,0.3]),tf.keras.metrics.Recall(
    thresholds=[0.1,0.15,0.2,0.25,0.3])])
    
transfer_model.summary()

# Stop training once val_loss has not improved for 50 consecutive epochs.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)


Appending to covid_trainer/train.py


In [378]:
%%writefile -a covid_trainer/train.py

# Both datasets repeat forever, so epoch length is defined by the step counts:
# 100 training batches and VALIDATION_STEPS validation batches per epoch.
# 1000 epochs is an upper bound; the early-stopping callback can end training sooner.
transfer_model.fit(
    train_ds,
    epochs=1000,
    steps_per_epoch=100,
    validation_data=eval_ds,
    validation_steps=VALIDATION_STEPS,
    callbacks=[checkpoint_callback_1, tensorboard_cb,earlystop]
)

Appending to covid_trainer/train.py


In [379]:
%%writefile -a covid_trainer/train.py

# Export the trained model in SavedModel format to this run's timestamped GCS folder.
model_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/covid_classifier/{}/model".format(NOW)

tf.saved_model.save(transfer_model, model_path)

Appending to covid_trainer/train.py


In [380]:
%%writefile covid_trainer/config.yaml
trainingInput:
  scaleTier: CUSTOM
  # Configure a single master machine with 8 NVIDIA Tesla K80 GPUs attached
  masterType: n1-highmem-16
  masterConfig:
    acceleratorConfig:
      count: 8
      type: NVIDIA_TESLA_K80

Overwriting covid_trainer/config.yaml


In [381]:
%%bash

# PROJECT_ID=$(gcloud config list project --format "value(core.project)")
# PROJECT_ID=qwiklabs-gcp-00-888aa3d75214
# Job settings: target region and AI Platform runtime (TensorFlow) version.
REGION="us-central1"
TFVERSION="2.3"
# The job ID embeds a UTC timestamp so each submission gets a distinct name.
JOBID="covid_classifier_trainable_$(date -u +%y%m%d_%H%M%S)"

# Package ./covid_trainer, stage it in the bucket and run covid_trainer.train
# on the machine described in config.yaml.
gcloud ai-platform jobs submit training "${JOBID}" \
    --region="${REGION}" \
    --runtime-version="${TFVERSION}" \
    --python-version=3.7 \
    --staging-bucket=gs://qwiklabs-gcp-03-365bf9c0599c-kaggle \
    --package-path=covid_trainer \
    --module-name=covid_trainer.train \
    --config=covid_trainer/config.yaml

jobId: covid_classifier_trainable_210625_015417
state: QUEUED


Job [covid_classifier_trainable_210625_015417] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe covid_classifier_trainable_210625_015417

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs covid_classifier_trainable_210625_015417
