### Starting Transfer Learning

In [65]:
!mkdir covid_trainer_retraining
!touch covid_trainer_retraining.__init__.py

mkdir: cannot create directory ‘covid_trainer_retraining’: File exists


In [66]:
%%writefile covid_trainer_retraining/__init__.py



Overwriting covid_trainer_retraining/__init__.py


In [67]:
%%writefile covid_trainer_retraining/train.py
import os
import pathlib
from PIL import Image

import IPython.display as display
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
    Conv2D, Dense, Dropout, Flatten, MaxPooling2D, Softmax)
import tensorflow_hub as hub
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import datetime

print("GPU Available: ", tf.config.list_physical_devices('GPU'))


Overwriting covid_trainer_retraining/train.py


In [68]:
%%writefile -a covid_trainer_retraining/train.py

IMG_HEIGHT = 600
IMG_WIDTH = 600
IMG_CHANNELS = 3

BATCH_SIZE = 8
# 10 is a magic number tuned for local training of this dataset.
SHUFFLE_BUFFER = 10 * BATCH_SIZE
AUTOTUNE = tf.data.experimental.AUTOTUNE

VALIDATION_IMAGES = 370
VALIDATION_STEPS = VALIDATION_IMAGES // BATCH_SIZE


Appending to covid_trainer_retraining/train.py


In [69]:
%%writefile -a covid_trainer_retraining/train.py

def decode_img(img, reshape_dims):
    # Convert the compressed string to a 3D uint8 tensor.
    img = tf.image.decode_jpeg(img, channels=IMG_CHANNELS)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # Resize the image to the desired size.
    return tf.image.resize(img, reshape_dims)


Appending to covid_trainer_retraining/train.py


In [70]:
%%writefile -a covid_trainer_retraining/train.py

CLASS_NAMES = ['Typical_Appearance', 'Negative_for_Pneumonia','Indeterminate_Appearance', 'Atypical_Appearance']

def decode_csv(csv_row):
    record_defaults = ["path", "target"]
    filename, label_string = tf.io.decode_csv(csv_row, record_defaults)
    image_bytes = tf.io.read_file(filename=filename)
    label = tf.math.equal(CLASS_NAMES, label_string)
    return image_bytes, label


Appending to covid_trainer_retraining/train.py


In [71]:
%%writefile -a covid_trainer_retraining/train.py

img_augmentation = Sequential(
    [
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)

MAX_DELTA = 63.0 / 255.0  # Change brightness by at most 17.7%
CONTRAST_LOWER = 0.2
CONTRAST_UPPER = 1.8


def read_and_preprocess(image_bytes, label, random_augment=False):
    if random_augment:
        img = decode_img(image_bytes, [IMG_HEIGHT + 10, IMG_WIDTH + 10])
        img = tf.image.random_crop(img, [IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS])
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, MAX_DELTA)
        img = tf.image.random_contrast(img, CONTRAST_LOWER, CONTRAST_UPPER)
        aug_img = img_augmentation(tf.expand_dims(img, axis=0))
    else:
        img = decode_img(image_bytes, [IMG_WIDTH, IMG_HEIGHT])
    return img, label


def read_and_preprocess_with_augment(image_bytes, label):
    return read_and_preprocess(image_bytes, label, random_augment=True)


Appending to covid_trainer_retraining/train.py


In [72]:
%%writefile -a covid_trainer_retraining/train.py

def load_dataset(csv_of_filenames, batch_size, training=True):
    dataset = tf.data.TextLineDataset(filenames=csv_of_filenames) \
        .map(decode_csv).cache()

    if training:
        dataset = dataset \
            .map(read_and_preprocess_with_augment) \
            .shuffle(SHUFFLE_BUFFER) \
            .repeat(count=1000) # Indefinately.
    else:
        dataset = dataset \
            .map(read_and_preprocess)

    # Prefetch prepares the next set of batches while current batch is in use.
    return dataset.batch(batch_size=batch_size).prefetch(buffer_size=AUTOTUNE)


Appending to covid_trainer_retraining/train.py


In [73]:
%%writefile -a covid_trainer_retraining/train.py

train_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/train_data_image_classification_v2.txt"
eval_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/val_data_image_classification_v2.txt"
nclasses = len(CLASS_NAMES)
hidden_layer_1_neurons = 400
hidden_layer_2_neurons = 100
dropout_rate = 0.25
num_filters_1 = 64
kernel_size_1 = 3
pooling_size_1 = 2
num_filters_2 = 32
kernel_size_2 = 3
pooling_size_2 = 2

# layers = [
#     Conv2D(num_filters_1, kernel_size=kernel_size_1,
#            activation='relu',
#            input_shape=(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)),
#     MaxPooling2D(pooling_size_1),
#     Conv2D(num_filters_2, kernel_size=kernel_size_2,
#            activation='relu'),
#     MaxPooling2D(pooling_size_2),
#     Flatten(),
#     Dense(hidden_layer_1_neurons, activation='relu'),
#     Dense(hidden_layer_2_neurons, activation='relu'),
#     Dropout(dropout_rate),
#     Dense(nclasses),
#     Softmax()
# ]

# old_model = Sequential(layers)
# old_model.compile(
#     optimizer='adam',
#     loss='categorical_crossentropy',
#     metrics=['accuracy'])

train_ds = load_dataset(train_path, BATCH_SIZE)
eval_ds = load_dataset(eval_path, BATCH_SIZE, training=False)

Appending to covid_trainer_retraining/train.py


In [74]:
%%writefile -a covid_trainer_retraining/train.py

strategy = tf.distribute.MirroredStrategy()

# module_selection = "mobilenet_v2_100_224"
module_handle = "https://tfhub.dev/tensorflow/efficientnet/b7/classification/1"

NOW = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

checkpoint_path_1 = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/covid_classifier_retraining/{}/model.ckpt".format(NOW)
checkpoint_dir_1 = os.path.dirname(checkpoint_path_1)

# Create a callback that saves the model's weights
checkpoint_callback_1 = tf.keras.callbacks.ModelCheckpoint(
   checkpoint_path_1, verbose=1, save_weights_only=True,
   # Save weights, save_best_only=every epoch.
   save_freq='epoch')

tensorboard_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/covid_classifier_retraining/{}/tensorboard".format(NOW)
tensorboard_cb = tf.keras.callbacks.TensorBoard(tensorboard_path,
                                       histogram_freq=1)

def build_model(num_classes):
    inputs = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
    x = img_augmentation(inputs)
    model = EfficientNetB0(include_top=False, input_tensor=x, weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = layers.BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", 
        metrics=['accuracy',tf.keras.metrics.AUC(),tf.keras.metrics.Precision(thresholds=[0.1,0.15,0.2,0.25,0.3])]
    )
    return model



earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)


Appending to covid_trainer_retraining/train.py


In [75]:
%%writefile -a covid_trainer_retraining/train.py

with strategy.scope():
    model = build_model(num_classes=len(CLASS_NAMES))

def unfreeze_model(model):
    # We unfreeze the top 20 layers while leaving BatchNorm layers frozen
    for layer in model.layers[-20:]:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", 
        metrics=['accuracy']
    )


unfreeze_model(model)

Appending to covid_trainer_retraining/train.py


In [76]:
%%writefile -a covid_trainer_retraining/train.py

hist = model.fit(train_ds, epochs=1000, validation_data=eval_ds,steps_per_epoch=100, verbose=2,validation_steps=VALIDATION_STEPS,
                callbacks=[checkpoint_callback_1,tensorboard_cb])

model_path = "gs://qwiklabs-gcp-03-365bf9c0599c-kaggle/covid_classifier_retraining/{}/model".format(NOW)

tf.saved_model.save(transfer_model, model_path)

Appending to covid_trainer_retraining/train.py


In [77]:
%%writefile covid_trainer_retraining/config.yaml
trainingInput:
  scaleTier: CUSTOM
  # Configure a master worker with 4 T4 GPUs
  masterType: n1-highmem-16
  masterConfig:
    acceleratorConfig:
      count: 4
      type: NVIDIA_TESLA_K80

Overwriting covid_trainer_retraining/config.yaml


In [78]:
%%bash

# PROJECT_ID=$(gcloud config list project --format "value(core.project)")
# PROJECT_ID=qwiklabs-gcp-00-888aa3d75214
REGION="us-central1"
TFVERSION="2.3"
JOBID=covid_classifier_retrain$(date -u +%y%m%d_%H%M%S)

gcloud ai-platform jobs submit training $JOBID \
    --module-name=covid_trainer_retraining.train \
    --package-path=covid_trainer_retraining \
    --staging-bucket=gs://qwiklabs-gcp-03-365bf9c0599c-kaggle \
    --python-version=3.7 \
    --runtime-version=${TFVERSION} \
    --region=${REGION} \
    --config covid_trainer_retraining/config.yaml

jobId: covid_classifier_retrain210625_030930
state: QUEUED


Job [covid_classifier_retrain210625_030930] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe covid_classifier_retrain210625_030930

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs covid_classifier_retrain210625_030930
