In [1]:
%%bash

mkdir -p ./ml_training_gcp

cat > ./ml_training_gcp/FaceMaskEfficientNet.py <<CODE

import tensorflow as tf
from tensorflow import keras
import numpy as np
#from PIL import Image
import os
from keras import layers

print(os.getcwd())
print(os.listdir())

#Data is downloaded into the container via shellscript. We believe this is more efficient
traindirectory="/app/FaceMask/Train"
testdirectory="/app/FaceMask/Test"
image_size=224
TrainData=keras.utils.image_dataset_from_directory(traindirectory, class_names=["WithoutMask","WithMask"], image_size=(image_size,image_size))
TestData=keras.utils.image_dataset_from_directory(testdirectory, class_names=["WithoutMask","WithMask"], image_size=(image_size,image_size))

img_augmentation = keras.models.Sequential(
    [
        layers.RandomRotation(factor=0.15),
        layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
        layers.RandomFlip(),
        layers.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)

def build_model(num_classes, IMG_SIZE):
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = img_augmentation(inputs) #image augmentation within the model. Should this be good practice? Or do we do it inside the map.
    #x=inputs
    model = keras.applications.EfficientNetB0(include_top=False, input_tensor=x, weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = layers.BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(num_classes, activation="softmax", dtype='float32', name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(
        optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )
    return model

def normalize_img(image, label):
    return tf.cast(image, tf.float32) / 255., label

from datetime import datetime
strategy = tf.distribute.MirroredStrategy()

TrainData.map(normalize_img).prefetch(tf.data.AUTOTUNE).batch(64*strategy.num_replicas_in_sync)
TestData.map(normalize_img).prefetch(tf.data.AUTOTUNE).batch(64*strategy.num_replicas_in_sync)
logs = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")

tboard_callback = tf.keras.callbacks.TensorBoard(log_dir = logs,
                                                 histogram_freq = 1,
                                                 profile_batch = '500,520')


with strategy.scope():
    model=build_model(2, 224)
model.fit(TrainData,
        epochs=1,
          validation_data=TestData
         , callbacks=[tboard_callback])
model.save("FaceMaskEfficientNetModel")
CODE

Cant Test Training Code Due to GPU Usage requiring complex setup that is easier to do with Docker /n
Pull Docker Image

In [2]:
#!docker pull tensorflow/tensorflow:latest-gpu
#gcr.io/deeplearning-platform-release/base-cu110
#RUN curl -sSL https://sdk.cloud.google.com | bash
!docker pull gcr.io/deeplearning-platform-release/tf-gpu.2-8

Using default tag: latest
latest: Pulling from deeplearning-platform-release/tf-gpu.2-8
Digest: sha256:5bcd6b34a8c00142040d1561b2a39d5ac13ba576bc8c22548d12d98d37ade168
Status: Image is up to date for gcr.io/deeplearning-platform-release/tf-gpu.2-8:latest
gcr.io/deeplearning-platform-release/tf-gpu.2-8:latest


Create Requirements Text

In [3]:
%%bash

cat > ./ml_training_gcp/requirements.txt <<EOF

EOF

Copy Authentication File into Container. This step can be skipped on GCP as it will be auto-auth (and yes this is not safe)

In [4]:
%%bash
cp daring-hash-348101-2f4dd5ea462e.json ./ml_training_gcp

Create Shell Script to Download Data (Note: The entire folder structure will be copied into app. Therefore /app/FaceMask will exist)

In [5]:
%%bash

cat > ./ml_training_gcp/initialize.sh <<EOF
#! /bin/sh
gcloud auth activate-service-account --key-file=daring-hash-348101-2f4dd5ea462e.json
gsutil -mq cp -r gs://seangoh-smu-mle/FaceMask/ /app
python FaceMaskEfficientNet.py
gsutil -mq cp -r /app/FaceMaskEfficientNetModel gs://seangoh-smu-mle/Models/
gsutil -mq cp -r /app/logs gs://seangoh-smu-mle/logs/
rm -r /app/FaceMask
EOF

Create Docker File

In [6]:
%%bash

cat > ./ml_training_gcp/Dockerfile <<EOF
FROM gcr.io/deeplearning-platform-release/tf-gpu.2-8
WORKDIR /app

COPY . /app
RUN pip install -r requirements.txt

ENTRYPOINT ["sh", "initialize.sh"]
EOF

Build Docker

In [7]:
%%bash

docker build ./ml_training_gcp/ -t masketeers/containerizeml

#2 [internal] load .dockerignore
#2 sha256:64abac9484ee7f7e4aa27be6d3a1c371a74bae6759de9bdab3b441d4f154513c
#2 transferring context:
#2 transferring context: 2B 0.1s done
#2 DONE 0.1s

#1 [internal] load build definition from Dockerfile
#1 sha256:bce379eb4b48e060c577086fb98f1a05a12e86f819bae7bf57b18c9b4424387c
#1 transferring dockerfile: 196B 0.1s done
#1 DONE 0.2s

#3 [internal] load metadata for gcr.io/deeplearning-platform-release/tf-gpu.2-8:latest
#3 sha256:38885463e846526fc0e8d585f14202d74ae98ebd5f0945b54469e2c8de63547b
#3 DONE 0.0s

#4 [1/4] FROM gcr.io/deeplearning-platform-release/tf-gpu.2-8
#4 sha256:323de4acf75d6b2e459b4b8a0e310d7dc4db57cbce631e77d93d9d372268c69f
#4 DONE 0.0s

#6 [internal] load build context
#6 sha256:4a7f8d69d1c3676beb0d5447e5d2c7d50afeef47887a9d06d04447939881d4a0
#6 transferring context: 5.84kB 0.1s done
#6 DONE 0.1s

#5 [2/4] WORKDIR /app
#5 sha256:26e4bdffa15fd2e5c558f57fb17c21d919624ea361762da17724581c60dce5fa
#5 CACHED

#7 [3/4] COPY . /app
#7 sha256:c

In [8]:
#Run Container

In [13]:
! docker run masketeers/containerizeml --gpus all -t nvidia/cuda

/app

Activated service account credentials for: [sean-goh-serviceaccount@daring-hash-348101.iam.gserviceaccount.com]
2022-06-06 13:50:11.266888: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2022-06-06 13:50:11.267204: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-06 13:50:11.267294: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (d4d69927ea18): /proc/driver/nvidia/version does not exist
2022-06-06 13:50:11.269506: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library


['FaceMask', 'daring-hash-348101-2f4dd5ea462e.json', 'download.sh', '.ipynb_checkpoints', 'initialize.sh', 'Dockerfile', 'requirements.txt', 'FaceMaskEfficientNet.py']
Found 10000 files belonging to 2 classes.
Found 992 files belonging to 2 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5

   16384/16705208 [..............................] - ETA: 
  417792/16705208 [..............................] - ETA: 
  958464/16705208 [>.............................] - ETA: 
 1556480/16705208 [=>............................] - ETA: 
 2473984/16705208 [===>..........................] - ETA: 
 3276800/16705208 [====>.........................] - ETA: 


  1/313 [..............................] - ETA: 46:11 - loss: 0.9270 - accuracy: 0.531
  2/313 [..............................] - ETA: 5:54 - loss: 0.7297 - accuracy: 0.625
  3/313 [..............................] - ETA: 5:50 - loss: 0.4946 - accuracy: 0.75
  4/313 [.............................

In [10]:
#executing shell script in python
#import subprocess
#>>> subprocess.call(['sh', './test.sh'])

CONTAINER ID   IMAGE          COMMAND                  CREATED          STATUS          PORTS      NAMES
62e9618d6ff4   149db932f30b   "sh initialize.sh --â€¦"   15 minutes ago   Up 15 minutes   8080/tcp   angry_lewin


62e9618d6ff4
