In [1]:
%%bash

mkdir -p ./ml_training_gcp

cat > ./ml_training_gcp/FaceMaskEfficientNet.py <<CODE

import tensorflow as tf
from tensorflow import keras
import numpy as np
#from PIL import Image
import os
from tensorflow.keras import layers


#Data is downloaded into the container via shellscript. We believe this is more efficient
traindirectory="/app/FaceMask/Train"
testdirectory="/app/FaceMask/Test"
validationdirectory="/app/FaceMask/Validation"
image_size=224
TrainData=keras.utils.image_dataset_from_directory(traindirectory, class_names=["WithoutMask","WithMask"], image_size=(image_size,image_size))
TestData=keras.utils.image_dataset_from_directory(testdirectory, class_names=["WithoutMask","WithMask"], image_size=(image_size,image_size))
ValidationData=keras.utils.image_dataset_from_directory(validationdirectory, class_names=["WithoutMask","WithMask"], image_size=(image_size,image_size))


img_augmentation = keras.models.Sequential(
    [
        layers.RandomRotation(factor=0.15),
        layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
        layers.RandomFlip(),
        layers.RandomContrast(factor=0.1),

    ],
    name="img_augmentation",
)

def build_model(num_classes, IMG_SIZE):
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = img_augmentation(inputs) #image augmentation within the model. Should this be good practice? Or do we do it inside the map.
    #x=inputs
    model = keras.applications.EfficientNetB0(include_top=False, input_tensor=x, weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = layers.BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(num_classes, activation="softmax", dtype='float32', name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(
        optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )
    return model

from datetime import datetime
strategy = tf.distribute.MirroredStrategy()
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
#options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF


#unbatching as keras.utils.image_dataset_from_directory comes with a default batch
TrainData=TrainData.unbatch().with_options(options)
TestData=TestData.unbatch().with_options(options)
ValidationData=ValidationData.unbatch().with_options(options)

#TrainData=TrainData.map(lambda x, y: (img_augmentation(x), y),num_parallel_calls = tf.data.AUTOTUNE)
TrainData=TrainData.prefetch(tf.data.AUTOTUNE).batch(64*strategy.num_replicas_in_sync)
TestData=TestData.prefetch(tf.data.AUTOTUNE).batch(64*strategy.num_replicas_in_sync)
ValidationData=ValidationData.prefetch(tf.data.AUTOTUNE).batch(64*strategy.num_replicas_in_sync)

logs = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")

early_stop= tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=2
)

tboard_callback = tf.keras.callbacks.TensorBoard(log_dir = logs,
                                                 histogram_freq = 1,
                                                 profile_batch = '500,520')
with strategy.scope():
    model=build_model(2, 224)
model.fit(TrainData,
        epochs=1,
          validation_data=TestData
         , callbacks=[tboard_callback,early_stop])
model.save("FaceMaskEfficientNetModel")

import tensorflow as tf
converter = tf.lite.TFLiteConverter.from_saved_model('/app/FaceMaskEfficientNetModel')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()
with open('FaceMaskEfficientNetModel.tflite', 'wb') as f:
    f.write(quantized_tflite_model)
    
import numpy as np
from sklearn.metrics import f1_score
def evaluate_model(interpreter,model,dataset):
    #interpreter = tflite intepreter
    #model = full model
    dataset=dataset.with_options(options).unbatch().batch(512)
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every image in the "test" dataset.
    prediction_digits = []
    label_digits =[]
    full_model_matches=[]
    full_model_prediction_digits = []
    full_model_label_digits =[]
    for i, batch in enumerate(dataset):
        #Only Validate for 1 batch
        if i==1:
            break
        print("processing batch: "+str(i+1))
        test_images,test_labels=batch
        
        #evaluate main model
        probs=model.predict(test_images)
        full_model_predictions=np.argmax(probs, axis=1)
        matches=list(np.array(full_model_predictions)==np.array(test_labels))
        full_model_matches.extend(matches)
        full_model_prediction_digits.extend(full_model_predictions)
        full_model_label_digits.extend(test_labels)
        
        #evaluate quantized model
        for n,test_image in enumerate(test_images):
            #print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
            test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
            interpreter.set_tensor(input_index, test_image)

    # Run inference.
            interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
            output = interpreter.tensor(output_index)
            digit = np.argmax(output()[0])
            prediction_digits.append(digit)
            label_digits.append(test_labels[n])

    print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
    prediction_digits = np.array(prediction_digits)
    label_digits=np.array(label_digits)
    tflite_accuracy = (prediction_digits == label_digits).mean()
    tflite_f1= f1_score(prediction_digits,label_digits)
    full_model_accuracy=sum(full_model_matches)/len(full_model_matches)
    full_model_f1=f1_score(full_model_prediction_digits,full_model_label_digits)
    return tflite_accuracy, full_model_accuracy, tflite_f1,full_model_f1

interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()
tflite_accuracy, full_model_accuracy, tflite_f1, full_model_f1 = evaluate_model(interpreter,model,ValidationData)
print("The Full Model Accuracy is: "+str(full_model_accuracy)+" and the Quantized Model Accuracy is: "+str(tflite_accuracy))
print("The Full Model F1 is: "+str(full_model_f1)+" and the Quantized Model F1 is: "+str(tflite_f1))
model_performance={'FullModelAccuracy':full_model_accuracy, "QuantizedModelAccuracy":tflite_accuracy, 'AccuracyDifference':tflite_accuracy-full_model_accuracy
                  ,'FullModelF1':full_model_f1, "QuantizedModelF1":tflite_f1, 'F1Difference':tflite_f1-full_model_f1
                  }

import json
with open('EfficientNetPerformanceComparison.json', 'w') as f:
    json.dump(model_performance, f)
CODE

Cant Test Training Code Due to GPU Usage requiring complex setup that is easier to do with Docker /n
Pull Docker Image

In [2]:
#!docker pull tensorflow/tensorflow:latest-gpu
#gcr.io/deeplearning-platform-release/base-cu110
#RUN curl -sSL https://sdk.cloud.google.com | bash
!docker pull gcr.io/deeplearning-platform-release/tf-gpu.2-8

Using default tag: latest
latest: Pulling from deeplearning-platform-release/tf-gpu.2-8
Digest: sha256:5bcd6b34a8c00142040d1561b2a39d5ac13ba576bc8c22548d12d98d37ade168
Status: Image is up to date for gcr.io/deeplearning-platform-release/tf-gpu.2-8:latest
gcr.io/deeplearning-platform-release/tf-gpu.2-8:latest


Create Requirements Text

In [3]:
%%bash

cat > ./ml_training_gcp/requirements.txt <<EOF

EOF

Copy Authentication File into Container. This step can be skipped on GCP as it will be auto-auth (and yes this is not safe)

In [4]:
%%bash
cp daring-hash-348101-9717f041dd58.json ./ml_training_gcp

Create Shell Script to Download Data (Note: The entire folder structure will be copied into app. Therefore /app/FaceMask will exist)

In [5]:
%%bash

cat > ./ml_training_gcp/initialize.sh <<EOF
#! /bin/sh
gcloud auth activate-service-account --key-file=daring-hash-348101-9717f041dd58.json
gsutil -mq cp -r gs://seangoh-smu-mle-usa/FaceMask /app
python FaceMaskEfficientNet.py
gsutil -mq cp -r /app/FaceMaskEfficientNetModel gs://seangoh-smu-mle-usa/Models/
gsutil -q cp /app/FaceMaskEfficientNetModel.tflite gs://seangoh-smu-mle-usa/Models/
gsutil -q cp /app/EfficientNetPerformanceComparison.json gs://seangoh-smu-mle-usa/Models/
gsutil -mq cp -r /app/logs gs://seangoh-smu-mle-usa/logs/
rm -r /app/FaceMask
EOF

Create Docker File

In [6]:
%%bash

cat > ./ml_training_gcp/Dockerfile <<EOF
FROM gcr.io/deeplearning-platform-release/tf-gpu.2-8
WORKDIR /app

COPY . /app
RUN pip install -r requirements.txt

ENTRYPOINT ["sh", "initialize.sh"]
EOF

Build Docker

In [7]:
%%bash

docker build ./ml_training_gcp/ -t masketeers/containerizeml

Sending build context to Docker daemon  18.43kB
Step 1/5 : FROM gcr.io/deeplearning-platform-release/tf-gpu.2-8
 ---> cc037125fdd9
Step 2/5 : WORKDIR /app
 ---> Using cache
 ---> 884c3492a942
Step 3/5 : COPY . /app
 ---> Using cache
 ---> 60f19f9fd631
Step 4/5 : RUN pip install -r requirements.txt
 ---> Using cache
 ---> 449e72bd473d
Step 5/5 : ENTRYPOINT ["sh", "initialize.sh"]
 ---> Using cache
 ---> dfc94aeb429b
Successfully built dfc94aeb429b
Successfully tagged masketeers/containerizeml:latest


In [8]:
#Run Container ! docker run masketeers/containerizeml --gpus all -t nvidia/cuda

In [9]:
! docker run masketeers/containerizeml

Activated service account credentials for: [591661299323-compute@developer.gserviceaccount.com]
2022-06-22 05:43:26.581348: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2022-06-22 05:43:26.581398: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-22 05:43:26.581430: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (f288b7df2c66): /proc/driver/nvidia/version does not exist
2022-06-22 05:43:26.581887: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use

In [10]:
#Run in Terminal
#sudo usermod -a -G docker ${USER}
#gcloud auth configure-docker asia-southeast1-docker.pkg.dev

In [11]:
!gcloud auth activate-service-account --key-file=daring-hash-348101-9717f041dd58.json

Activated service account credentials for: [591661299323-compute@developer.gserviceaccount.com]


In [12]:
!docker build ./ml_training_gcp/ -t us-east1-docker.pkg.dev/daring-hash-348101/smu-mle-usa/efficientnettrain:latest

Sending build context to Docker daemon  18.43kB
Step 1/5 : FROM gcr.io/deeplearning-platform-release/tf-gpu.2-8
 ---> cc037125fdd9
Step 2/5 : WORKDIR /app
 ---> Using cache
 ---> 884c3492a942
Step 3/5 : COPY . /app
 ---> Using cache
 ---> 60f19f9fd631
Step 4/5 : RUN pip install -r requirements.txt
 ---> Using cache
 ---> 449e72bd473d
Step 5/5 : ENTRYPOINT ["sh", "initialize.sh"]
 ---> Using cache
 ---> dfc94aeb429b
Successfully built dfc94aeb429b
Successfully tagged us-east1-docker.pkg.dev/daring-hash-348101/smu-mle-usa/efficientnettrain:latest


In [13]:
!docker push us-east1-docker.pkg.dev/daring-hash-348101/smu-mle-usa/efficientnettrain:latest

The push refers to repository [us-east1-docker.pkg.dev/daring-hash-348101/smu-mle-usa/efficientnettrain]

[1Beb1967b2: Preparing 
[1B02f7ac52: Preparing 
[1B95a9cd9c: Preparing 
[1Bb3120056: Preparing 
[1B53b734c4: Preparing 
[1Bb4a993bf: Preparing 
[1Ba0734f1b: Preparing 
[1B39fb5680: Preparing 
[1B6da164cd: Preparing 
[1B22d8d85c: Preparing 
[1Bb01c5179: Preparing 
[1Be696ff5b: Preparing 
[1B43fff4a9: Preparing 
[1B86de6044: Preparing 
[1B8d193daf: Preparing 
[1B188023c9: Preparing 
[1B0496c2b3: Preparing 
[1Bdc387b12: Preparing 
[1B257dc2e4: Preparing 
[1B54032850: Preparing 
[1B951137ff: Preparing 
[1Bc25e1d03: Preparing 
[1B01bb0f15: Preparing 
[1Bbf18a086: Preparing 
[1B6f75faab: Preparing 
[1Bac543081: Preparing 
[1Bc4c62eef: Preparing 
[1Ba71261c7: Preparing 
[1Bba43cdbe: Preparing 
[1B942867a5: Preparing 
[1Bfe6d10a9: Preparing 
[1B91182163: Preparing 
[1B6c5bb65c: Preparing 
[1B550a3bbe: Preparing 
[1Bedc62fb3: Layer already exists [29A[2K

In [14]:
#Does GCSFuse work better or copy with gsutils for image dataset
#Cannot test gpu training with python.py but requires a gpu container