# MNIST image classification with Keras only

In [None]:
import os
import shutil
from datetime import datetime
import sys
import json
import argparse

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
print(tf.__version__)

In [None]:
# --- Google Cloud settings: replace these three with your own values ---
# Project ID
PROJECT = 'ml-practice-260405'
# Cloud Storage bucket name
BUCKET = 'bucket-ml-practice-260405'
# Region of the bucket, e.g. us-central1
REGION = 'us-central1'

# Model to train: "linear", "dnn", "dnn_dropout", or "cnn"
MODEL_TYPE = 'dnn'

# Service account name and the local directory holding its JSON key
SAC = "jupyter-notebook-sac-f"
SAC_KEY_DESTINATION = "/media/mujahid7292/Data/Gcloud_Tem_SAC"

In [None]:
# Do not change 
# Export the notebook settings as environment variables so that the %%bash
# cells can read them.  Do not change.
os.environ["ACCOUNT"] = "sandcorp2014@gmail.com"
os.environ["PROJECT"] = PROJECT
os.environ["BUCKET"] = BUCKET
os.environ["REGION"] = REGION
os.environ['SAC'] = SAC
os.environ['SAC_KEY_DESTINATION'] = SAC_KEY_DESTINATION
os.environ["MODEL_TYPE"] = MODEL_TYPE
os.environ["TFVERSION"] = "2.1.0"  # Tensorflow version
# An image URI always uses forward slashes.  os.path.join would insert the
# platform path separator instead (a backslash on Windows), so build the
# string explicitly.
os.environ["IMAGE_URI"] = "gcr.io/{}/mnistmodel".format(PROJECT)

In [None]:
%%bash
# Select the Google account that gcloud commands run as.
gcloud config set account ${ACCOUNT}

In [None]:
%%bash
# Default project and compute region for subsequent gcloud commands.
gcloud config set project ${PROJECT}
gcloud config set compute/region ${REGION}

# Activate the service account with above key

In [None]:
%%bash
# Authenticate gcloud as the service account using its JSON key file.
# The path is quoted so that a key directory containing spaces still works.
gcloud auth activate-service-account \
    --key-file="${SAC_KEY_DESTINATION}/${SAC}.json"

# Set Google Application Credentials

In [None]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]='{}/{}.json'.format(SAC_KEY_DESTINATION,SAC)

Check Whether Google Application Credential Was Set Successfully Outside Virtual Environment

In [None]:
%%bash
# Print the variable (name and value) if it is visible to the shell.
set | grep "GOOGLE_APPLICATION_CREDENTIALS"

# Set Default Project And Region

In [None]:
%%bash
# Re-apply the account, project and region defaults for gcloud.
gcloud config set account ${ACCOUNT}
gcloud config set project ${PROJECT}
gcloud config set compute/region ${REGION}

# Give GCS Access Permission To This Service Account

In [None]:
%%bash
# Grant the service account the Storage Admin role on the project.
gcloud projects add-iam-policy-binding ${PROJECT} \
    --member=serviceAccount:${SAC}@${PROJECT}.iam.gserviceaccount.com \
    --role=roles/storage.admin

In [None]:
%%bash
# Create the package directory tree.  -p creates the parent as needed and
# does not fail when the directories already exist (e.g. on a notebook re-run).
mkdir -p mnistmodel_keras_only/trainer/

In [None]:
%%writefile mnistmodel_keras_only/trainer/__init__.py
# Empty file

In [None]:
%%writefile mnistmodel_keras_only/trainer/task.py
import argparse
import json
import os
import sys

from . import model

def _parse_arguments(argv):
    """
    Build the trainer's argument parser and apply it to ``argv``.

    Options (all optional):
        --model_type       str, default 'dnn'
        --epochs           int, default 10
        --steps_per_epoch  int, default 100
        --job-dir          str, default 'mnistmodel_keras_only/'

    Returns the ``(namespace, unrecognised_arguments)`` pair produced by
    ``parse_known_args``, so unknown flags are tolerated rather than fatal.
    """
    # (flag, type, default, help) -- registered in this order.
    option_specs = (
        ('--model_type', str, 'dnn',
         "Which model type to use."),
        ('--epochs', int, 10,
         'The number of epochs to train.'),
        ('--steps_per_epoch', int, 100,
         'The number of steps per epoch to train.'),
        ('--job-dir', str, 'mnistmodel_keras_only/',
         "Directory where to save the model."),
    )

    cli = argparse.ArgumentParser()
    for flag, value_type, fallback, description in option_specs:
        cli.add_argument(
            flag, help=description, type=value_type, default=fallback
        )

    return cli.parse_known_args(argv)

def main():
    """
    Parse command line arguments and kick off model training.

    Reads the options from ``sys.argv`` (unrecognised arguments are
    ignored), builds the layer list for the requested model type, compiles
    the model and hands it to ``model.train_and_evaluate`` together with
    the epoch settings and the job directory.
    """
    # parse_known_args returns (namespace, leftovers); only the namespace
    # is used.
    args, _ = _parse_arguments(sys.argv[1:])

    model_layers = model.get_layers(args.model_type)

    image_model = model.build_model(model_layers, args.job_dir)

    # The returned training history is not used here, so it is not bound
    # to a name.
    model.train_and_evaluate(
        image_model, args.epochs, args.steps_per_epoch, args.job_dir
    )
    
if __name__ == '__main__':
    main()

Next, let's group non-model functions into a util file to keep the model file simple. We'll copy over the `scale` and `load_dataset` functions from the previous lab.

In [None]:
%%writefile mnistmodel_keras_only/trainer/util.py
import tensorflow as tf

def scale(image, label):
    """
    Cast ``image`` to float32, divide it by 255 and append a trailing axis.

    With pixel values in the 0..255 range (as assumed by this lab) the
    result lies in 0.0..1.0.  ``label`` is passed through unchanged.

    Returns the ``(image, label)`` pair.
    """
    as_float = tf.cast(x=image, dtype=tf.float32)
    normalized = as_float / 255
    return tf.expand_dims(input=normalized, axis=-1), label

def load_dataset(data, training=True, buffer_size=5000, batch_size=100, nclasses=10):
    """
    Loads MNIST dataset into a tf.data.Dataset

    Args:
        data: ``((x_train, y_train), (x_test, y_test))`` tuple.
        training: use the train split (shuffled and repeated) when True,
            otherwise the test split (single pass, original order).
        buffer_size: shuffle buffer size, counted in individual examples.
        batch_size: number of examples per batch.
        nclasses: number of classes used for the one-hot label encoding.

    Returns:
        A batched dataset of ``(image, one_hot_label)`` pairs, with images
        transformed by ``scale``.
    """
    (x_train, y_train), (x_test, y_test) = data

    x = x_train if training else x_test
    y = y_train if training else y_test

    # One-hot encode the class.  The labels are passed positionally because
    # the name of this first parameter appears to differ between Keras
    # releases (TODO confirm).
    y = tf.keras.utils.to_categorical(y, num_classes=nclasses)

    # Convert our data into tf.data
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).map(scale)

    if training:
        # Shuffle individual examples *before* batching.  Shuffling after
        # batch() only reorders whole batches, so every batch would contain
        # the same examples in every epoch.
        dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat()
    else:
        dataset = dataset.batch(batch_size)

    return dataset

Finally, let's code the models! The [tf.keras API](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras) accepts an array of [layers](https://www.tensorflow.org/api_docs/python/tf/keras/layers) into a [model object](https://www.tensorflow.org/api_docs/python/tf/keras/Model), so we can create a dictionary of layers based on the different model types we want to use. The below file has three functions: `get_layers`, `build_model`, and `train_and_evaluate`. We will build the structure of our model in `get_layers` and compile it in `build_model`. Last but not least, we'll copy over the training code from the previous lab into `train_and_evaluate`.

These models progressively build on each other. Look at the imported `tensorflow.keras.layers` modules and the default values for the variables defined in `get_layers` for guidance.

In [None]:
%%writefile mnistmodel_keras_only/trainer/model.py
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import (Conv2D, Dense, Dropout, Flatten,
                                     MaxPooling2D, Softmax)

from . import util

# Image Variables
WIDTH = 28
HEIGHT = 28

def get_layers(model_type, nclasses = 10, hidden_layer_1_neurons=400,
              hidden_layer_2_neurons=100):
    """
    Construct layers for keras model based on a dict of model types.

    Args:
        model_type: 'linear' or 'dnn'.
        nclasses: width of the final Dense layer (number of classes).
        hidden_layer_1_neurons: units in the first hidden layer ('dnn').
        hidden_layer_2_neurons: units in the second hidden layer ('dnn').

    Returns:
        The list of layer instances for ``model_type``.

    Raises:
        KeyError: if ``model_type`` is not one of the supported types; the
            message lists the supported names.
    """
    model_layers = {
        'linear':[
            Flatten(),
            Dense(nclasses),
            Softmax()
        ],
        'dnn':[
            Flatten(),
            Dense(hidden_layer_1_neurons, activation='relu'),
            Dense(hidden_layer_2_neurons, activation='relu'),
            Dense(nclasses),
            Softmax()
        ]
    }

    # Fail with an actionable message instead of a bare KeyError: a
    # model_type that is not a key of the dict above would otherwise give
    # no hint about what is accepted.
    if model_type not in model_layers:
        raise KeyError(
            "Unsupported model_type {!r}; choose one of: {}".format(
                model_type, ', '.join(sorted(model_layers))
            )
        )

    return model_layers[model_type]

def build_model(layers, output_dir):
    """
    Wrap ``layers`` in a Sequential model and compile it.

    The model is compiled with the Adam optimizer, categorical
    cross-entropy loss and the accuracy metric.  ``output_dir`` is accepted
    but not used by this function.

    Returns the compiled model.
    """
    compile_options = dict(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    image_classifier = Sequential(layers)
    image_classifier.compile(**compile_options)
    return image_classifier

def train_and_evaluate(model, num_epochs, steps_per_epoch, output_dir):
    """
    Fit ``model`` on MNIST, validating on the test split each epoch.

    When ``output_dir`` is truthy, TensorBoard logs are written to it and
    the trained model is exported to ``<output_dir>/keras_export``.
    With a falsy ``output_dir`` nothing is logged or saved.

    Returns the history object produced by ``model.fit``.
    """
    # Download/load MNIST and split it into training and validation sets.
    mnist = tf.keras.datasets.mnist.load_data()
    train_data = util.load_dataset(mnist)
    validation_data = util.load_dataset(mnist, training=False)

    # TensorBoard logging is only enabled when there is somewhere to write.
    callbacks = [TensorBoard(log_dir=output_dir)] if output_dir else []

    fit_options = dict(
        validation_data=validation_data,
        epochs=num_epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=2,
        callbacks=callbacks,
    )
    history = model.fit(train_data, **fit_options)

    # Export the trained model next to the logs.
    if output_dir:
        model.save(os.path.join(output_dir, 'keras_export'), save_format='tf')

    return history

## Run as a Python module

Since we want to run our code on Cloud ML Engine, we've packaged it as a python module.

The `model.py` and `task.py` files containing the model code are in <a href="mnistmodel_keras_only/trainer">mnistmodel_keras_only/trainer</a>

**Let's first run it locally for a few steps to test the code.** 

In [None]:
%%bash
# Quick local smoke test of the trainer package.
# The job directory is the package directory itself, so the TensorBoard
# logs and the keras_export folder are written inside it.
MODEL_TYPE=dnn
JOB_DIR=mnistmodel_keras_only
rm -rf mnistmodel_keras_only.tar.gz mnist_keras_only_trained
python3 -m mnistmodel_keras_only.trainer.task \
    --model_type=$MODEL_TYPE \
    --epochs=5 \
    --steps_per_epoch=50 \
    --job-dir=$JOB_DIR

In [None]:
%%bash
# Run the trainer through gcloud's local-training wrapper.
# `gcloud ai-platform` is the command group used by the rest of this
# notebook and replaces the older `gcloud ml-engine` spelling.
MODEL_TYPE='dnn'
# Outputs go to ./tmp.  (The earlier JOB_DIR='mnistmodel_keras_only'
# assignment was immediately overwritten by this one and has been dropped.)
JOB_DIR=./tmp
rm -rf mnistmodel_keras_only.tar.gz mnist_keras_only_trained
gcloud ai-platform local train \
    --module-name=trainer.task \
    --package-path=./mnistmodel_keras_only/trainer \
    -- \
    --job-dir=${JOB_DIR} \
    --epochs=10 \
    --steps_per_epoch=50 \
    --model_type=${MODEL_TYPE}

**Now, let's do it on Cloud ML Engine so we can train on GPU (`--scale-tier=BASIC_GPU`)**

Note the GPU speed up depends on the model type. You'll notice the more complex CNN model trains significantly faster on GPU, however the speed up on the simpler models is not as pronounced.

In [None]:
%%bash
# Submit the trainer package as a cloud training job on a GPU worker.
# The output folder and job name follow $MODEL_TYPE so they match the model
# that is actually trained.
OUTDIR=gs://${BUCKET}/mnist/trained_${MODEL_TYPE}
JOBNAME=mnist_${MODEL_TYPE}_$(date -u +%y%m%d_%H%M%S)
# $TFVERSION is "2.1.0", but the deploy cell of this notebook passes "2.1"
# as the runtime version, so only MAJOR.MINOR is passed here as well.
# NOTE(review): confirm against the AI Platform runtime version list.
RUNTIME_VERSION=$(echo "${TFVERSION}" | cut -d. -f1,2)
echo $OUTDIR $REGION $JOBNAME
# Remove the output of any previous run with the same model type.
gsutil -m rm -rf $OUTDIR
# `gcloud ai-platform` replaces the older `gcloud ml-engine` spelling.
# NOTE(review): --python-version=3.7 is presumably required for TF 2.x
# runtimes -- confirm.
gcloud ai-platform jobs submit training $JOBNAME \
    --region=$REGION \
    --module-name=trainer.task \
    --package-path=./mnistmodel_keras_only/trainer \
    --job-dir=$OUTDIR \
    --staging-bucket=gs://$BUCKET \
    --scale-tier=BASIC_GPU \
    --runtime-version=$RUNTIME_VERSION \
    --python-version=3.7 \
    -- \
    --epochs=50 \
    --steps_per_epoch=50 \
    --model_type=$MODEL_TYPE

## Local Training

Now that we know that our models are working as expected, let's run it on the [Google Cloud AI Platform](https://cloud.google.com/ml-engine/docs/). We can run it as a python module locally first using the command line.

The below cell transfers some of our variables to the command line as well as create a job directory including a timestamp.

You can change the model_type to try out different models.

In [None]:
# Name the job and its Cloud Storage output folder after the model type and
# the current local time, then expose them to the %%bash cells.
model_type = 'dnn'
current_time = datetime.now().strftime('%y%m%d_%H%M%S')

os.environ.update({
    'MODEL_TYPE': model_type,
    'JOB_DIR': "gs://{}/mnist_{}_{}".format(BUCKET, model_type, current_time),
    'JOB_NAME': "mnist_{}_{}".format(model_type, current_time),
})

The cell below runs the local version of the code. The `epochs` and `steps_per_epoch` flags can be changed to run for longer or shorter, as defined in our `mnistmodel_keras_only/trainer/task.py` file.

In [None]:
%%bash
# Run the trainer locally, writing logs and the exported model to $JOB_DIR.
python3 -m mnistmodel_keras_only.trainer.task \
    --model_type=${MODEL_TYPE} \
    --epochs=5 \
    --steps_per_epoch=50 \
    --job-dir=${JOB_DIR}

## Training on the cloud

Since we're using an unreleased version of TensorFlow on AI Platform, we can instead use a [Deep Learning Container](https://cloud.google.com/ai-platform/deep-learning-containers/docs/overview) in order to take advantage of libraries and applications not normally packaged with AI Platform. Below is a simple [Dockerfile](https://docs.docker.com/engine/reference/builder/) which copies our code to be used in a TF2 environment.

In [None]:
%%writefile mnistmodel_keras_only/Dockerfile
# The training job in this notebook is submitted with --scale-tier=BASIC_GPU,
# so the GPU-enabled TF2 Deep Learning Container is used as the base image;
# the tf2-cpu image would leave the requested GPU idle.
FROM gcr.io/deeplearning-platform-release/tf2-gpu
# Copy the trainer package so it is importable as mnistmodel.trainer.
COPY mnistmodel_keras_only/trainer mnistmodel/trainer
# Arguments passed to the container are forwarded to the trainer's CLI.
ENTRYPOINT ["python3", "-m", "mnistmodel.trainer.task"]

The below command builds the image and ships it off to Google Cloud so it can be used for AI Platform. When built, it will show up [here](http://console.cloud.google.com/gcr) with the name `mnistmodel`. ([Click here](https://console.cloud.google.com/cloud-build) to enable Cloud Build)

<b><p style='color:red'>The `docker build` and `docker push` commands below will not run on this laptop. They will only run in an `AI Platform` notebook. So stop here.</p></b>

In [None]:
%%bash
# Build the training image from the repository root and tag it with the
# registry URI.
docker build \
    --file mnistmodel_keras_only/Dockerfile \
    --tag ${IMAGE_URI} \
    ./

In [None]:
%%bash
# Upload the tagged image to the container registry.
docker push ${IMAGE_URI}

Finally, we can kickoff the [AI Platform training job](https://cloud.google.com/sdk/gcloud/reference/ai-platform/jobs/submit/training). We can pass in our docker image using the `master-image-uri` flag.

In [None]:
%%bash
# Submit a training job that runs the custom container image.
# Everything after the bare "--" is handed to the trainer itself.
echo ${JOB_DIR} ${REGION} ${JOB_NAME}
gcloud ai-platform jobs submit training ${JOB_NAME} \
    --region=${REGION} \
    --scale-tier=BASIC_GPU \
    --master-image-uri=${IMAGE_URI} \
    --staging-bucket=gs://${BUCKET} \
    --job-dir=${JOB_DIR} \
    -- \
    --model_type=${MODEL_TYPE}

Can't wait to see the results? Run the code below and copy the output into the [Google Cloud Shell](https://console.cloud.google.com/home/dashboard?cloudshell=true) to follow.

## Deploying and predicting with model

Once you have a model you're proud of, let's deploy it! All we need to do is give AI Platform the location of the model. Below uses the keras export path of the previous job, but `${JOB_DIR}keras_export/` can always be changed to a different path.

Uncomment the delete commands below if you are getting an "already exists error" and want to deploy a new model.

In [None]:
%%bash
# Deploy the exported model as version $MODEL_TYPE of the "mnist" model.
MODEL_NAME="mnist"
MODEL_VERSION=${MODEL_TYPE}
# The trainer saves to <job-dir>/keras_export.  JOB_DIR is set without a
# trailing slash, so the separator must be added here; any trailing slash
# is stripped first to avoid producing "//".
MODEL_LOCATION=${JOB_DIR%/}/keras_export/
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
#yes | gcloud ai-platform versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
#yes | gcloud ai-platform models delete ${MODEL_NAME}
gcloud ai-platform models create ${MODEL_NAME} --regions $REGION
# NOTE(review): --python-version=3.7 is presumably required alongside
# runtime version 2.1 -- confirm.
gcloud ai-platform versions create ${MODEL_VERSION} \
    --model ${MODEL_NAME} \
    --origin ${MODEL_LOCATION} \
    --framework tensorflow \
    --runtime-version=2.1 \
    --python-version=3.7