In [1]:
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# An MNIST example for tensorflow-cloud on Google Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tensorflow/cloud/blob/master/tests/integration/call_run_within_nb_on_colab.ipynb)

This Colab shows how to use Keras to build a simple ConvNet model for MNIST, and how to use [tensorflow-cloud](https://github.com/tensorflow/cloud) to train that model on GCP.

The example demonstrates the tensorflow-cloud workflow. The model definition is identical to what you would write for training locally (or on Colab); a single call to `tfc.run()` then moves the training job to GCP.

## Setup and authentication

Note that the setup and authentication steps may need to be repeated every time you start a hosted Colab session. Using a local runtime avoids this.

### **PIP Install Packages and dependencies**

Install tensorflow-cloud package.
Please comment out first line after running this cell.


In [None]:
import os
import subprocess
import sys

# Import tensorflow-cloud, installing it on first use if it is missing.
try:
    import tensorflow_cloud as tfc
except ImportError:
    # Only a missing package triggers the install; any other import failure
    # is left to surface as-is. Running pip through the current interpreter
    # installs into the environment this kernel uses, and check_call raises
    # if the install itself fails instead of silently continuing.
    subprocess.check_call(
        [sys.executable, '-m', 'pip', 'install', '-U', '--quiet',
         'tensorflow-cloud']
    )
    import tensorflow_cloud as tfc
import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)


**Note:** If the above command throws any permission errors, try installing with `sudo`.
Restart the runtime session if the import still fails.

### **Set up your GCP Project Id**

Enter your `Project Id` in the cell below. Then run the cell to make sure the
Cloud SDK uses the right project for all the commands in this notebook.

In [None]:
# Placeholder GCP project id and compute region; replace with your own values.
# The trailing `#@param` annotations are Colab form-field markers.
# NOTE(review): neither name is referenced by any later cell visible in this
# notebook — confirm whether they are still needed.
PROJECT_ID = "[gcp-project-id]" #@param {type:"string"}
COMPUTE_REGION = "us-central1" #@param {type:"string"}

### **Authenticate your GCP account**
Follow https://github.com/tensorflow/cloud/blob/master/README.md#setup-instructions to obtain a JSON key file, then proceed:

In [None]:
# Upload the downloaded JSON file that contains your key.
# This cell only does work when running inside Colab; elsewhere it is a no-op.

if 'google.colab' in sys.modules:
    import subprocess

    from google.colab import files

    keyfile_upload = files.upload()
    if not keyfile_upload:
        # Fail with a clear message instead of an IndexError on the lookup below.
        raise ValueError('No service account key file was uploaded.')
    # Use the first uploaded file as the service account key.
    keyfile = list(keyfile_upload.keys())[0]
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = keyfile
    # Pass the file name as a separate argument (no shell) so names containing
    # spaces or shell metacharacters are handled safely, and raise if gcloud
    # reports a failure rather than ignoring its exit status.
    subprocess.run(
        ['gcloud', 'auth', 'activate-service-account', '--key-file', keyfile],
        check=True,
    )

### **Specify Cloud Storage bucket**

To create bucket, follow:
https://cloud.google.com/ai-platform/docs/getting-started-keras#create_a_bucket

The bucket will be used both for building the Docker image and for saving results.

In [None]:
# Placeholder Cloud Storage bucket name and model sub-path; replace with your
# own values. Later cells combine them as gs://{BUCKET_NAME}/{MODEL_PATH} when
# saving and loading the model, and pass BUCKET_NAME to tfc.DockerConfig as
# the image build bucket.
BUCKET_NAME = "[gcs-bucket-name]" #@param {type:"string"}
MODEL_PATH = "examples-colab" #@param {type:"string"}

## Testing
This section includes the code for preparing the data and training the model. It can be run with or without GCP and tensorflow_cloud. Before running on GCP, it is advised to test the code here first, possibly with a smaller data size. Once it works, this section does not need to be changed.

### **Prepare data**



In [None]:
# Turn off the tfds progress bar before loading.
tfds.disable_progress_bar()

# Load MNIST together with its metadata and pick out the two splits.
datasets, info = tfds.load(name="mnist", with_info=True, as_supervised=True)
mnist_train = datasets["train"]
mnist_test = datasets["test"]

# Example counts for each split, as reported by the dataset metadata.
num_train_examples, num_test_examples = (
    info.splits[split_name].num_examples for split_name in ("train", "test")
)

# Input pipeline settings: shuffle buffer size and batch size.
BUFFER_SIZE = 10000
BATCH_SIZE = 64


def scale(image, label):
    """Cast `image` to float32 and divide it by 255.

    Args:
        image: Image tensor to convert.
        label: Label associated with the image; returned unchanged.

    Returns:
        A `(scaled_image, label)` tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255
    return scaled_image, label


# Training pipeline: scale, cache, shuffle, then batch.
train_dataset = (
    mnist_train.map(scale)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Evaluation pipeline: scale and batch only.
eval_dataset = (
    mnist_test.map(scale)
    .batch(BATCH_SIZE)
)

### **Create and train model locally**

In [None]:
# Create the model: one convolution + pooling stage followed by two dense
# layers, ending in a 10-way softmax.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Conv2D(32, 3, activation="relu", input_shape=(28, 28, 1))
)
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

# Sparse categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


# Function for decaying the learning rate.
# You can define any decay function you need.
def decay(epoch):
    """Return the learning rate for the given epoch index.

    Args:
        epoch: Epoch index.

    Returns:
        1e-3 when `epoch` is below 3, 1e-4 when it is below 7, and 1e-5
        otherwise.
    """
    if epoch < 3:
        return 1e-3
    if epoch < 7:
        return 1e-4
    return 1e-5


# Callback for printing the LR at the end of each epoch.
class PrintLR(tf.keras.callbacks.Callback):
    """Keras callback that prints the optimizer's learning rate after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print the current learning rate.

        Args:
            epoch: Epoch index; printed as `epoch + 1`.
            logs: Unused.
        """
        # Read the optimizer from the model this callback is attached to
        # (`self.model`) rather than from a notebook-level `model` variable,
        # so the callback keeps working if that name is rebound or the
        # callback is reused with another model. `learning_rate` is the
        # canonical attribute name; `lr` is only a legacy alias.
        current_lr = self.model.optimizer.learning_rate.numpy()
        print(
            "\nLearning rate for epoch {} is {}".format(epoch + 1, current_lr)
        )


# Callbacks: apply the `decay` schedule, then print the resulting rate.
callbacks = [
    tf.keras.callbacks.LearningRateScheduler(decay),
    PrintLR(),
]

# Train for 12 epochs on the training pipeline.
model.fit(
    train_dataset,
    epochs=12,
    callbacks=callbacks,
)

### **Save to GCS bucket**
When moving on to training on GCP, the trained model will be lost after training is complete unless it is saved on a cloud location.

In [None]:
# Save to the GCS bucket when one is configured, otherwise to a local path.
if not BUCKET_NAME:
    print('saving to local path')
    model.save(MODEL_PATH)
else:
    print('saving to GCS location...')
    model.save(f'gs://{BUCKET_NAME}/{MODEL_PATH}')


## Using tensorflow_cloud


### **Training on GCP**
After the cells above have been tested, run the following cell to train on GCP.

In [None]:
# Requirements file listing the extra pip dependencies.
# The `with` block guarantees the file is flushed and closed even if one of
# the writes raises.
with open('requirements.txt', 'w') as requirements_file:
    requirements_file.write('tensorflow-datasets\n')
    requirements_file.write('pandas')

# Calling `run` from within a script which contains the Keras model.
# Comment out this call for a local run to debug.

# Chief machine: 8 CPU cores, 30 memory units, two NVIDIA Tesla T4 accelerators.
chief_machine_config = tfc.MachineConfig(
    cpu_cores=8,
    memory=30,
    accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
    accelerator_count=2,
)

# Docker settings: the image build uses the configured bucket.
docker_build_config = tfc.DockerConfig(
    image_build_bucket=BUCKET_NAME,
)

tfc.run(
    entry_point=None,
    distribution_strategy="auto",
    requirements_txt="requirements.txt",
    chief_config=chief_machine_config,
    docker_config=docker_build_config,
    worker_count=0,
)

### **Evaluate the model.**

In [None]:
# Load the model from the location the save step wrote to: the GCS bucket
# when BUCKET_NAME is set, otherwise the local MODEL_PATH. Without this
# fallback an empty BUCKET_NAME would produce the invalid path `gs:///...`.
model_location = f'gs://{BUCKET_NAME}/{MODEL_PATH}' if BUCKET_NAME else MODEL_PATH
model = tf.keras.models.load_model(model_location)
model.evaluate(eval_dataset)