##### Copyright 2021 The TensorFlow Cloud Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Running vision models from TF Model Garden on GCP with TF Cloud

<table class="tfo-notebook-buttons" align="left">
  <td>
    <!-- <a target="_blank" href="https://www.tensorflow.org/cloud/tutorials/overview.ipynb"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a> MISSING HREF -->
  </td>
  <td>
    <!-- <a target="_blank" href="https://colab.research.google.com/github/tensorflow/cloud/blob/master/g3doc/tutorials/overview.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a> MISSING HREF -->
  </td>
  <td>
    <!-- <a target="_blank" href="https://github.com/tensorflow/cloud/blob/master/g3doc/tutorials/overview.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View on GitHub</a> MISSING HREF -->
  </td>
  <td>
    <!-- <a href="https://storage.googleapis.com/tensorflow_docs/cloud/tutorials/overview.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a> MISSING HREF -->
  </td>
  <td>
        <!-- <a href="https://kaggle.com/kernels/welcome?src=https://github.com/tensorflow/cloud/blob/master/g3doc/tutorials/overview.ipynb" target="_blank"> <img width="90" src="https://www.kaggle.com/static/images/site-logo.png" alt="Kaggle logo" />Run in Kaggle</a> MISSING HREF -->
  </td>
</table>

In this example we will use [run_models](https://github.com/tensorflow/cloud/blob/690c3eee65dadee8af260a19341ff23f42f1f070/src/python/tensorflow_cloud/core/experimental/models.py#L30) from the experimental module of TF Cloud to train a ResNet model from [TF Model Garden](https://github.com/tensorflow/models/tree/master/official) on [imagenette from TFDS](https://www.tensorflow.org/datasets/catalog/imagenette).

## Install Packages

We need a development build of `tensorflow-cloud` installed directly from GitHub (the command below installs the head of pull request 352), the official release of `tf-models-official`, and `keras` 2.6.0rc0 for compatibility.

In [None]:
!pip install -q 'git+https://github.com/tensorflow/cloud.git@refs/pull/352/head#egg=tensorflow-cloud&subdirectory=src/python' tf-models-official keras==2.6.0rc0

## Import required modules

In [2]:
import os
import sys

import tensorflow as tf
import tensorflow_cloud as tfc
from tensorflow_cloud.core.experimental.models import run_models

# Print the version string of the imported tensorflow_cloud package.
print(tfc.__version__)

0.1.17.dev


## Project Configurations
Set the project parameters. For more details on the Google Cloud-specific parameters, please refer to the [Google Cloud Project Setup Instructions](https://www.kaggle.com/nitric/google-cloud-project-setup-instructions/).

In [None]:
# Google Cloud-specific parameters used by the rest of this notebook.
# The `#@param` markers render these values as editable form fields in Colab.

# TODO: Please set GCP_PROJECT_ID to your own Google Cloud project ID.
# Used by the authentication cell (Kaggle credentials and the
# GOOGLE_CLOUD_PROJECT environment variable in Colab).
GCP_PROJECT_ID = 'YOUR_PROJECT_ID' #@param {type:"string"}

# TODO: set GCS_BUCKET to your own Google Cloud Storage (GCS) bucket.
# Used as the Docker image build bucket and passed to run_models().
GCS_BUCKET = 'YOUR_GCS_BUCKET_NAME' #@param {type:"string"}

# DO NOT CHANGE: Currently only the 'us-central1' region is supported.
REGION = 'us-central1'

# OPTIONAL: You can change the job name to any string.
# Attached to the training job as the value of the 'job' label.
JOB_NAME = 'run_models_demo' #@param {type:"string"}

## Authenticating the notebook to use your Google Cloud Project

This code authenticates the notebook by verifying your Google Cloud credentials and identity. It is inside the `if not tfc.remote()` block to ensure that it is only run in the notebook, and will not be run when the notebook code is sent to Google Cloud.

Note: For Kaggle Notebooks click on "Add-ons"->"Google Cloud SDK" before running the cell below.

In [None]:
# Authenticate only while running interactively in the notebook; this block
# is skipped when the same code is executed remotely on Google Cloud.
if not tfc.remote():

    # Kaggle Notebooks: the `kaggle_secrets` module is already loaded there,
    # so its presence in sys.modules identifies the environment.
    if "kaggle_secrets" in sys.modules:
        from kaggle_secrets import UserSecretsClient
        secrets_client = UserSecretsClient()
        secrets_client.set_gcloud_credentials(project=GCP_PROJECT_ID)

    # Colab Notebooks: run the interactive user login, then record the
    # project ID in the GOOGLE_CLOUD_PROJECT environment variable.
    if "google.colab" in sys.modules:
        from google.colab import auth
        auth.authenticate_user()
        os.environ.update(GOOGLE_CLOUD_PROJECT=GCP_PROJECT_ID)

## Set up TensorFlowCloud run

Set up the parameters for `tfc.run()`. The `chief_config`, `worker_count` and `worker_config` will be set up individually for each distribution strategy. For more details, refer to the [TensorFlow Cloud overview tutorial](https://colab.research.google.com/github/tensorflow/cloud/blob/master/g3doc/tutorials/overview.ipynb).

In [None]:
# Write the pip requirements file referenced by `requirements_txt` below.
# It lists the same three packages that the notebook installs for itself,
# one per line, with no trailing newline after the last entry.
requirement_lines = [
    'git+https://github.com/tensorflow/cloud.git@refs/pull/352/head#egg=tensorflow-cloud&subdirectory=src/python',
    'tf-models-official',
    'keras==2.6.0rc0',
]
with open('requirements.txt', 'w') as requirements_file:
    requirements_file.write('\n'.join(requirement_lines))

# Keyword arguments forwarded to the remote run via run_models(**run_kwargs).
run_kwargs = {
    # Path of the requirements file written above.
    'requirements_txt': 'requirements.txt',
    # Build on the TF 2.5 GPU deep-learning image, using the user's bucket
    # as the image build bucket.
    'docker_config': tfc.DockerConfig(
        parent_image="gcr.io/deeplearning-platform-release/tf2-gpu.2-5",
        image_build_bucket=GCS_BUCKET,
    ),
    # Predefined machine configuration for the chief node
    # (presumably 4x P100 GPUs, going by the preset's name; confirm in tfc docs).
    'chief_config': tfc.COMMON_MACHINE_CONFIGS["P100_4X"],
    # Label the job with the configured job name.
    'job_labels': {'job': JOB_NAME},
}

## Run the training using run_models

In [None]:
# Launch the training: a ResNet model on the imagenette dataset, using
# GCS_BUCKET for storage. The last two positional arguments are presumably
# the names of the training and validation splits (confirm against the
# run_models signature). The returned `values` mapping is read by the later
# cells through its 'tensorboard_logs' and 'saved_model' entries.
values = run_models(
    'imagenette', 'resnet', GCS_BUCKET,
    'train', 'validation',
    **run_kwargs)

## Training Results

### Reconnect your Colab instance

Most remote training jobs are long running. If you are using Colab, it may time out before the training results are available.

In that case, **rerun the following sections in order** to reconnect and configure your Colab instance to access the training results.

1.   Import required modules
2.   Project Configurations
3.   Authenticating the notebook to use your Google Cloud Project

**DO NOT** rerun the rest of the code.

### Load TensorBoard
While the training is in progress, you can use TensorBoard to view the results. Note that the results will show only after your training has started. This may take a few minutes.

In [None]:
if not tfc.remote():
  %load_ext tensorboard
  tensorboard_logs_dir = values['tensorboard_logs']
  %tensorboard --logdir $tensorboard_logs_dir

### Load your trained model

Once training is complete, you can retrieve your model from the GCS bucket you specified above.

In [None]:
# Load the trained model from the location reported by run_models() and
# print its architecture. Guarded so that it only runs in the notebook, not
# in the remote training job. `tf` comes from the import cell at the top of
# the notebook, which the "Reconnect your Colab instance" steps rerun.
if not tfc.remote():
  trained_model = tf.keras.models.load_model(values['saved_model'])
  trained_model.summary()