In [None]:
%%bash
[ ! -d "/content/code-t5" ] && git clone 'https://github.com/bzz/code-t5.git'
cd code-t5/
git pull origin master --rebase

In [None]:
print("Installing dependencies...")
%tensorflow_version 2.x
!pip install -qr code-t5/requirements-train.txt

In [None]:
import os

# Root GCS location; every other path below is derived from it.
BASE_DIR = "gs://t5-codex" #@param { type: "string" }
if not BASE_DIR or BASE_DIR == "gs://":
  raise ValueError("You must enter a BASE_DIR.")
DATA_DIR = os.path.join(BASE_DIR, "data")
MODELS_DIR = os.path.join(BASE_DIR, "models")

MODEL_SIZE = "arch-lm_v1-lm" #@param["small", "base", "base-t5.1.1", "base_shared", "base_shared_1k", "base-top5k", "base-top5k", "lm_ifa_1k", "arch-lm_v1-lm", "large", "3B", "11B"]
MODEL_DIR = os.path.join(MODELS_DIR, MODEL_SIZE)
# Built in Python: "${BASE_DIR}/cache" is shell syntax and would be stored
# verbatim, because Python does not expand it.
CACHE_DIR = os.path.join(BASE_DIR, "cache")


ON_CLOUD = True

TRAIN_STEPS = 200000 #@param {type: "integer"}


In [None]:
import functools
import time
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import tensorflow.compat.v1 as tf
import tensorflow_datasets as tfds

import t5
import t5.models
import seqio


if ON_CLOUD:
  print("Setting up GCS access...")
  import tensorflow_gcs_config
  from google.colab import auth
  # Set credentials for GCS reading/writing from Colab and TPU.
  TPU_TOPOLOGY = "v2-8"
  try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    TPU_ADDRESS = tpu.get_master()
    print('Running on TPU:', TPU_ADDRESS)
  except ValueError as err:
    # Raise an ordinary exception type (not bare BaseException) and chain the
    # resolver's error so the underlying cause stays visible in the traceback.
    raise RuntimeError('ERROR: Not connected to a TPU runtime') from err
  auth.authenticate_user()
  tf.enable_eager_execution()
  tf.config.experimental_connect_to_host(TPU_ADDRESS)
  tensorflow_gcs_config.configure_gcs_from_colab_auth()


tf.disable_v2_behavior()

# Improve logging.
from contextlib import contextmanager
import logging as py_logging

if ON_CLOUD:
  tf.get_logger().propagate = False
  py_logging.root.setLevel('INFO')

@contextmanager
def tf_verbosity_level(level):
  """Temporarily set the TF logging verbosity to `level` inside a `with` block.

  Args:
    level: value passed to `tf.logging.set_verbosity` on entry.

  Yields:
    None. The verbosity that was active on entry is restored on exit, including
    when the body of the `with` block raises.
  """
  previous_level = tf.logging.get_verbosity()
  tf.logging.set_verbosity(level)
  try:
    yield
  finally:
    # Restore even on error, so a failing cell does not leave the notebook
    # stuck at the temporary verbosity.
    tf.logging.set_verbosity(previous_level)

# Dataset

In [None]:
import sys
sys.path.insert(0,'/content/code-t5')

## Preview

In [None]:
# the list of Tasks we support https://github.com/google/seqio#defining-a-task
import codeT5.tasks
seqio.TaskRegistry.names()

In [None]:
import codeT5.tasks
import gin
import seqio.utils
from t5.data import preprocessors

task = seqio.TaskRegistry.get("fl_py_50stars_top5k_2019")
seqio.utils.add_global_cache_dirs([os.path.join(BASE_DIR, "cache")])

vocab = codeT5.tasks.vocab

ds = task.get_dataset(split="validation", sequence_length={"inputs": 128, "targets": 32}, use_cached=True)
with gin.unlock_config(): 
  ## un-comment to configure preprocessing for unsupervised LM pre-training
  # gin.bind_parameter("preprocessors.unsupervised.preprocessors", [
  #   preprocessors.select_random_chunk,
  #   preprocessors.reduce_concat_tokens,
  #   preprocessors.split_tokens_to_targets_length,
  # ])
  gin.bind_parameter("preprocessors.select_random_chunk.max_length", 65536)


print("A few preprocessed validation examples...")
for ex in tfds.as_numpy(ds.take(5)):
  print(ex)
  if "inputs" in ex:
    print("inputs: '" + vocab.decode(ex['inputs'].tolist()).replace("Ċ", "\n") + "'")
    print()
  print("targets: '" + vocab.decode(ex['targets'].tolist()).replace("Ċ", "\n") + "'")
  print("\n\n")

## Token-level stats


In [None]:
import codeT5

# Comment this task in tasks.py first!
#
# A new task that does not sample, truncate or pack examples
# the name has to be the same, to leverage the GCS cache
seqio.TaskRegistry.add(
    "bq_py_2016_minus_ethpy150",
    source=seqio.TextLineDataSource(
        split_to_filepattern=codeT5.tasks.bq_py_2016_minus_ethpy150_paths,
        num_input_examples={"train": 5884757, "validation": 1292044},
    ),
    preprocessors=[
        codeT5.tasks.fl_preprocessor,
        seqio.preprocessors.tokenize,
        seqio.CacheDatasetPlaceholder(),
    ],
    metric_fns=[],
    output_features=codeT5.tasks.DEFAULT_OUTPUT_FEATURES)

In [None]:
seqio.TaskRegistry.names()

In [None]:
gh_task = seqio.TaskRegistry.get("bq_py_2016_minus_ethpy150")
seqio.utils.add_global_cache_dirs([os.path.join(BASE_DIR, "cache")])
raw_ds = gh_task.get_dataset(sequence_length=None, split="validation", use_cached=True)
vocab = codeT5.tasks.vocab

ds_len = raw_ds.map(lambda x: tf.size(x['targets']))

for ex in tfds.as_numpy(ds_len.take(5)):
  print(ex)

In [None]:
# did not finish in 1.5h on the raw TXT source on Colab, but took 20 min on the cached dataset
df = tfds.as_dataframe(ds_len)

In [None]:
df.max()

In [None]:
df.hist(bins=30, log=True)

In [None]:
df.to_pickle("/content/bq_val_len.pkl")

In [None]:
raw_train_ds = gh_task.get_dataset(sequence_length=None, split="train", use_cached=True)
ds_train_len = raw_train_ds.map(lambda x: tf.size(x['targets']))
df_train = tfds.as_dataframe(ds_train_len)

In [None]:
df_train.max()

In [None]:
df_train.hist(bins=30, log=True)

In [None]:
# Save the *train* split lengths; `df` holds the validation lengths, which are
# already written to bq_val_len.pkl.
df_train.to_pickle("/content/bq_train_len.pkl")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!cp /content/bq_val_len.pkl '/content/drive/My Drive/'
!cp /content/bq_train_len.pkl '/content/drive/My Drive/'

# Monitoring

In [None]:
%%bash

pip install -U tensorboard-plugin-profile #"cloud-tpu-profiler>=2.3.0"

# fixes a bug in the plugin that prevents loading profile samples when the run name is "."
# TODO(bzz): submit a patch upstream to 'tensorflow/profiler'
plugin='/usr/local/lib/python3.7/dist-packages/tensorboard_plugin_profile/profile_plugin.py'
if [[ -f "${plugin}" ]]; then
  patch "${plugin}" < code-t5/profile_plugin.patch || echo "Patching ${plugin} failed"
fi


In [None]:
if ON_CLOUD:
  %reload_ext tensorboard
%tensorboard --logdir="$MODEL_DIR"

In [None]:
!kill 2273

In [None]:
# Kept as a note only: a bare address is not valid Python and would stop Run All.
# NOTE(review): presumably the TPU profile service address to paste into
# TensorBoard's "Capture profile" dialog - confirm.
# 10.76.12.218:8466

# Cache

Cache the dataset in .tfrecord format (depends on Apache Beam) on GCS.

Needs to be done only once for each Task (dataset), all 4 Python datasets are already cached.

In [None]:
!pip install apache-beam[gcp] python-snappy

In [None]:
# works only with TextLineDataSource
!cd code-t5 && python -m seqio.scripts.cache_tasks_main \
 --tasks=py_50stars_top5k_2019 \
 --module_import=codeT5.tasks \
 --output_cache_dir='gs://t5-codex/cache' \
 --alsologtostderr

# Train

![model architecture](https://i.imgur.com/BHuHUP2.png)

## bi_v1_shared prefix_lm

Encoder-decoder models with shared encoder and decoder parameters, trained using an unsupervised "prefix LM" objective.

### Base

Train a 2xBERT-base 220M param model (Total size: 138M) on the top5k repos with >50 stars dataset (~400M tokens).

In [None]:
#cache, v2-8, model_parallelism = 1
!cd code-t5/ && python3 -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs='$BASE_DIR/cache' \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="models/shared-prefix_lm.gin" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 1" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_TOPOLOGY'" \
  --gin_param="utils.run.train_steps = $TRAIN_STEPS" \
  --gin_param="utils.run.keep_checkpoint_max = 8" \
  --gin_param="tokens_per_batch = 65536" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = None" \
  --gin_param="MIXTURE_NAME = 'fl_bq_py_mix'" \
  --gin_param="mesh_train_dataset_fn.use_cached = True"
  
  # --gin_file="dataset/github_python_2016.gin" \
  # --gin_file="models/t5.1.1.base.gin" \
  # --gin_file="objectives/prefix_lm.gin" \

  # DOES NOT WORK ON COLAB!!! have to edit the .gin file :( 
  # --gin_param="utils.run.sequence_length = {'inputs': 1024, 'targets': 512}" 


In [None]:
!echo "$TPU_TOPOLOGY"

In [None]:
# cache, v2-8, model_parallelism = 2
!cd code-t5/ && python -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs='$BASE_DIR/cache' \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="models/shared-prefix_lm.gin" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 2" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_TOPOLOGY'" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = 2048" \
  --gin_param="tokens_per_batch= 131072" \
  --gin_param="run.train_steps = $TRAIN_STEPS" \
  --gin_param="run.keep_checkpoint_max = 8" \
  --gin_param="MIXTURE_NAME = 'all_py_2019_mix'" \
  --gin_param="mesh_train_dataset_fn.use_cached = True"

# utils.run batch_size = tokens_per_replica=2048, 2048*8/512*2 = 16 seq/batch, which is overridden by tokens_per_batch = 65556



10.4.42.106:8466

### Large

Train a larger model, 2xBERT-large 770M param (Total size: 436M), on bigger dataset (2.1B tokens)

In [None]:
# cache, v2-8, model_parallelism = 2
!cd code-t5/ && python -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --t5_tfds_data_dir="$DATA_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs='$BASE_DIR/cache' \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="models/shared-prefix_lm.gin" \
  --gin_file="models/bi_bert_large.gin" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 2" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_TOPOLOGY'" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = 2048" \
  --gin_param="tokens_per_batch= 131072" \
  --gin_param="utils.run.train_steps = $TRAIN_STEPS" \
  --gin_param="utils.run.keep_checkpoint_max = 8" \
  --gin_param="utils.run.save_checkpoints_steps = 2000" \
  --gin_param="MIXTURE_NAME = 'all_py_2019_mix'" \
  --gin_param="mesh_train_dataset_fn.use_cached = True"



## LM

### lm_v1 lm

Autoregressive single-stack Transformer (GPT-like) trained to predict next tokens.

In [None]:
# Settings for the lm_v1 run. MODEL_DIR is built in Python from MODELS_DIR:
# the previous "${BUCKET}/models/${MODEL_SIZE}" was shell syntax, which Python
# keeps as literal text, and BUCKET is not defined in this notebook.
MODEL_SIZE = "arch-lm_v1-lm"
MODEL_DIR = os.path.join(MODELS_DIR, MODEL_SIZE)
TRAIN_STEPS = 524288

In [None]:
!cd code-t5/ && python3 -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs="${CACHE_DIR}" \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="dataset/dataset.gin" \
  --gin_file="models/lm_v1.gin" \
  --gin_file="objectives/lm.gin" \
  --gin_param="mtf_model.MtfModel.model_type = 'lm'" \
  --gin_param="utils.run.model_type = 'lm'" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 1" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_SIZE'" \
  --gin_param="utils.run.train_steps = $TRAIN_STEPS" \
  --gin_param="utils.run.sequence_length = {{'inputs': 512, 'targets': 1024}}" \
  --gin_param="utils.run.save_checkpoints_steps = 2000" \
  --gin_param="utils.run.keep_checkpoint_max = 25" \
  --gin_param="tokens_per_batch = 65536" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = None" \
  --gin_param="MIXTURE_NAME = '${TASK_NAME}'" \
  --gin_param="mesh_train_dataset_fn.use_cached = True"


### lm_v1_ifa prefix_lm

"delimited_lm" a single-stack Transforme with the full attention mask over the inputs, trained on prefix LM objective.

In [None]:
MODEL_SIZE = "lm_ifa_1k" 
MODEL_DIR = os.path.join(MODELS_DIR, MODEL_SIZE)
TRAIN_STEPS = 524288

In [None]:
!cd code-t5/ && python3 -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs='$BASE_DIR/cache' \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="dataset/dataset.gin" \
  --gin_file="models/lm_v1_ifa.gin" \
  --gin_file="objectives/prefix_lm.gin" \
  --gin_param="select_random_chunk.max_length = 65536" \
  --gin_param="select_random_chunk.feature_key = 'targets'" \
  --gin_param="mtf_model.MtfModel.model_type = 'delimited_lm'" \
  --gin_param="utils.run.model_type = 'delimited_lm'" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 1" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_TOPOLOGY'" \
  --gin_param="utils.run.train_steps = $TRAIN_STEPS" \
  --gin_param="utils.run.sequence_length = {{'inputs': 1024, 'targets': 512}}" \
  --gin_param="utils.run.save_checkpoints_steps = 12000" \
  --gin_param="utils.run.keep_checkpoint_max = 8" \
  --gin_param="tokens_per_batch = 65536" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = None" \
  --gin_param="MIXTURE_NAME = 'fl_bq_py_mix'" \
  --gin_param="mesh_train_dataset_fn.use_cached = True"


# Evaluate

## With decoding

In [None]:
!cd code-t5/ && python -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs='$BASE_DIR/cache' \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="dataset/dataset.gin" \
  --gin_file="models/lm_v1_ifa.gin" \
  --gin_file="objectives/prefix_lm.gin" \
  --gin_file="sample_decode.gin" \
  --gin_file="eval.gin" \
  --gin_param="t5.models.mesh_transformer.mesh_eval_dataset_fn.num_eval_examples = 30" \
  --gin_param="Bitransformer.decode.temperature=0.6" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_TOPOLOGY'" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 1" \
  --gin_param="utils.run.sequence_length = {{'inputs': 1024, 'targets': 512}}" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = None" \
  --gin_param="split = 'validation'" \
  --gin_param="eval_checkpoint_step = 'all'" \
  --gin_param="MIXTURE_NAME = 'fl_bq_py_mix'" \
  --gin_param="Bitransformer.decode.max_decode_length = 1024" \
  --gin_param="mesh_eval_dataset_fn.use_cached = True"


## Perplexity Eval

In [None]:
!cd code-t5/ && python -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --additional_task_cache_dirs='$BASE_DIR/cache' \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="dataset/dataset.gin" \
  --gin_file="models/lm_v1.gin" \
  --gin_file="objectives/lm.gin" \
  --gin_file="perplexity_eval.gin" \
  --gin_file="sample_decode.gin" \
  --gin_param="Bitransformer.decode.temperature=0.6" \
  --gin_param="utils.run.model_type = 'lm'" \
  --gin_param="utils.tpu_mesh_shape.tpu_topology = '$TPU_TOPOLOGY'" \
  --gin_param="utils.tpu_mesh_shape.model_parallelism = 1" \
  --gin_param="utils.run.sequence_length = {{'inputs': 512, 'targets': 1024}}" \
  --gin_param="serialize_num_microbatches.tokens_per_microbatch_per_replica = None" \
  --gin_param="split = 'validation'" \
  --gin_param="eval_checkpoint_step = 'all'" \
  --gin_param="MIXTURE_NAME = 'fl_bq_py_mix'" \
  --gin_param="Bitransformer.decode.max_decode_length = 1024" \
  --gin_param="mesh_eval_dataset_fn.use_cached = True"

#  --gin_file="models/shared-prefix_lm.gin" \
#  --gin_file="models/t5.1.1.base.gin" \
#  --gin_file="models/bi_bert_large.gin" \

# Export SavedModel

You can now export the trained model from the checkpoint into a format ready for serving predictions over HTTP with [TF Serving](https://www.tensorflow.org/tfx/guide/serving)


In [None]:
# GCP project/zone of the TPU and the GCS folder that receives the exported SavedModel.
PROJECT="data-analytics-experiments"
# Zone names are "<region>-<letter>"; "europe-west4a" was missing the hyphen.
ZONE="europe-west4-a"
EXPORT_DIR=os.path.join(MODEL_DIR, "export")

In [None]:
!cd code-t5/ && python -m t5.models.mesh_transformer_main \
  --gcp_project="$PROJECT" \
  --tpu_zone="$ZONE" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5" \
  --use_model_api \
  --gin_param="mtf_model.MtfModel.model_type='lm'" \
  --temperature=0.6 \
  --keep_top_k=-1 \
  --mode="export_predict" \
  --export_dir="$EXPORT_DIR"

In [None]:
saved_model_path = os.path.join(EXPORT_DIR, max(tf.io.gfile.listdir(EXPORT_DIR)))

!saved_model_cli show --dir $saved_model_path  --all

# Predict

## Using Model API

Run inference locally using latest model snapshot from the Cloud Storage.

Depending on the model size and the device you are using (CPU/GPU/TPU) initialization of the model and loading the computation graph may take a while (several minutes).

### CLI

In [None]:
%sh
# TPU\GPU\CPU with latest model checkpoint

cd code-t5
wget 'https://raw.githubusercontent.com/google-research/google-research/master/cubert/source_code.py.test'

python -m t5.models.mesh_transformer_main  \
  --tpu="$TPU_ADDRESS" \
  --model_dir="$MODEL_DIR" \
  --module_import="codeT5.tasks" \
  --gin_location_prefix="codeT5/gin/" \
  --gin_file="models/shared-prefix_lm.gin" \
  --gin_file="models/bi_bert_large.gin" \
  --gin_file="beam_search.gin" \
  --gin_param="use_model_api = True" \
  --checkpoint_mode="latest"
  --input_file="source_code.py.test"
  --output_file="source_code.py.out"

  cat source_code.py.out


### Python - pyTorch API

In [None]:
# Prompt for the model: the start of a typed function with a doctest-style docstring.
# NOTE(review): `.replace("\n", "")` removes every newline, so this prompt is
# sent as one line, while `has_close` below keeps its newlines. Other cells in
# this notebook spell newlines as "Ċ" - confirm whether "Ċ" was intended here.
bubble_sort = """from typing import List

def bubble_sort(numbers: List[int]):
    \"\"\" Sort given array of numbers in assending order using Bubble Sort algorithm.
    >>> bubble_sort([3.0, 2.0, 1.0])
    [1.0, 2.0, 3.0]

    >>> bubble_sort([3.0, 1.0, 2.0])
    [1.0, 2.0, 3.0]
    \"\"\"
""".replace("\n", "")

has_close = """from typing import List

def has_close_elements(numbers: List[float], threshold: float) -> bool:
    \"\"\" Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    \"\"\"
"""

In [None]:
# Model initialization/loading overhead for multiple consecutive prediction calls
# can be avoided by loading the model only once through the PyTorch API.

# Run it on GPU backend

import functools
import t5
import torch
import transformers
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hugging Face T5 configuration used to load the checkpoint below.
# The dict literal is now closed with `}` before the call's `)`; without it the
# cell failed with a SyntaxError.
t5_config = transformers.T5Config.from_dict({
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "vocab_size": 32000,
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "is_encoder_decoder": False,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": True,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {},
})

model = t5.models.HfPyTorchModel(t5_config, "/content/hft5/", device)

pt_model = transformers.T5EncoderModel.from_pretrained(
    "gs://t5-codex/models/arch-lm_v1-lm/model.ckpt-259400.index",
    from_tf=True,
    config=t5_config)

model._model = pt_model


# Generate some predictions
inputs = [
    "public static void ",
    "import ten",
    bubble_sort,
    has_close,
]
model.predict(
    inputs,
    sequence_length={"inputs": 128},
    batch_size=2,
)


# The Transformers lib can load TF checkpoints into PyTorch without manual conversion first (but it's slower)
#  https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained
#
# https://github.com/google-research/text-to-text-transfer-transformer/blob/main/t5/models/mtf_model.py
# https://github.com/google-research/text-to-text-transfer-transformer/issues/463#issuecomment-717580821


## From exported SavedModel

In [None]:
%tensorflow_version 2.x
!pip install tensorflow-text
from google.colab import auth
auth.authenticate_user()

In [None]:
import tensorflow as tf
import tensorflow_text  # Required to run exported model.

saved_model_path = os.path.join(EXPORT_DIR, max(tf.io.gfile.listdir(EXPORT_DIR)))

def load_predict_fn(model_path):
  """Load the SavedModel at `model_path` and return a function that runs it.

  When TF executes eagerly the model is loaded with `tf.saved_model.load`;
  otherwise it is loaded into a new TF1 session on a reset default graph.
  In both cases the returned function passes its argument to the
  "serving_default" signature and returns that signature's "outputs".

  Args:
    model_path: path of the exported SavedModel directory.

  Returns:
    A one-argument callable that maps inputs to the model outputs.
  """
  serve_tags = ["serve"]

  if not tf.executing_eagerly():
    print("Loading SavedModel in tf 1.x graph mode.")
    tf.compat.v1.reset_default_graph()
    session = tf.compat.v1.Session()
    meta_graph = tf.compat.v1.saved_model.load(session, serve_tags, model_path)
    signature = meta_graph.signature_def["serving_default"]
    print("Input name: " + str(signature.inputs))

    def predict_in_session(x):
      # Tensor names are looked up on every call, exactly like the lambda did.
      return session.run(
          fetches=signature.outputs["outputs"].name,
          feed_dict={signature.inputs["inputs"].name: x})

    return predict_in_session

  print("Loading SavedModel in eager mode.")
  loaded = tf.saved_model.load(model_path, serve_tags)

  def predict_eagerly(x):
    result = loaded.signatures['serving_default'](tf.constant(x))
    return result['outputs'].numpy()

  return predict_eagerly

predict_fn = load_predict_fn(saved_model_path)

In [None]:
def answer(question):
  """Run `predict_fn` on one prompt and return its first output decoded as UTF-8."""
  first_output = predict_fn([question])[0]
  return first_output.decode('utf-8')

for question in ["password = ",
                  "def __main__():Ċ  ",
                  "import",
                  "a"]:
    print(answer(question))

In [None]:
import tensorflow as tf

tf.debugging.set_log_device_placement(False)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
!nvidia-smi -L

# Docker for serving prediction

In [None]:
%%bash
# Bake the exported SavedModel into a TensorFlow Serving image and save it as a tar file.
# %%bash is required: these are shell commands, not Python.

export MODEL_NAME="py5k_prefix_lm"
export SAVED_MODEL_PATH="${PWD}/mtf-model-export"

sudo systemctl start docker

# The export is a directory, so it has to be copied recursively. The target is
# created first so the versioned folder lands inside it.
mkdir -p "$SAVED_MODEL_PATH"
gsutil cp -r 'gs://t5-codex/models/large/export/1630574205' "$SAVED_MODEL_PATH"

# Download the TensorFlow Serving Docker image and repo:
docker pull tensorflow/serving:nightly

# First, run a serving image as a daemon:
docker run -d --name serving_base tensorflow/serving:nightly

# Next, copy the `SavedModel` to the container's model folder:
docker cp "$SAVED_MODEL_PATH" "serving_base:/models/$MODEL_NAME"

# Now, commit the container that's serving the model:
docker commit --change "ENV MODEL_NAME $MODEL_NAME" serving_base "$MODEL_NAME"

# Finally, save the image to a tar file:
docker save "$MODEL_NAME" -o "$MODEL_NAME.tar"

# stop `serving_base`:
docker kill serving_base

In [None]:
%%bash
# Smoke-test the serving image: start it, send one prediction request, stop it.
# Each %%bash cell runs in its own shell, so MODEL_NAME is set again here.
MODEL_NAME="py5k_prefix_lm"

# -d runs the container detached (instead of `-t ... &`), so the cell can continue.
docker run -d --rm -p 8501:8501 --name "$MODEL_NAME-server" "$MODEL_NAME"

# The server needs a moment before it accepts connections; retry instead of
# failing on the first refused connection.
curl --retry 10 --retry-delay 3 --retry-connrefused \
    -d '{"inputs": ["import tensorflow "]}' \
    -X POST "http://localhost:8501/v1/models/$MODEL_NAME:predict"

docker stop "$MODEL_NAME-server"

In [None]:
# 18.04 LTS https://docs.docker.com/engine/install/ubuntu/

!sudo apt-get update
!sudo apt-get install \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg \
    lsb-release

!curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
!echo \
  "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
  $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

!sudo apt-get update
!sudo apt-get install docker-ce docker-ce-cli containerd.io

In [None]:
!sudo service docker stop


In [None]:
!sudo docker run hello-world
