# Run model module on GCP

In [1]:
import datetime
import json
import os

In [2]:
# Google Cloud settings shared by the cells below.
PROJECT = "machine-learning-1234"
BUCKET = "machine-learning-1234-bucket"
REGION = "us-central1"

# Publish the settings as environment variables so the %%bash cells can read
# them. TFVERSION and PYTHON_VERSION are consumed by the job submission cell
# as --runtime-version and --python-version.
os.environ.update(
    {
        "PROJECT": PROJECT,
        "BUCKET": BUCKET,
        "REGION": REGION,
        "TFVERSION": "2.2",
        "PYTHON_VERSION": "3.7",
    }
)

## Arguments

In [3]:
# Every hyperparameter handed to the trainer, collected in one literal.
# Key order matches the order in which the flags are forwarded later on.
arguments = {
    # File arguments.
    "train_file_pattern": "gs://machine-learning-1234-bucket/gan/data/cifar10_car/train.tfrecord",
    "output_dir": "gs://machine-learning-1234-bucket/pca/trained_models/pca_test",

    # Data parameters: one schema entry per feature stored in the TFRecords.
    "tf_record_example_schema": [
        {"name": "image_raw", "type": "FixedLen", "shape": [], "dtype": "str"},
        {"name": "label", "type": "FixedLen", "shape": [], "dtype": "int"},
    ],
    "image_feature_name": "image_raw",
    "image_encoding": "raw",
    "image_height": 32,
    "image_width": 32,
    "image_depth": 3,
    "label_feature_name": "label",

    # Training parameters.
    "tf_version": 2.2,
    "use_graph_mode": True,
    "distribution_strategy": "Mirrored",
    "train_dataset_length": 5000,
    "train_batch_size": 32,
    "input_fn_autotune": False,
    "save_checkpoints_steps": 10,
    "keep_checkpoint_max": 1000,

    # ResNet parameters.
    "resnet_weights": "imagenet",
    "resnet_layer_name": "conv4_block1_0_conv",
    "preprocess_input": True,

    # PCA parameters.
    "num_cols": 1024,
    "use_sample_covariance": True,
    "top_k_pc": 5,
}


In [4]:
# Display the assembled dictionary so the full configuration is recorded in
# the notebook output.
arguments

{'train_file_pattern': 'gs://machine-learning-1234-bucket/gan/data/cifar10_car/train.tfrecord',
 'output_dir': 'gs://machine-learning-1234-bucket/pca/trained_models/pca_test',
 'tf_record_example_schema': [{'name': 'image_raw',
   'type': 'FixedLen',
   'shape': [],
   'dtype': 'str'},
  {'name': 'label', 'type': 'FixedLen', 'shape': [], 'dtype': 'int'}],
 'image_feature_name': 'image_raw',
 'image_encoding': 'raw',
 'image_height': 32,
 'image_width': 32,
 'image_depth': 3,
 'label_feature_name': 'label',
 'tf_version': 2.2,
 'use_graph_mode': True,
 'distribution_strategy': 'Mirrored',
 'train_dataset_length': 5000,
 'train_batch_size': 32,
 'input_fn_autotune': False,
 'save_checkpoints_steps': 10,
 'keep_checkpoint_max': 1000,
 'resnet_weights': 'imagenet',
 'resnet_layer_name': 'conv4_block1_0_conv',
 'preprocess_input': True,
 'num_cols': 1024,
 'use_sample_covariance': True,
 'top_k_pc': 5}

In [5]:
# Mirror every entry of `arguments` into an upper-cased environment variable
# so the %%bash training cell can forward it as a command-line flag.

# The schema is serialised to JSON and every space is replaced by ";" so the
# value contains no spaces when it is expanded in the shell.
# NOTE(review): presumably the trainer reverses this substitution - confirm.
schema_json = json.dumps(arguments["tf_record_example_schema"])
os.environ["TF_RECORD_EXAMPLE_SCHEMA"] = schema_json.replace(" ", ";")

# Values that are already strings are copied unchanged.
for key in (
    "train_file_pattern",
    "output_dir",
    "image_feature_name",
    "image_encoding",
    "label_feature_name",
    "distribution_strategy",
    "resnet_weights",
    "resnet_layer_name",
):
    os.environ[key.upper()] = arguments[key]

# Numeric and boolean values are converted with str() because os.environ
# only accepts strings.
for key in (
    "image_height",
    "image_width",
    "image_depth",
    "tf_version",
    "use_graph_mode",
    "train_dataset_length",
    "train_batch_size",
    "input_fn_autotune",
    "save_checkpoints_steps",
    "keep_checkpoint_max",
    "preprocess_input",
    "num_cols",
    "use_sample_covariance",
    "top_k_pc",
):
    os.environ[key.upper()] = str(arguments[key])


## Config

In [None]:
%%writefile config.yaml
# Machine configuration for the training job; this file is passed to the
# submission command through --config=config.yaml.
# Option: a custom-tier n1-highmem-16 master with 2 NVIDIA Tesla V100 GPUs.
# NOTE: the following cell writes to the same config.yaml, so whichever of
# the two cells is executed last determines the configuration that is used.
trainingInput:
  scaleTier: CUSTOM
  masterType: n1-highmem-16
  masterConfig:
    acceleratorConfig:
      count: 2
      type: NVIDIA_TESLA_V100

In [None]:
%%writefile config.yaml
# Machine configuration for the training job; this file is passed to the
# submission command through --config=config.yaml.
# Option: a custom-tier n1-highmem-96 master with 8 NVIDIA Tesla V100 GPUs.
# NOTE: this overwrites the config.yaml produced by the preceding cell; run
# only the cell for the machine shape you actually want.
trainingInput:
  scaleTier: CUSTOM
  masterType: n1-highmem-96
  masterConfig:
    acceleratorConfig:
      count: 8
      type: NVIDIA_TESLA_V100

## Train PCA model

In [6]:
%%bash
# Print the output location exported by the Python cells, as a sanity check
# before the cells below delete it and then write to it.
echo ${OUTPUT_DIR}

gs://machine-learning-1234-bucket/pca/trained_models/pca_test


In [None]:
%%bash
# Remove any previous contents of the output location so the training job
# starts from an empty directory.
# ${OUTPUT_DIR:?...} aborts the cell with a clear message when the variable is
# unset or empty, instead of invoking the recursive delete without a target.
# The quotes keep the URL as a single argument.
gsutil -m rm -rf "${OUTPUT_DIR:?OUTPUT_DIR must be set before deleting}"

In [None]:
%%bash
# Submit the PCA trainer package as an AI Platform training job.
#
# The job name is made unique with a UTC timestamp. Flags before the bare
# "--" configure gcloud itself; everything after it is forwarded to
# trainer.task as user arguments.
#
# Every expansion is double-quoted so values are passed through verbatim:
# the JSON schema contains "[" and "]" (glob characters) and $PWD may contain
# spaces, both of which are unsafe to expand unquoted.
JOBNAME="pca_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTPUT_DIR}" "${REGION}" "${JOBNAME}"
gcloud ai-platform jobs submit training "${JOBNAME}" \
    --region="${REGION}" \
    --module-name=trainer.task \
    --package-path="${PWD}/pca_out_of_core_distributed_module/trainer" \
    --job-dir="${OUTPUT_DIR}" \
    --staging-bucket="gs://${BUCKET}" \
    --config=config.yaml \
    --runtime-version="${TFVERSION}" \
    --python-version="${PYTHON_VERSION}" \
    -- \
    --train_file_pattern="${TRAIN_FILE_PATTERN}" \
    --output_dir="${OUTPUT_DIR}" \
    --job-dir=./tmp \
    \
    --tf_record_example_schema="${TF_RECORD_EXAMPLE_SCHEMA}" \
    --image_feature_name="${IMAGE_FEATURE_NAME}" \
    --image_encoding="${IMAGE_ENCODING}" \
    --image_height="${IMAGE_HEIGHT}" \
    --image_width="${IMAGE_WIDTH}" \
    --image_depth="${IMAGE_DEPTH}" \
    --label_feature_name="${LABEL_FEATURE_NAME}" \
    \
    --tf_version="${TF_VERSION}" \
    --use_graph_mode="${USE_GRAPH_MODE}" \
    --distribution_strategy="${DISTRIBUTION_STRATEGY}" \
    --train_dataset_length="${TRAIN_DATASET_LENGTH}" \
    --train_batch_size="${TRAIN_BATCH_SIZE}" \
    --input_fn_autotune="${INPUT_FN_AUTOTUNE}" \
    --save_checkpoints_steps="${SAVE_CHECKPOINTS_STEPS}" \
    --keep_checkpoint_max="${KEEP_CHECKPOINT_MAX}" \
    \
    --resnet_weights="${RESNET_WEIGHTS}" \
    --resnet_layer_name="${RESNET_LAYER_NAME}" \
    --preprocess_input="${PREPROCESS_INPUT}" \
    \
    --num_cols="${NUM_COLS}" \
    --use_sample_covariance="${USE_SAMPLE_COVARIANCE}" \
    --top_k_pc="${TOP_K_PC}"