In [1]:
# Import libraries and modules
import os
import tensorflow as tf

from pca_out_of_core_distributed_module.trainer import model
from pca_out_of_core_distributed_module.trainer import training_inputs

# Local Development

## Arguments

In [2]:
# Every setting used by this notebook is collected in a single dict literal.
# Keys are grouped by purpose and listed in the order they are inserted.
arguments = {
    # File arguments.
    "train_file_pattern": "gs://machine-learning-1234-bucket/gan/data/cifar10_car/train.tfrecord",
    # Checkpoints and exports in the recorded run appear beneath this directory.
    "output_dir": "trained_models/pca",

    # Data parameters.
    "tf_record_example_schema": [
        {"name": "image_raw", "type": "FixedLen", "shape": [], "dtype": "str"},
        {"name": "label", "type": "FixedLen", "shape": [], "dtype": "int"},
    ],
    "image_feature_name": "image_raw",
    "image_encoding": "raw",
    "image_height": 32,
    "image_width": 32,
    "image_depth": 3,
    "label_feature_name": "label",

    # Training parameters.
    "tf_version": 2.2,
    "use_graph_mode": True,
    "distribution_strategy": "Mirrored",
    "train_dataset_length": 5000,
    "train_batch_size": 32,
    "input_fn_autotune": False,
    "save_checkpoints_steps": 10,
    "keep_checkpoint_max": 1000,

    # ResNet parameters.
    "resnet_weights": "imagenet",
    "resnet_layer_name": "conv4_block1_0_conv",
    "preprocess_input": True,

    # PCA parameters.
    "num_cols": 1024,
    "use_sample_covariance": True,
    "top_k_pc": 5,
}


## Dataset

In [3]:
# Only this subset of the notebook arguments is forwarded to the input function.
input_param_names = (
    "tf_record_example_schema",
    "image_feature_name",
    "image_encoding",
    "image_height",
    "image_width",
    "image_depth",
    "label_feature_name",
    "input_fn_autotune",
)

# read_dataset hands back a callable; invoking it produces the dataset that
# the following cells iterate over.
dataset_fn = training_inputs.read_dataset(
    file_pattern=arguments["train_file_pattern"],
    batch_size=arguments["train_batch_size"],
    params={name: arguments[name] for name in input_param_names},
)
dataset = dataset_fn()

Instructions for updating:
Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.experimental.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.experimental_deterministic`.


In [4]:
# Walk the dataset once, recording the leading dimension of every batch.
# `batch` is intentionally left bound to the final batch after the loop: it is
# printed here and later passed to get_predictions as the input images.
per_batch_counts = []
for batch in dataset:
    per_batch_counts.append(batch.shape[0])
image_count = sum(per_batch_counts)
print(image_count)
print(batch.shape)

5000
(8, 32, 32, 3)


## Train model

In [5]:
# Build the trainer from the complete arguments dict (passed as `params`).
trainer = model.TrainModel(params=arguments)

In [6]:
# Run training. In the recorded run below this used MirroredStrategy on two
# GPUs, loaded checkpoint ckpt-78 from trained_models/pca/checkpoints, and
# wrote export assets under trained_models/pca/export/20201110232937 -- the
# export name that the prediction cell passes to get_predictions.
trainer.train_model()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Number of devices = 2
Loading latest checkpoint: trained_models/pca/checkpoints/ckpt-78
Checkpoint saved at trained_models/pca/checkpoints/ckpt-78
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: trained_models/pca/export/20201110232937/assets


## Prediction

In [7]:
def get_saved_model_serving_signatures(export_name, params):
    """Loads an exported SavedModel and picks out its default serving signature.

    The export is read from `<params["output_dir"]>/export/<export_name>`.
    The available signature keys and the chosen signature's structured
    outputs are printed as a side effect.

    Args:
        export_name: str, name of exported SavedModel.
        params: dict, user passed parameters; only "output_dir" is read here.

    Returns:
        2-tuple of (loaded SavedModel, its "serving_default" signature).
    """
    export_path = os.path.join(params["output_dir"], "export", export_name)
    loaded_model = tf.saved_model.load(export_dir=export_path)

    signatures = loaded_model.signatures
    print("signature_keys = {}".format(list(signatures.keys())))

    infer = signatures["serving_default"]
    print("structured_outputs = {}".format(infer.structured_outputs))

    # The model object is handed back together with the signature: the caller
    # has to keep it alive so that infer can still find the variables within
    # the graph from the outer scope.
    return loaded_model, infer


In [8]:
def get_predictions(export_name, images, params):
    """Runs an exported SavedModel's serving signature on a batch of images.

    Args:
        export_name: str, name of exported SavedModel.
        images: tensor, rank 4 image tensor of shape
            (batch_size, image_height, image_width, image_depth).
        params: dict, user passed parameters.

    Returns:
        Whatever the SavedModel's serving signature returns when called with
        `serving_inputs=images`. For the export used in this notebook that is
        a dict keyed by output name (e.g. "tf_op_layer_pca_projections").
    """
    # Keep `loaded_model` bound while `infer` is called: the signature
    # function relies on the loaded model to find its variables.
    loaded_model, infer = get_saved_model_serving_signatures(
        export_name, params
    )

    predictions = infer(serving_inputs=images)

    return predictions


In [9]:
# Name of the export directory to load. It matches the directory logged by
# the training cell above (trained_models/pca/export/20201110232937) and has
# to be updated by hand whenever training writes a new export.
export_name = "20201110232937"

# `batch` is the last batch left over from the dataset-counting loop.
predictions = get_predictions(
    export_name=export_name,
    images=batch,
    params=arguments,
)

signature_keys = ['serving_default']
structured_outputs = {'tf_op_layer_pca_projections': TensorSpec(shape=(None, 5), dtype=tf.float32, name='tf_op_layer_pca_projections')}


In [12]:
# Inspect the shape of the projections returned by the serving signature.
# In the recorded output it is (8, 5): 8 matches the size of the final `batch`
# and 5 matches the signature's declared output width (presumably
# arguments["top_k_pc"] -- TODO confirm against the trainer module).
predictions["tf_op_layer_pca_projections"].shape

TensorShape([8, 5])