![tracker](https://us-central1-vertex-ai-mlops-369716.cloudfunctions.net/pixel-tracking?path=statmike%2Fvertex-ai-mlops%2FDev%2Fnew&file=Autoencoders.ipynb)
<!--- header table --->
<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/statmike/vertex-ai-mlops/blob/main/Dev/new/Autoencoders.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo">
      <br>Run in<br>Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https%3A%2F%2Fraw.githubusercontent.com%2Fstatmike%2Fvertex-ai-mlops%2Fmain%2FDev%2Fnew%2FAutoencoders.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo">
      <br>Run in<br>Colab Enterprise
    </a>
  </td>      
  <td style="text-align: center">
    <a href="https://github.com/statmike/vertex-ai-mlops/blob/main/Dev/new/Autoencoders.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      <br>View on<br>GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/statmike/vertex-ai-mlops/main/Dev/new/Autoencoders.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      <br>Open in<br>Vertex AI Workbench
    </a>
  </td>
</table>

# Autoencoders

The basics of autoencoders from training to serving with application examples!

In [None]:
# package import
from tensorflow.python.framework import dtypes
from tensorflow_io.bigquery import BigQueryClient
import tensorflow as tf
from google.cloud import bigquery
from google.cloud import aiplatform
import argparse
import os
import sys

# Parse the job's command-line arguments into `args`.
# For each option: the first string is the flag, `dest` is the attribute name
# on `args`, `default` is the value used when the flag is absent, `type`
# converts the raw string, and `help` is the description shown by --help.
parser = argparse.ArgumentParser()

# training hyperparameters
parser.add_argument('--epochs', dest = 'epochs', default = 10, type = int, help = 'Number of Epochs')
parser.add_argument('--batch_size', dest = 'batch_size', default = 32, type = int, help = 'Batch Size')

# target column and the columns to leave out of the features
parser.add_argument('--var_target', dest = 'var_target', type=str)
# With nargs='*' an absent flag yields None rather than an empty list, and the
# omit list is later concatenated with another list, so default to [] to keep
# "no columns to omit" a valid invocation.
parser.add_argument('--var_omit', dest = 'var_omit', type=str, nargs='*', default = [])

# project and BigQuery source table
parser.add_argument('--project_id', dest = 'project_id', type=str)
parser.add_argument('--bq_project', dest = 'bq_project', type=str)
parser.add_argument('--bq_dataset', dest = 'bq_dataset', type=str)
parser.add_argument('--bq_table', dest = 'bq_table', type=str)

# Vertex AI location and experiment tracking identifiers
parser.add_argument('--region', dest = 'region', type=str)
parser.add_argument('--experiment', dest = 'experiment', type=str)
parser.add_argument('--series', dest = 'series', type=str)
parser.add_argument('--experiment_name', dest = 'experiment_name', type=str)
parser.add_argument('--run_name', dest = 'run_name', type=str)
args = parser.parse_args()

# Create the BigQuery client and initialize the Vertex AI SDK for the
# project/region given on the command line.
bq = bigquery.Client(project=args.project_id)
aiplatform.init(
    project=args.project_id,
    location=args.region,
)

# Open a Vertex AI Experiment run and record which experiment, series and
# project this run belongs to.
expRun = aiplatform.ExperimentRun.create(
    run_name=args.run_name,
    experiment=args.experiment_name,
)
expRun.log_params(
    {
        'experiment': args.experiment,
        'series': args.series,
        'project_id': args.project_id,
    }
)

# Read the column metadata of the source table from the dataset's
# INFORMATION_SCHEMA.COLUMNS view into a dataframe.
query = f"SELECT * FROM {args.bq_project}.{args.bq_dataset}.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{args.bq_table}'"
schema_job = bq.query(query)
schema = schema_job.to_dataframe()

# Number of classes = number of distinct non-null values of the target column.
distinct_targets = bq.query(
    query = f'SELECT DISTINCT {args.var_target} FROM {args.bq_project}.{args.bq_dataset}.{args.bq_table} WHERE {args.var_target} is not null'
).to_dataframe()
nclasses = len(distinct_targets.index)

# Record the data source and target information on the experiment run.
expRun.log_params(
    {
        'data_source': f'bq://{args.bq_project}.{args.bq_dataset}.{args.bq_table}',
        'nclasses': nclasses,
        'var_split': 'splits',
        'var_target': args.var_target,
    }
)

# Columns that are never read as model inputs: the user-supplied omit list
# plus the 'splits' column (used only to select rows per split).
OMIT = args.var_omit + ['splits']

# Filter the schema once and derive both per-column lists from the same rows,
# so names and types stay aligned.
kept_columns = schema[~schema.column_name.isin(OMIT)]

# names of the columns to read from BigQuery
selected_fields = kept_columns.column_name.tolist()

# TensorFlow dtype per column: FLOAT64 maps to float64 and every other
# BigQuery type is read as int64 (the source is expected to contain only
# FLOAT64 and INT64 columns).
output_types = [
    dtypes.float64 if bq_type == 'FLOAT64' else dtypes.int64
    for bq_type in kept_columns.data_type.tolist()
]

def transTable(row_dict):
    """Turn one row mapping into the (inputs, targets) pair the model trains on.

    The entry for ``args.var_target`` is removed from ``row_dict``, one-hot
    encoded over ``nclasses`` and cast to float32. Every remaining value is
    cast to float32 and the values are stacked into a single feature tensor.

    Returns:
        A tuple ``(features, targets)``. ``targets`` is a dict with the keys
        'logistic' and 'classification' (both the one-hot target) and
        'decoder' (the feature tensor itself, for reconstruction).
    """
    label = row_dict.pop(args.var_target)
    label_index = tf.cast(label, tf.int64)
    one_hot_label = tf.cast(tf.one_hot(label_index, nclasses), tf.float32)

    # everything left in the row after removing the target is a feature
    inputs = tf.stack([tf.cast(value, tf.float32) for value in row_dict.values()])

    targets = {
        'logistic': one_hot_label,
        'classification': one_hot_label,
        'decoder': inputs,
    }
    return inputs, targets

def bq_reader(split):
    """Open a TensorFlow I/O BigQuery read session restricted to one split.

    Args:
        split: value of the ``splits`` column that rows must have to be read.

    Returns:
        The read session created by ``BigQueryClient.read_session`` for the
        table named by the command-line arguments, limited to
        ``selected_fields`` with the matching ``output_types``.
    """
    client = BigQueryClient()
    return client.read_session(
        parent=f"projects/{args.project_id}",
        project_id=args.bq_project,
        table_id=args.bq_table,
        dataset_id=args.bq_dataset,
        selected_fields=selected_fields,
        output_types=output_types,
        row_restriction=f"splits='{split}'",
        requested_streams=3,
    )

# Input pipelines, one per split. Each reads rows in parallel, prefetches one
# element, converts rows with transTable and batches them; only the training
# pipeline is shuffled (buffer of 10 batches' worth of rows).
train = (
    bq_reader('TRAIN')
    .parallel_read_rows()
    .prefetch(1)
    .map(transTable)
    .shuffle(args.batch_size*10)
    .batch(args.batch_size)
)
validate = (
    bq_reader('VALIDATE')
    .parallel_read_rows()
    .prefetch(1)
    .map(transTable)
    .batch(args.batch_size)
)
test = (
    bq_reader('TEST')
    .parallel_read_rows()
    .prefetch(1)
    .map(transTable)
    .batch(args.batch_size)
)

# Record the pipeline settings on the experiment run.
expRun.log_params(
    {
        'training.batch_size': args.batch_size,
        'training.shuffle': 10*args.batch_size,
        'training.prefetch': 1,
    }
)

# One network, three outputs that share the same normalized input:
#   'logistic'       - softmax directly on the normalized features
#   'classification' - softmax on the encoder's 15-unit bottleneck
#   'decoder'        - reconstruction of the features from the bottleneck
# NOTE: layers are created in a fixed order on purpose; the unnamed Dense
# layers receive auto-generated names based on creation order.
keras_layers = tf.keras.layers
relu = tf.nn.relu
softmax = tf.nn.softmax

# inputs: one fewer than the selected columns, because transTable removes the
# target column from every row before stacking the features
n_inputs = len(selected_fields) - 1
features = keras_layers.Input(shape=(n_inputs,), name='features')

# normalization is part of the model
normalized = keras_layers.BatchNormalization(name='batch_normalization_layer')(features)

# logistic head
logistic = keras_layers.Dense(nclasses, activation=softmax, name='logistic')(normalized)

# encoder: 25 -> 20 -> 15, the last layer is the named bottleneck
encode = normalized
for units in (25, 20):
    encode = keras_layers.Dense(units, activation=relu)(encode)
encode = keras_layers.Dense(15, activation=relu, name='encoder')(encode)

# classifier head on the bottleneck
classifier = keras_layers.Dense(nclasses, activation=softmax, name='classification')(encode)

# decoder: 20 -> 25 -> input width, mirroring the encoder
decode = encode
for units in (20, 25):
    decode = keras_layers.Dense(units, activation=relu)(decode)
decode = keras_layers.Dense(features.shape[1], activation=tf.nn.sigmoid, name='decoder')(decode)

# assemble the multi-output model
model = tf.keras.Model(
    inputs=features,
    outputs=[logistic, classifier, decode],
    name=args.experiment,
)

# Compile with one loss and one metric set per named output. The two
# classification heads use categorical cross-entropy with accuracy and
# PR-curve AUC; the decoder uses binary cross-entropy with RMSE.
optimizer = tf.keras.optimizers.Adam()  # SGD or Adam

output_losses = {
    'logistic': tf.keras.losses.CategoricalCrossentropy(),
    'classification': tf.keras.losses.CategoricalCrossentropy(),
    'decoder': tf.keras.losses.BinaryCrossentropy(),
}


def _class_metrics():
    """Return a fresh metric list for one classification head."""
    return ['accuracy', tf.keras.metrics.AUC(curve='PR', name='auprc')]


output_metrics = {
    'logistic': _class_metrics(),
    'classification': _class_metrics(),
    'decoder': tf.keras.metrics.RootMeanSquaredError(name='rmse'),
}

model.compile(
    optimizer=optimizer,
    loss=output_losses,
    metrics=output_metrics,
)

# TensorBoard callback: logs go to the directory named by the
# AIP_TENSORBOARD_LOG_DIR environment variable (a missing variable raises
# KeyError), with histogram_freq=1.
tensorboard_log_dir = os.environ['AIP_TENSORBOARD_LOG_DIR']
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_log_dir,
    histogram_freq=1,
)

# Train on the TRAIN pipeline and validate on the VALIDATE pipeline.
fit_callbacks = [tensorboard_callback]
history = model.fit(
    train,
    epochs=args.epochs,
    callbacks=fit_callbacks,
    validation_data=validate,
)
# Record the number of epochs, then log every training and validation series
# from the fit history to the experiment run, one step per epoch.
n_epochs = history.params['epochs']
expRun.log_params({'training.epochs': n_epochs})

# Keras history keys for the training series; the validation series use the
# same names prefixed with 'val_'.
series_names = (
    'loss',
    'logistic_loss',
    'classification_loss',
    'decoder_loss',
    'logistic_accuracy',
    'classification_accuracy',
    'logistic_auprc',
    'classification_auprc',
    'decoder_rmse',
)
for epoch in range(n_epochs):
    epoch_metrics = {
        f'train_{name}': history.history[name][epoch]
        for name in series_names
    }
    epoch_metrics.update(
        {
            f'val_{name}': history.history[f'val_{name}'][epoch]
            for name in series_names
        }
    )
    expRun.log_time_series_metrics(epoch_metrics)

# Evaluate on the TEST pipeline and log the results to the experiment run.
# return_dict=True keys each value by its metric name, so the mapping below
# does not rely on the positional order in which Keras reports losses and
# metrics; these are the same names used to read history.history after fit.
metrics = model.evaluate(test, return_dict = True)
test_metric_names = (
    'loss',
    'logistic_loss',
    'classification_loss',
    'decoder_loss',
    'logistic_accuracy',
    'logistic_auprc',
    'classification_accuracy',
    'classification_auprc',
    'decoder_rmse',
)
# A missing name raises KeyError instead of silently logging the wrong value.
expRun.log_metrics(
    {f'test_{name}': metrics[name] for name in test_metric_names}
)

# Stand-alone encoder: shares the trained model's input and ends at the
# output of the layer named 'encoder'.
encoder_name = args.experiment+'_encoder'
encode_model = tf.keras.Model(
    inputs=model.input,
    outputs=model.get_layer('encoder').output,
    name=encoder_name,
)

# Save the full model to the directory named by the AIP_MODEL_DIR environment
# variable and the encoder to its 'encoder/' sub-directory, then close the run.
model_dir = os.getenv("AIP_MODEL_DIR")
if not model_dir:
    # Fail with a clear message rather than a TypeError from None + str.
    raise RuntimeError("AIP_MODEL_DIR is not set; cannot determine where to save the model")

# Join explicitly so the encoder path is correct whether or not AIP_MODEL_DIR
# ends with a slash (plain concatenation would produce '<dir>encoder/').
encoder_dir = model_dir.rstrip('/') + '/encoder/'

encode_model.save(encoder_dir)
model.save(model_dir)
expRun.log_params({'model.save': model_dir})
expRun.end_run()