### Imports

In [None]:
from itertools import islice

import pandas as pd

# import GCP Demo 1 package
from gcpdemo1 import gcp, etl, data, tune, train, predict
from gcpdemo1.tune import MLPTuner

### ***USER INPUT***: Enter GCP Variables

In [None]:
# GCP variables: settings read by the cells below; edit these for your own project
gcp_vars = dict(
    # local path to gcp credentials
    credentials_path='../credentials/ml-sandbox-1-191918-384dcea092ff.json',
    # gcp project name
    project_name='ml-sandbox-1-191918',
    # gcp bucket name
    bucket='gcp-cert-demo-1',
    # path to write trainer package to within bucket
    gcs_trainer_path='taxi_mlp_trainer/trainer-0.1.tar.gz',
    # local path to trainer package
    local_trainer_path='../mlp_trainer',
    # bigquery table to use as dataset TODO: temporary small table
    table_id='finaltaxi_encoded_sampled_small',
)

### Authentication

In [None]:
# Obtain the credentials object from the file configured in gcp_vars;
# the result is passed to the GCP helper calls in later cells.
gcp_credentials = gcp.get_credentials(
    gcp_vars['credentials_path']
)

### Create training package

In [None]:
# Build the trainer package and upload it, then store the value returned by the
# helper under gcp_vars['package_uri'] (the tuning cell reads it from there).
# TODO: does not work on windows because of no bash command
gcp_vars.update(
    package_uri=gcp.build_and_upload_trainer_package(
        bucket_name=gcp_vars['bucket'],
        destination_blob_name=gcp_vars['gcs_trainer_path'],
        local_trainer_package_path=gcp_vars['local_trainer_path'],
        credentials=gcp_credentials,
    )
)

### Tune hyperparameters

In [None]:
# Job-level settings for the hyperparameter tuning run (passed to MLPTuner below).
job_id_prefix = 'gcpdemo1_mlp_tuning'
job_dir_prefix = 'gs://gcp-cert-demo-1/hp_tuning'
# The results CSV sits under the job directory; derive it from job_dir_prefix so
# the two locations cannot drift apart when one of them is edited.
output_path = f'{job_dir_prefix}/hp_tuning_results.csv'
machine_type = 'complex_model_m_gpu' # https://cloud.google.com/ml-engine/docs/machine-types

# Optimizer parameters:
#      "Adam"    for tf.keras.optimizers.Adam
#      "Nadam"   for tf.keras.optimizers.Nadam
#      "RMSprop" for tf.keras.optimizers.RMSprop
#      "SGD"     for tf.keras.optimizers.SGD

# params = {
#     # Tunable params
#     "dense_neurons_1": [64, 128, 9],
#     "dense_neurons_2": [32, 64, 5],
#     "dense_neurons_3": [8, 32, 7],
#     "activation": ["relu", "elu"],
#     "dropout_rate_1": [0, 0.5, 5],
#     "dropout_rate_2": [0, 0.5, 5],
#     "dropout_rate_3": [0, 0.5, 5],
#     "optimizer": ["Adam", "Nadam", "RMSprop", "SGD"],
#     "learning_rate": [.0001, .0005, .001, .005, .01, .05, .1, .5, 1],
#     "kernel_initial_1": ["normal", "glorot_normal", "he_normal", "lecun_normal"],
#     "kernel_initial_2": ["normal", "glorot_normal", "he_normal", "lecun_normal"],
#     "kernel_initial_3": ["normal", "glorot_normal", "he_normal", "lecun_normal"],

#     # Static params
#     "batch_size": [128],
#     "chunk_size": [500000],
#     "epochs": [40],
#     "validation_freq": [1],
#     "patience": [20]
# }

# Reduced search space handed to the tuner; every value is a list of candidates.
params = dict(
    # tunable params
    # NOTE(review): this is the only entry with two values - confirm that both
    # 64 and 9 are intended candidates.
    dense_neurons_1=[64, 9],
    dense_neurons_2=[32],
    dense_neurons_3=[8],
    activation=["relu"],
    dropout_rate_1=[0.5],
    dropout_rate_2=[0.5],
    dropout_rate_3=[0.5],
    optimizer=["Adam"],
    learning_rate=[.0001],
    kernel_initial_1=["normal"],
    kernel_initial_2=["normal"],
    kernel_initial_3=["normal"],

    # static params
    batch_size=[1024],
    chunk_size=[500000],
    epochs=[1],
    validation_freq=[1],
    patience=[5],
)



In [None]:
# create mlp tuner from the project settings and the tuning job settings above
mlp_tuner = tune.MLPTuner(
    project_name=gcp_vars['project_name'],
    credentials=gcp_credentials,
    job_id_prefix=job_id_prefix,
    job_dir_prefix=job_dir_prefix,
    master_type=machine_type,
    table_id=gcp_vars['table_id'],
)

# begin tuning job; the returned value is kept as the path of the tuning log
tuning_log_path = mlp_tuner.tune(
    gcp_vars['package_uri'],
    params,
    output_path,
)

In [None]:
# ask the gcp helper for the current state of the tuning job and report it
tuning_status = gcp.check_mle_job_status(mlp_tuner)
print(f'Tuning job status: {tuning_status}')

In [None]:
# review tuning output (this is now done in GCP - AI Platform)
# Download the tuning log from the bucket to a local CSV, then load it for display.
local_results_path = './tuning_results.csv'
gcp.download_blob(
    gcp_vars['bucket'],
    tuning_log_path,
    local_results_path,
    gcp_credentials,  # the credentials object created in the authentication cell
)

# read the same file that was just downloaded
tuning_output = pd.read_csv(local_results_path)
tuning_output

### ***USER INPUT***: Enter training and job parameters

In [None]:
# input job parameters
# Both values must be plain strings: a trailing comma after the literal would
# silently turn the value into a 1-tuple.
# These names are reused from the tuning cell, so re-running the tuner cell after
# this one would pick up the training values.
job_id_prefix = 'taxi_mlp_trainer' # training job ID
job_dir_prefix = 'gs://gcp-cert-demo-1/taxi_mlp_trainer' # job directory prefix passed to the trainer


# input training parameters
# One concrete value per hyperparameter (the tuning cell uses lists of candidates
# for the same keys).
# NOTE(review): the tuning params also set 'patience', which is absent here -
# confirm the trainer has a default for it.
training_params = {
    'dense_neurons_1': 64,
    'dense_neurons_2': 32,
    'dense_neurons_3': 8,
    'activation': 'relu',
    'dropout_rate_1': 0.1,
    'dropout_rate_2': 0.1,
    'dropout_rate_3': 0.1,
    # spelled like the optimizer names listed in the tuning cell
    # ("Adam", "Nadam", "RMSprop", "SGD")
    'optimizer': 'Adam',
    'learning_rate': 0.1,
    'chunk_size': 500000,
    'batch_size': 1024,
    'epochs': 1,
    # NOTE(review): validation_freq (5) is larger than epochs (1) - confirm this
    # combination is intended.
    'validation_freq': 5,
    'kernel_initial_1': 'normal',
    'kernel_initial_2': 'normal',
    'kernel_initial_3': 'normal',
}

### Train model

In [None]:
# create mlp trainer from the project settings and the training job settings
# NOTE(review): trainer_package_uri receives the bucket-relative
# gcp_vars['gcs_trainer_path'], whereas the tuner is given gcp_vars['package_uri']
# - confirm which form MLPTrainer expects.
mlp_trainer = train.MLPTrainer(
    project_name=gcp_vars['project_name'],
    credentials=gcp_credentials,
    bucket=gcp_vars['bucket'],
    table_id=gcp_vars['table_id'],
    trainer_package_uri=gcp_vars['gcs_trainer_path'],
    job_id_prefix=job_id_prefix,
    job_dir_prefix=job_dir_prefix,
)

# begin training job with the hyperparameters chosen above
mlp_trainer.train(training_params)

### Check training status

In [None]:
# ask the gcp helper for the current state of the training job and report it
training_status = gcp.check_mle_job_status(mlp_trainer)
print(f'Training job status: {training_status}')

### Deploy model

In [None]:
# deploy trained model; model_name is reused by the prediction cell
model_name = 'taxi_cab_cash_or_credit'
mlp_trainer.deploy(
    model_name,
    version_name='v1',
)

### Make predictions

In [None]:
# TODO: basic predictions, need to flesh out
# get test data
rows = data.get_reader_rows(
    credentials=gcp_credentials,
    table_id=gcp_vars['table_id'],
    partition_name='test'
)
# Only the first 10 rows are used, so stop reading after 10 instead of
# materializing the whole partition before slicing.
# The 'cash' column is dropped before building the inputs
# (presumably it is the label - confirm against the table schema).
vals = pd.DataFrame(list(islice(rows, 10))).drop(
    'cash',
    axis=1
).values
# one request instance per row, keyed by the name 'dense_input'
instances = [{'dense_input': list(instance)} for instance in vals]

# Todo - remove dependency on mlp_trainer module for model_dir
# predict using deployed model: build a predictor for the model deployed above,
# then send it the prepared instances
predictor = predict.Predictor(
    model=model_name,
    project=gcp_vars['project_name'],
    credentials=gcp_credentials,
)
predictions = predictor.predict(instances)