In [None]:
# Import python modules
import tensorflow 
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import keras_tuner 
from google.cloud import aiplatform

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy
import pandas
import json, os

In [None]:
# Declare variables
REGION = "us-central1"
PROJECT_ID = !(gcloud config get-value core/project)
PROJECT_ID = PROJECT_ID[0]
MODEL_PATH='gs://'+PROJECT_ID+'-bucket/model/'
DATASET_PATH='gs://'+PROJECT_ID+'/area_cover_dataset.csv'
PIPELINE_ROOT = 'gs://'+PROJECT_ID
MODEL_ARTIFACTS_LOCATION ='gs://'+PROJECT_ID+'-bucket/'

In [None]:
# Load the CSV found at DATASET_PATH into the pandas dataframe used by the rest of the notebook
area_cover_dataframe = pandas.read_csv(filepath_or_buffer=DATASET_PATH)

**Task 4** Create the function that converts categorical data to indexed integer values

In [None]:
# Function that takes the area cover dataframe and converts the two categorical (string) columns into indexed values
# Parameter: dataframe - the area cover pandas dataframe (the notebook calls this with area_cover_dataframe).
# Returns: the `dataframe` variable; the conversion itself still has to be written in the TODO section below.
def index(dataframe):
    
    [ TODO - Insert your Code ]
    
    return dataframe

**Task 5** Extract the feature columns and standardize the values

In [None]:
# Extract the feature columns into a new dataframe called scaled_features that has been standardized using the sklearn.preprocessing.StandardScaler method.
# The features are all columns from the area cover dataset except the "Area_Cover" column
# NOTE: the train/validation split cell reads scaled_features.values, so scaled_features must expose `.values` (e.g. a pandas dataframe) rather than being a bare array.
indexed_dataframe = index(area_cover_dataframe)
features_dataframe = indexed_dataframe.drop("Area_Cover", axis = 1)
standard_scaler = StandardScaler()

scaled_features = [ TODO - Insert your code ]

In [None]:
# Create a binary matrix containing the categorical Area_Cover column data converted using keras.utils.to_categorical()
# labels_dataframe holds only the "Area_Cover" column selected from indexed_dataframe.
labels_dataframe = indexed_dataframe["Area_Cover"]
categorical_labels = to_categorical(labels_dataframe)

In [None]:
# Split the dataset into model training and validation data
# test_size=0.2 sets aside 20% of the rows as dfx_val / dfy_val.
# No random_state is passed, so the rows that land in each split can differ from run to run.
dfx_train, dfx_val, dfy_train, dfy_val = train_test_split(scaled_features.values, categorical_labels, test_size=0.2)

**Task 6** Create a function that returns a sequential categorical model function with a hyperparameter tuning layer

In [None]:
# Create a function that returns a sequential categorical model function with a hyperparameter tuning layer
# Parameter: hptune - presumably the keras_tuner hyperparameters object supplied by the tuner -- confirm when filling in the TODO.
# The first Dense layer is fixed at 128 units and expects 12 feature values per sample (input_shape = (12,)).
# Returns: the Sequential model assembled here.
def build_model(hptune):
    model = Sequential()
    model.add(Dense(128, input_shape = (12,), activation = "relu"))
    
    [ TODO - Insert your code ]
    
    return model

**Task 7** Create a Keras Hyperband Hyperparameter tuner with an accuracy objective

In [None]:
# Create a Keras Hyperband Hyperparameter tuner with an accuracy objective
# NOTE: later cells call tuner.search(), tuner.get_best_hyperparameters() and tuner.hypermodel.build() on this object.

tuner =  [ TODO - Insert your code ]

**Task 8** Perform Hyperparameter tuning and train the optimal model

You do not have to add any of your own code for this task. Run the cells to tune, optimize and train the model. 

In [None]:
# Early-stopping callback: monitors val_loss with a patience of 5 epochs
stop_early = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
)

# Run the Keras Tuner search on the training split: up to 50 epochs per trial,
# with 20% of the training data held back for validation
tuner.search(
    dfx_train,
    dfy_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[stop_early],
)

# Request the 10 best hyperparameter sets found by the search and keep the first one
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=10)[0]

In [None]:
# Build a fresh model from best_hyperparameters, then train it for 50 epochs
# while holding back 20% of the training split for validation.
model = tuner.hypermodel.build(best_hyperparameters)
history = model.fit(
    x=dfx_train,
    y=dfy_train,
    epochs=50,
    validation_split=0.2,
)

In [None]:
# From the training history, find the (1-based) epoch with the highest validation accuracy and print it.
val_acc_per_epoch = history.history['val_accuracy']
# max() keeps the earliest position when several epochs tie for the best accuracy.
best_epoch = 1 + max(range(len(val_acc_per_epoch)), key=val_acc_per_epoch.__getitem__)
print('Best epoch: %d' % (best_epoch,))

In [None]:
# Evaluate the trained model on the validation split and print the result.
eval_result = model.evaluate(x=dfx_val, y=dfy_val)
print("[Model test loss, test accuracy]:", eval_result)

In [None]:
# Build another fresh model (hypermodel) from best_hyperparameters ...
hypermodel = tuner.hypermodel.build(best_hyperparameters)
# ... and train it for exactly best_epoch epochs, the count found to give the best validation accuracy.
hypermodel.fit(
    x=dfx_train,
    y=dfy_train,
    epochs=best_epoch,
    validation_split=0.2,
)

In [None]:
# Evaluate the retrained hypermodel on the validation split and print the result.
eval_result = hypermodel.evaluate(x=dfx_val, y=dfy_val)
print("[Hypermodel test loss, test accuracy]:", eval_result)

In [None]:
# Save the hypertuned model to the Cloud Storage location held in MODEL_PATH
# NB the MODEL_PATH bucket must be created before this will succeed and it must be in the same location as the model.
# e.g. gsutil mb -l us-central1  gs://${PROJECT_ID}-bucket
hypermodel.save(MODEL_PATH)

**Task 9** Create a Custom Container for Vertex AI pipeline model training
1. Create a Python model trainer module using the above code
2. Save the code as `model.py` in the `model/trainer` directory beneath the current working directory for this notebook
3. Make sure you set the Project ID correctly in the Python script. 
4. Create the Dockerfile definition in the `model/` directory for your custom training container using the `gcr.io/deeplearning-platform-release/tf2-cpu.2-6` base container image

Once you have prepared the custom container Python module code and Dockerfile you can build and test the custom container. 

In [None]:
# Build the container using the following gcr.io tag
# IMAGE_URI resolves to gcr.io/<PROJECT_ID>/tensorflow:latest and is reused by the run and push cells.
# NOTE(review): the build context is ~/model/ (under the home directory), whereas Task 9 places the files in model/ beneath the notebook's working directory -- the two only agree when the notebook runs from the home directory; confirm.
IMAGE_URI="gcr.io/{}/tensorflow:latest".format(PROJECT_ID)
!docker build ~/model/. -t $IMAGE_URI

In [None]:
# Run the docker image locally to test it
# No extra arguments are passed, so the container runs whatever the image itself defines as its start-up command.
!docker run $IMAGE_URI

In [None]:
# Push the docker image to the Google container registry
# The destination is taken from the tag in IMAGE_URI.
!docker push $IMAGE_URI

In [None]:
# Install kubeflow pipeline SDK and google cloud pipeline component for building Vertex AI pipelines
!pip3 install kfp google_cloud_pipeline_components

In [None]:
# Import the libraries required for Vertex AI pipelines
import kfp
from kfp.v2 import compiler
from google.cloud import aiplatform
from google_cloud_pipeline_components import aiplatform as gcc_aip

**Task 10** Define the Vertex AI Training pipeline

1. Add your code for the Training Operation using your newly created custom container
    * This should reference the custom container_uri passed in as a parameter
    * This should use "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-6:latest" for the `model_serving_container_image_uri`
2. Add your code for the Model Deploy Operation
    * This operation should output a model and an endpoint.
    
All machine types should be specified as "n1-standard-4"


In [None]:
# Define the Vertex AI pipeline
# The pipeline is registered under the name "vertex-ai-pipeline" with PIPELINE_ROOT as its root location.
# Parameters (all strings, overridable when the pipeline job is created):
#   bucket        - defaults to MODEL_ARTIFACTS_LOCATION
#   project       - defaults to PROJECT_ID
#   gcp_region    - defaults to REGION
#   container_uri - defaults to ""; Task 10 expects the custom training container URI to be passed in here
@kfp.dsl.pipeline(name="vertex-ai-pipeline",
                  pipeline_root=PIPELINE_ROOT)
def pipeline(
    bucket: str = MODEL_ARTIFACTS_LOCATION,
    project: str = PROJECT_ID,
    gcp_region: str = REGION,
    container_uri: str = "",
):
    
    # Training step: only the display name is set so far; the remaining arguments go in the TODO section.
    training_op = gcc_aip.CustomContainerTrainingJobRunOp(
        display_name="tensorflow-train-model",
        
        [ TODO - Insert your code ]
        
    )
       
    # Endpoint step: creates an endpoint called "tensorflow-model-endpoint" in `project`.
    create_endpoint_op = gcc_aip.EndpointCreateOp(
        project=project,
        display_name = "tensorflow-model-endpoint",
    )
    
    # Deploy step: all arguments still have to be supplied in the TODO section.
    model_deploy_op = gcc_aip.ModelDeployOp(
        
        [ TODO - Insert your code ]        
        
    )

In [None]:
# Compile the Vertex AI pipeline
# The compiled definition of `pipeline` is written to pipeline.json (a relative path).
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="pipeline.json"
)

**Task 11** Create the Vertex AI Pipeline job object

The pipeline job must be specified using the compiled pipeline definition JSON file and should point to your saved model location and your custom training container.

In [None]:
# Create the Vertex AI Pipeline job object
# All constructor arguments still have to be supplied in the TODO section; the next cell calls pipeline_job.run().
pipeline_job = aiplatform.PipelineJob(
    
    [ TODO - Insert your code ]   
    
)

In [None]:
# Run the Vertex AI pipeline job
# Uses the pipeline_job object created in the previous cell.
pipeline_job.run()

In [None]:
# List the model created by the pipeline
# The listing covers every model in REGION, not only the one from this pipeline.
!gcloud ai models list --region=$REGION

In [None]:
# Store the endpoint ID where the model has been deployed
# The first command prints the endpoint listing for REGION; the second captures one value(name) line per endpoint, with stderr discarded.
# NOTE(review): ENDPOINT_IDS[0] is simply the first endpoint listed and raises IndexError when the list is empty -- confirm it is the endpoint created by this pipeline.

!gcloud ai endpoints list --region=$REGION
ENDPOINT_IDS=!gcloud ai endpoints list --region=$REGION --format="value(name)" 2>/dev/null
print("Vertex AI Endpoint ID:" + ENDPOINT_IDS[0])


In [None]:
# Copy the pre-prepared sample test.json from Cloud Storage into the current directory
!gsutil cp gs://sureskills-lab-dev/CEPF/vertex-ai/test.json . 

**Task 12** Create a function to convert the source JSON test data to an array of normalized column values

The test data consists of samples with feature data that you want to use to generate area_cover type predictions using the model endpoint. 

You must define a function that performs the following tasks:
1. Read the `test.json` instance data into a dataframe
2. Normalize the column data using the `StandardScaler.fit_transform` method
3. Output an array of arrays containing the normalized feature column data for each test instance.

In [None]:
# Convert the json test data to an array of standard scaler normalized column data
# Parameter: file_name - path of the JSON test file (the notebook passes "test.json").
# The TODO section must define `normalize_df`: the loop below reads normalize_df.values and turns each row into a list.
# Returns: instances - a list holding one list of values per row of normalize_df.
def get_instances(file_name):
    instances = []

    [ TODO - Insert your code ]   

    for _ in normalize_df.values:
        instances.append(list(_))
    return instances

In [None]:
# Define a function for making predictions using the endpoint
def endpoint_predict(project: str, location: str, instances, endpoint: str):
    """Send `instances` to a Vertex AI endpoint and return its prediction response.

    Args:
        project: Project passed to aiplatform.init.
        location: Location passed to aiplatform.init.
        instances: Instances forwarded unchanged to Endpoint.predict.
        endpoint: Endpoint identifier used to construct aiplatform.Endpoint.

    Returns:
        The object returned by Endpoint.predict for the given instances.
    """
    aiplatform.init(project=project, location=location)
    return aiplatform.Endpoint(endpoint).predict(instances=instances)

In [None]:
# Test the result by calling get_instances(), which converts the JSON test data into a list of instances,
# and sending those instances to the first endpoint captured in ENDPOINT_IDS
# Replace the endpoint ID with the new ENDPOINT_ID if needed
FILE_NAME = "test.json"
instances = get_instances(FILE_NAME)
prediction_result = endpoint_predict(
    project=PROJECT_ID,
    location=REGION,
    instances=instances,
    endpoint=ENDPOINT_IDS[0]
)

In [None]:
# Save `Area_Cover` predictions with respect to the test instance features
# Each test instance's position is mapped to the index of its highest-scoring class (as a string).
area_cover_predictions={}
# The loop variable is deliberately not named `index`: that would rebind the
# notebook-level index() function and break any re-run of the earlier cells.
for instance_number, area_cover in enumerate(prediction_result.predictions):
    predicted_class = numpy.argmax(area_cover)
    print(instance_number,":",predicted_class, end=' \n')
    area_cover_predictions[instance_number]=str(predicted_class)

# The context manager closes predictions.txt even if the write raises.
with open("predictions.txt", "w") as f:
    f.write(json.dumps(area_cover_predictions))