In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

### Import packages

In [None]:
import os
import json
import logging
import pandas as pd
import numpy as np

from datetime import datetime
from pytz import timezone

This notebook is adapted from the following codelab:
https://codelabs.developers.google.com/vertex_custom_training_prediction

## 1. Overview
In this lab, you will use GKE instead of [Vertex AI](https://cloud.google.com/vertex-ai/docs) to train and serve a scikit-learn model using code in a custom container.

While we're using scikit-learn for the model code here, you could easily replace it with another framework.

**What you'll learn**

You'll learn how to:

- Build and containerize model training code in Vertex Notebooks
- Submit a custom model training job to GKE
- Deploy your trained model to GKE as a job, and use that job to get predictions

## 2. Intro to Vertex AI
This lab uses GKE to run training and predictions.

Although running training/prediction on GKE is an option, consider the newest AI product offering available on Google Cloud. [Vertex AI](https://cloud.google.com/vertex-ai/docs) integrates the ML offerings across Google Cloud into a seamless development experience. Previously, models trained with AutoML and custom models were accessible via separate services. The new offering combines both into a single API, along with other new products. You can also migrate existing projects to Vertex AI. If you have any feedback, please see the [support page](https://cloud.google.com/vertex-ai/docs/support/getting-support).

Vertex AI includes many different products to support end-to-end ML workflows. This lab will focus on the products highlighted below: Training, Prediction, and Notebooks.

## 3. Set up your environment
You'll need a Google Cloud Platform project with billing enabled to run this codelab. To create a project, follow the [instructions here](https://cloud.google.com/resource-manager/docs/creating-managing-projects).

### Step 3.1: Enable the Compute Engine API
Navigate to [Compute Engine](https://console.cloud.google.com/marketplace/details/google/compute.googleapis.com) and select **Enable** if it isn't already enabled. You'll need this to create your notebook instance.
### Step 3.2: Enable the Vertex AI API
Navigate to the [Vertex AI section of your Cloud Console](https://console.cloud.google.com/ai/platform) and click **Enable** Vertex AI API.
### Step 3.3: Enable the Container Registry API
Navigate to the [Container Registry](https://console.cloud.google.com/apis/library/containerregistry.googleapis.com) and select **Enable** if it isn't already. You'll use this to create a container for your custom training job.
### Step 3.4: Create a Vertex Notebooks instance
From the [Vertex AI section](https://console.cloud.google.com/ai/platform) of your Cloud Console, click on Notebooks.

From there, select **New Instance**. Then select the **TensorFlow Enterprise 2.3** instance type **without GPUs**:

## 4. Load the data for training and predictions to GCS

### Configure Global Variables

List your current GCP project name

In [None]:
# Ask gcloud for the active project; the `!` shell capture stores the command's
# output lines in a list-like object, and stderr is discarded.
# NOTE(review): if no project is configured the result is presumably empty or
# holds an empty string — confirm before relying on project_id[0].
project_id = !gcloud config list --format 'value(core.project)' 2>/dev/null
# Show the captured lines so the detected project can be checked by eye.
print(project_id)

Configure your system variables

In [None]:
# Global settings shared by the remaining cells.

# --- Identity: edit these to match your environment ---
PROJECT = project_id[0]                 # GCP project ID (first line captured from gcloud)
USER = 'test_user'                      # your user name
BUCKET_NAME = f"{PROJECT}-vertex-ai"    # GCS bucket name; replace if you use another bucket

# --- Location and naming ---
REGION = 'us-central1'                  # bucket region (keep it in the same region as Vertex AI)
TIMEZONE = 'US/Pacific'
FOLDER_NAME = 'sklearn_models'
ALGORITHM = 'isolation_forest'

# --- GCS destinations for the train and test CSV files ---
TRAIN_FEATURE_PATH = f"gs://{BUCKET_NAME}/{FOLDER_NAME}_data/{ALGORITHM}/train/train.csv"
TEST_FEATURE_PATH = f"gs://{BUCKET_NAME}/{FOLDER_NAME}_data/{ALGORITHM}/test/test.csv"

In [None]:
# Echo the resolved settings, one per line, so they can be checked by eye.
print(
    f"Project:      {PROJECT}",
    f"Bucket Name: {BUCKET_NAME}",
    f"Training Data URI:  {TRAIN_FEATURE_PATH}",
    f"Test Data URI:      {TEST_FEATURE_PATH}",
    sep="\n",
)

**Create your bucket**

In [None]:
!gsutil mb -l $REGION gs://$BUCKET_NAME 

### Create and upload the dataset

In [None]:
# Local file locations for the generated CSV files, paired with the GCS
# destinations they will be copied to.
DATA_DIR = "data"
gcs_source_train_url = TRAIN_FEATURE_PATH
gcs_source_test_url = TEST_FEATURE_PATH
local_source_train = DATA_DIR + "/train/train.csv"
local_source_test = DATA_DIR + "/test/test.csv"

print(f"Train data content will be loaded to {gcs_source_train_url}")
print(f"Local train data content is here {local_source_train}")
# Report the test destination here (not the train URL).
print(f"Test data content will be loaded to {gcs_source_test_url}")
print(f"Local test data content is here {local_source_test}")

**Create the dataset**

In [None]:
# Build a small synthetic 2-D dataset and write the train/test splits to the
# local CSV files.
rng = np.random.RandomState(42)  # fixed seed so every run draws the same data

# Training data: 100 Gaussian points scaled by 0.3, shifted by +2 and by -2
# to form two clusters (200 rows in total).
inliers_train = 0.3 * rng.randn(100, 2)
x_train = np.r_[inliers_train + 2, inliers_train - 2]
# Regular novel observations, drawn the same way (20 points per cluster).
inliers_test = 0.3 * rng.randn(20, 2)
x_test = np.r_[inliers_test + 2, inliers_test - 2]
# Abnormal novel observations, drawn uniformly between -4 and 4.
# NOTE: generated here but not written to disk by this cell.
x_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# Create the output directories. exist_ok=True makes re-running the cell safe
# and avoids the check-then-create race of os.path.exists + os.makedirs.
for split in ('train', 'test'):
    os.makedirs(os.path.join(DATA_DIR, split), exist_ok=True)

np.savetxt(local_source_train, x_train, fmt='%s', delimiter=",")
np.savetxt(local_source_test, x_test, fmt='%s', delimiter=",")

**Copy the dataset to GCS**

In [None]:
!gsutil cp $local_source_train $gcs_source_train_url
!gsutil cp $local_source_test $gcs_source_test_url

In [None]:
path = f"gs://{BUCKET_NAME}/{FOLDER_NAME}_data/{ALGORITHM}/train"
!gsutil ls $path
path = f"gs://{BUCKET_NAME}/{FOLDER_NAME}_data/{ALGORITHM}/test"
!gsutil ls $path

### Next, open the **1. sklearn-cb-ctr-setup-training** notebook