In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

### Set gcloud project

In [None]:
# Optional: uncomment the line below and replace YOUR_PROJECT_ID to select the gcloud project.
#!gcloud config set project YOUR_PROJECT_ID

In [None]:
project_id = !gcloud config list --format 'value(core.project)' 2>/dev/null
print(project_id)

In [None]:
# Optional: uncomment to print the active gcloud configuration for inspection.
#!gcloud config list

### Environment variables

In [None]:
import os

# `project_id` holds the stdout lines captured from `gcloud config list` in an
# earlier cell. It is empty (or a single blank line) when gcloud has no default
# project, so fail here with a clear message instead of a bare IndexError or an
# empty PROJECT_ID that breaks the later commands.
if not project_id or not project_id[0].strip():
    raise RuntimeError(
        "No default gcloud project is configured. "
        "Run `gcloud config set project YOUR_PROJECT_ID` and re-run the cells above."
    )

# Exported so that `!` shell commands in later cells can read them.
# The cluster name matches the one used by the create / get-credentials cells.
os.environ['CLUSTER'] = "training-cluster-beta"
os.environ['PROJECT_ID'] = project_id[0].strip()

# Prerequisites to Set Up the GKE Cluster

### 1. gcloud command - Create cluster

In [None]:
!gcloud container clusters create training-cluster-beta \
--num-nodes=2 \
--zone=us-central1-b \
--workload-pool=${PROJECT_ID}.svc.id.goog

### 2. Create Kubernetes service account

In [None]:
!kubectl create serviceaccount sa-trainer

### 3. Create Google service account

In [None]:
gcloud iam service-accounts create gke-trainer-sa

### 4. Create IAM policy binding that allows Kubernetes SA to impersonate the Google service account. This binding allows the Kubernetes Service account to act as the Google service account.

In [None]:
!gcloud iam service-accounts add-iam-policy-binding \
--role roles/iam.workloadIdentityUser \
--member "serviceAccount:virtual-anomaly.svc.id.goog[default/sa-trainer]" \
gke-trainer-sa@virtual-anomaly.iam.gserviceaccount.com

### 5. Add storage role to the Google service account

In [None]:
!gcloud projects add-iam-policy-binding virtual-anomaly \
--member "serviceAccount:gke-trainer-sa@virtual-anomaly.iam.gserviceaccount.com" \
--role "role/storage.objectAdmin"

### 6. Connect to the cluster (Command can also be found in Cloud Console for GKE)

Configure `kubectl` command-line access by running the following command:

In [None]:
!gcloud container clusters get-credentials training-cluster-beta --zone us-central1-b --project $PROJECT_ID

### 7. Add annotation to the Kubernetes service account, using the email address of the Google service account.

In [None]:
!kubectl annotate serviceaccount \
--namespace default sa-trainer \
iam.gke.io/gcp-service-account=gke-trainer-sa@virtual-anomaly.iam.gserviceaccount.com

Refer to https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity for the latest instructions.

# Run Training and Prediction Jobs on GKE Cluster

### Run the Kubernetes training job

In [None]:
!kubectl apply -f k8s_job_training.yaml

#### Verify

Verify that the output Cloud Storage location specified in the YAML file contains files whose timestamps match the job run time.

### Run the Kubernetes prediction job

In [None]:
!kubectl apply -f k8s_job_prediction.yaml

#### Verify

Verify that the output Cloud Storage location specified in the YAML file contains files whose timestamps match the job run time.

### Troubleshooting 101

kubectl get jobs # Lists all jobs in the default namespace

kubectl get pods # Lists all pods in the default namespace

kubectl describe job {job_name} # Gives additional information about the job

kubectl describe pod {pod_name} # Gives additional information about the pod

kubectl logs {pod_name} # Shows the pod logs

In [None]:
# List the Jobs in the current namespace to check their completion status.
!kubectl get jobs

### Rerun Training

In [None]:
!kubectl delete job trainer-job

### Rerun Prediction/Inference

In [None]:
!kubectl delete job prediction-job

# Destroy the cluster

In [None]:
!gcloud container clusters delete training-cluster