In [None]:
pip install -r requirements.txt --quiet

In [None]:
# Setup: imports, project configuration, and the Artifact Registry repository

from google.cloud import aiplatform
from google.cloud import storage
import joblib
import dill
import numpy as np
import pandas as pd

# --- Google Cloud project and region ---------------------------------------
# NOTE(review): "[project]" looks like a template placeholder; replace it with
# a real project ID before running.
PROJECT = "[project]"
LOCATION = "us-central1"

# --- Model name and Cloud Storage location ---------------------------------
MODEL_NAME = "adult-income-cc-training-model"
BUCKET_NAME = "[project]"  # same placeholder value as PROJECT
GCS_FOLDER = "adult-income-cc-training-model"  # folder inside BUCKET_NAME

# --- Artifact Registry, training job and serving container -----------------
REPO_NAME = "adult-income-cc-training-repo"
IMAGE_NAME = "adult-income-cc-training-image"
JOB_NAME = "adult-income-cc-training-job"
SERVING_CONTAINER_URI = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-13:latest"

! gcloud artifacts repositories create $REPO_NAME --repository-format=docker \
--location=$LOCATION --description="Docker repository"

In [None]:
# configure docker auth

! gcloud auth configure-docker {LOCATION}-docker.pkg.dev --quiet

In [None]:
# build and push the training image

import sys

IMAGE_URI = f"{LOCATION}-docker.pkg.dev/{PROJECT}/{REPO_NAME}/{IMAGE_NAME}"

! docker build . -t $IMAGE_URI
! docker push $IMAGE_URI


In [None]:
# Create the CustomContainerTrainingJob object

# Initialize the Vertex AI SDK with the project and region used throughout the
# notebook. A staging bucket is supplied because custom training jobs require
# one (either here or in the job constructor).
aiplatform.init(
    project=PROJECT,
    location=LOCATION,
    staging_bucket=f"gs://{BUCKET_NAME}",
)

# Create the CustomContainerTrainingJob object.
# - container_uri: the training image built and pushed in the previous cell.
# - model_serving_container_image_uri: the prediction container; setting it is
#   what makes job.run() return an uploaded Model instead of None.
job = aiplatform.CustomContainerTrainingJob(
    display_name=JOB_NAME,
    container_uri=IMAGE_URI,
    model_serving_container_image_uri=SERVING_CONTAINER_URI,
)

print(job)

In [None]:
# Run the training job. This call blocks until training finishes and returns
# the Vertex AI Model uploaded from the job's output.
# NOTE(review): base_output_dir points at gs://BUCKET_NAME/GCS_FOLDER, the same
# folder the preprocessing pickles are downloaded from later in the notebook.
# This assumes the training container writes its model under the directory
# Vertex AI provides (AIP_MODEL_DIR) — confirm against the training code.
# replica_count and machine_type are the SDK defaults, spelled out for clarity.

model = job.run(
    model_display_name=MODEL_NAME,
    base_output_dir=f"gs://{BUCKET_NAME}/{GCS_FOLDER}",
    replica_count=1,
    machine_type="n1-standard-4",
)

In [None]:
# Deploy the model to an endpoint.
# No existing endpoint is passed, so deploy() creates a new one and returns it.
# NOTE(review): machine_type is a reasonable general-purpose choice, not a
# value taken from this notebook — adjust to your quota and cost needs.
endpoint = model.deploy(
    deployed_model_display_name=MODEL_NAME,
    machine_type="n1-standard-4",
)

print(f'Endpoint ID: {endpoint.resource_name}')

In [None]:
# Fetch the pickled preprocessing objects from Cloud Storage into the current
# working directory, then load them.

storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET_NAME)

# File names are the same in the bucket (under GCS_FOLDER) and on local disk.
pkls = ["scaler.pkl", "label_encoder.pkl", "categorical_encoder.pkl"]

for pkl in pkls:
    bucket.blob(f"{GCS_FOLDER}/{pkl}").download_to_filename(pkl)

# Load the pre-trained ML preprocessing objects, in the same order as `pkls`.
# CAUTION: joblib.load unpickles the file, which can execute arbitrary code;
# only load pickles from a bucket you trust.
scaler, label_encoder, categorical_encoder = (
    joblib.load(local_path)
    for local_path in ("scaler.pkl", "label_encoder.pkl", "categorical_encoder.pkl")
)


In [None]:
# do the prediction

# Raw instances to send for prediction, one 13-field row per instance.
# Positions 0, 3, 9, 10 and 11 hold numbers; every other position holds a string.
# NOTE(review): the field order presumably follows the Adult income dataset
# columns used during training — confirm against the training code.
instances = [
    [39, "Private", "9th", 5, "Married-civ-spouse", "Other-service", "Wife", "Black", "Female", 3411, 0, 34, "United-States"],
    [77, "Private", "9th", 5, "Married-civ-spouse", "Priv-house-serv", "Wife", "Black", "Female", 0, 0, 10, "United-States"],
    [27, "Local-gov", "HS-grad", 9, "Married-civ-spouse", "Exec-managerial", "Husband", "White", "Male", 0, 0, 80, "United-States"],
    [40, "Private", "Masters", 14, "Married-civ-spouse", "Exec-managerial", "Husband", "White", "Male", 0, 0, 46, "United-States"],
]

# Positions of the numeric and the string-valued fields inside each raw instance.
NUMERIC_POSITIONS = (0, 3, 9, 10, 11)
CATEGORICAL_POSITIONS = (1, 2, 4, 5, 6, 7, 8, 12)

# Pull the numeric fields of every instance into a 2-D array and run them
# through the scaler loaded from scaler.pkl.
instances_numeric_features = np.array(
    [[row[position] for position in NUMERIC_POSITIONS] for row in instances]
)
instances_scaled_numeric_features = scaler.transform(instances_numeric_features)

# Pull the string-valued fields of every instance into a 2-D array and run them
# through the encoder loaded from categorical_encoder.pkl.
instances_categorical_features = np.array(
    [[row[position] for position in CATEGORICAL_POSITIONS] for row in instances]
)
instances_categorical_encoded = categorical_encoder.transform(instances_categorical_features)

# Join the two feature groups side by side: scaled numeric columns first, then
# the encoded categorical columns.
# NOTE(review): this assumes the encoder returns a dense array and that the
# model was trained on the same column order — confirm against the training code.
instances_combined = np.hstack((instances_scaled_numeric_features, instances_categorical_encoded))

# Convert the NumPy array to nested Python lists; the prediction request is
# serialized to JSON, and a NumPy array is not JSON-serializable.
preprocessed_instances_as_list = instances_combined.tolist()

# Send the preprocessed instances to the deployed endpoint for online prediction.
response = endpoint.predict(instances=preprocessed_instances_as_list)

# response.predictions holds one entry per submitted instance.
# NOTE(review): each entry is presumably a list of per-class scores — confirm
# against the model's output layer.
# For each entry, argmax picks the index of the highest score and
# inverse_transform maps that index back through label_encoder (presumably to
# the original class label).
for prediction in response.predictions:
    predicted_label = np.asarray(prediction).argmax()
    (decoded_label,) = label_encoder.inverse_transform([predicted_label])
    print(
        f'Predicted Label: {decoded_label}',
        f'Predicted Probabilities: {prediction}',
        sep="\n",
    )