In [None]:
# Install the notebook's dependencies; the explicit %pip magic targets the running kernel's environment.
%pip install -r requirements.txt --quiet

In [None]:
# import required libraries

import numpy as np
import pandas as pd
import tensorflow as tf
import os
import joblib
import dill

from google.cloud import aiplatform
from google.cloud import storage

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

In [None]:
# Notebook-wide settings. Replace the PROJECT placeholder with your own project id.

PROJECT = "[project]"
LOCATION = "us-central1"

MODEL_NAME = "adult-income-cpr-model"

# Local working directories: one for the trained artifacts (named after the
# model), one for the custom predictor source code.
MODEL_LOCAL_PATH = f"./{MODEL_NAME}"
SRC_LOCAL_PATH = "./source"

In [None]:
# create the local working directories (nothing happens if they already exist)

for local_dir in (MODEL_LOCAL_PATH, SRC_LOCAL_PATH):
    os.makedirs(local_dir, exist_ok=True)

In [None]:
# prepare the data

# Load the data
data = pd.read_csv('adult-income.csv')

# Exclude 'functional_weight' and 'income_bracket' from features.
# The custom predictor addresses request values by position in this order.
features = ['age', 'workclass', 'education', 'education_num', 'marital_status', 'occupation', 
            'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country']
X = data[features].values
y = data['income_bracket'].values

# Encode the string labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Identify categorical and numerical features
categorical_features = ['workclass', 'education', 'marital_status', 'occupation', 
                        'relationship', 'race', 'sex', 'native_country']
numerical_features = ['age', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']
n_numerical = len(numerical_features)

# OneHotEncode the categorical features.
# handle_unknown='ignore' makes the saved encoder emit an all-zero group for a
# category it never saw during fit instead of raising, so a prediction request
# with a new value does not fail. The training matrix itself is unaffected
# because every training category is known.
categorical_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
categorical_encoded = categorical_encoder.fit_transform(data[categorical_features])

# Combine the numerical features (first n_numerical columns) with the encoded categorical features
X_combined = np.hstack((data[numerical_features].values, categorical_encoded))

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y_encoded, test_size=0.2, random_state=42)

# Scale the numerical columns; the scaler is fitted on the training split only
# and merely applied to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[:, :n_numerical])
X_test_scaled = scaler.transform(X_test[:, :n_numerical])

# Combine the scaled numerical features with the encoded categorical features
X_train_final = np.hstack((X_train_scaled, X_train[:, n_numerical:]))
X_test_final = np.hstack((X_test_scaled, X_test[:, n_numerical:]))

# Save the scaler and encoders for later use during prediction
joblib.dump(scaler, f'{MODEL_LOCAL_PATH}/scaler.pkl')
joblib.dump(label_encoder, f'{MODEL_LOCAL_PATH}/label_encoder.pkl')
joblib.dump(categorical_encoder, f'{MODEL_LOCAL_PATH}/categorical_encoder.pkl')


In [None]:
# build, train and save the model

# seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable from one run of the notebook to the next
tf.keras.utils.set_random_seed(42)

# define the model: two hidden layers and one softmax output unit per label class
model = Sequential([
    tf.keras.Input(shape=(X_train_final.shape[1],)),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# compile the model (labels are integer class ids, hence the sparse loss)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# train the model, holding out 20% of the training split for validation
model.fit(X_train_final, y_train, epochs=10, validation_split=0.2)

# save the model next to the preprocessing artifacts
model.save(f"{MODEL_LOCAL_PATH}/{MODEL_NAME}.keras")

In [None]:
%%writefile $SRC_LOCAL_PATH/requirements.txt
fastapi
uvicorn
pandas
tensorflow
google-cloud-storage
google-cloud-aiplatform[prediction]
scikit-learn
dill

In [None]:
%%writefile $SRC_LOCAL_PATH/predictor.py

import pandas as pd
import numpy as np
import pickle
import joblib
import os
import shutil
import tensorflow as tf
from typing import Dict

from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

# Base name of the saved Keras model file ("<MODEL_NAME>.keras").
MODEL_NAME = "adult-income-cpr-model"

class CustomPredictor(Predictor):
    """Custom prediction routine for the adult-income classifier.

    Each request instance is a flat list of 13 values. The numeric values are
    scaled, the categorical values are one-hot encoded, the Keras model scores
    the combined matrix, and the most probable class is decoded back to its
    text label.
    """

    # Positions of the numeric values inside one request instance.
    _NUMERIC_COLUMNS = (0, 3, 9, 10, 11)
    # Positions of the categorical values inside one request instance.
    _CATEGORICAL_COLUMNS = (1, 2, 4, 5, 6, 7, 8, 12)

    def __init__(self):
        """Do nothing; all state is created in ``load``."""
        return

    # load the model and the preprocessing objects
    def load(self, artifacts_uri: str):
        """Download the artifacts and load the model and preprocessing objects.

        Args:
            artifacts_uri: Location of the model artifacts, passed straight to
                ``prediction_utils.download_model_artifacts``.

        The files are then opened by bare file name (presumably the download
        lands in the working directory -- confirm against the SDK).
        """
        prediction_utils.download_model_artifacts(artifacts_uri)
        self._model = tf.keras.models.load_model(f"{MODEL_NAME}.keras")

        # NOTE: joblib.load unpickles its input; only use trusted artifacts.
        self._scaler = joblib.load("scaler.pkl")
        self._label_encoder = joblib.load("label_encoder.pkl")
        self._categorical_encoder = joblib.load("categorical_encoder.pkl")

    # preprocess the raw input data
    def preprocess(self, prediction_input):
        """Turn the request body into the feature matrix fed to the model.

        Args:
            prediction_input: Mapping with an ``"instances"`` entry holding a
                sequence of rows; each row is indexed by position.

        Returns:
            A 2-D array: scaled numeric columns followed by the one-hot
            encoded categorical columns.
        """
        instances = prediction_input["instances"]

        # dtype=float rejects non-numeric values here, before they reach the scaler.
        numeric = np.array(
            [[row[i] for i in self._NUMERIC_COLUMNS] for row in instances],
            dtype=float,
        )
        categorical = np.array(
            [[row[i] for i in self._CATEGORICAL_COLUMNS] for row in instances]
        )

        numeric_scaled = self._scaler.transform(numeric)
        categorical_encoded = self._categorical_encoder.transform(categorical)
        return np.hstack((numeric_scaled, categorical_encoded))

    # make the prediction
    def predict(self, instances):
        """Run the loaded model on the preprocessed feature matrix."""
        return self._model.predict(instances)

    # select the higher probability
    # convert to text label
    # compose array of results
    def postprocess(self, prediction_results):
        """Convert model outputs into the response body.

        Args:
            prediction_results: One row of class probabilities per instance.

        Returns:
            ``{"predictions": [...]}`` where each entry carries the decoded
            ``predicted_label`` and the row's ``predicted_probabilities``.
        """
        class_ids = [int(np.argmax(row)) for row in prediction_results]
        # Decode the whole batch in one call rather than once per row.
        labels = self._label_encoder.inverse_transform(class_ids)
        predictions = [
            {
                "predicted_label": label,
                "predicted_probabilities": row.tolist(),
            }
            for label, row in zip(labels, prediction_results)
        ]
        return {"predictions": predictions}

In [None]:
# build a local model (serving container image) around the custom predictor

import importlib
from google.cloud.aiplatform.prediction import LocalModel
from source.predictor import CustomPredictor

REPOSITORY = "adult-income-cpr-repo"  # @param {type:"string"}
IMAGE = "adult-income-cpr-server"  # @param {type:"string"}

# Artifact Registry URI used as the image name
image_uri = f"{LOCATION}-docker.pkg.dev/{PROJECT}/{REPOSITORY}/{IMAGE}"
requirements_file = os.path.join(SRC_LOCAL_PATH, "requirements.txt")

local_model = LocalModel.build_cpr_model(
    SRC_LOCAL_PATH,
    image_uri,
    predictor=CustomPredictor,
    requirements_path=requirements_file,
)
     

In [None]:
# do the prediction against a locally running container

import json

# Sample rows: one value per model feature, in the order the predictor expects.
sample_instances = [
    [39, "Private", "9th", 5, "Married-civ-spouse", "Other-service", "Wife", "Black", "Female", 3411, 0, 34, "United-States"],
    [77, "Private", "9th", 5, "Married-civ-spouse", "Priv-house-serv", "Wife", "Black", "Female", 0, 0, 10, "United-States"],
    [27, "Local-gov", "HS-grad", 9, "Married-civ-spouse", "Exec-managerial", "Husband", "White", "Male", 0, 0, 80, "United-States"],
    [40, "Private", "Masters", 14, "Married-civ-spouse", "Exec-managerial", "Husband", "White", "Male", 0, 0, 46, "United-States"],
]

# Serialise with json.dumps so the request body is always well-formed JSON,
# instead of maintaining a hand-written JSON string.
request = json.dumps({"instances": sample_instances})

with local_model.deploy_to_local_endpoint(
    artifact_uri=MODEL_LOCAL_PATH
) as local_endpoint:
    health_check_response = local_endpoint.run_health_check()
    print(health_check_response, health_check_response.content)

    predict_response = local_endpoint.predict(
        request=request,
        headers={"content-type": "application/json"},
    )
    print(predict_response, predict_response.content)

    # container logs are the first place to look when either call above fails
    local_endpoint.print_container_logs()

In [None]:
# push the prediction container to Artifact Registry
! gcloud auth configure-docker {LOCATION}-docker.pkg.dev --quiet
# Create the repository in PROJECT explicitly: the image URI points at PROJECT,
# which is not necessarily gcloud's currently configured default project.
! gcloud artifacts repositories create {REPOSITORY} --project={PROJECT} --repository-format=docker --location={LOCATION} --description="Docker repository"
local_model.push_image()

In [None]:
# upload the trained model to Vertex AI

from google.cloud import aiplatform

# Single definition of the Cloud Storage prefix that holds the artifacts.
# NOTE(review): assumes a bucket named after the project already exists -- confirm.
ARTIFACT_URI = f"gs://{PROJECT}/{MODEL_NAME}"

# copy from the directory the artifacts were actually saved to
! gcloud storage cp {MODEL_LOCAL_PATH}/* {ARTIFACT_URI}

# point the SDK at the same project and region the serving image was built for
aiplatform.init(project=PROJECT, location=LOCATION)

# note: from here on `model` is the Vertex AI model resource, no longer the Keras model
model = aiplatform.Model.upload(
    local_model=local_model,
    display_name=MODEL_NAME,
    artifact_uri=ARTIFACT_URI,
)

In [None]:
# deploy the uploaded model to a newly created endpoint

DEPLOY_MACHINE_TYPE = "n2-highmem-2"
endpoint = model.deploy(machine_type=DEPLOY_MACHINE_TYPE)

In [None]:
# do predictions with the endpoint

# rows to score: one value per model feature, in the order the predictor expects
instances = [
    [39,"Private", "9th",5,"Married-civ-spouse","Other-service","Wife","Black","Female",3411,0,34,"United-States"],
    [77,"Private", "9th",5,"Married-civ-spouse","Priv-house-serv","Wife","Black","Female",0,0,10,"United-States"],
    [27,"Local-gov","HS-grad",9,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,80,"United-States"],
    [40,"Private","Masters",14,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,46,"United-States"],
]

# endpoint.predict takes the instance list directly, so no request wrapper is needed
response = endpoint.predict(instances=instances)
print(response)