In [8]:
pip install -r requirements.txt --quiet

Note: you may need to restart the kernel to use updated packages.


In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import joblib
import dill

from google.cloud import aiplatform
from google.cloud import storage

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

2024-09-17 19:01:27.878260: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-17 19:01:27.880287: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-17 19:01:27.889403: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-17 19:01:27.908994: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-17 19:01:27.937191: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been 

In [2]:
PROJECT = "jwd-test-sbcl"
LOCATION = "us-central1"

MODEL_NAME = "adult-income-cpr-model"
BUCKET_NAME = "jwd-test-sbcl"
GCS_FOLDER = "adult-income-cpr-model"

MODEL_LOCAL_PATH="./adult-income-cpr-model"
SRC_LOCAL_PATH="./source"

In [3]:
os.makedirs(MODEL_LOCAL_PATH, exist_ok=True)
os.makedirs(SRC_LOCAL_PATH, exist_ok=True)

In [4]:
# Load the data
data = pd.read_csv('adult-income.csv')

# Exclude 'functional_weight' and 'income_bracket' from features
features = ['age', 'workclass', 'education', 'education_num', 'marital_status', 'occupation', 
            'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country']
X = data[features].values
y = data['income_bracket'].values

# Encode the string labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Identify categorical features
categorical_features = ['workclass', 'education', 'marital_status', 'occupation', 
                        'relationship', 'race', 'sex', 'native_country']

# OneHotEncode the categorical features
categorical_encoder = OneHotEncoder(sparse_output=False)
categorical_encoded = categorical_encoder.fit_transform(data[categorical_features])

# Combine the numerical features with the encoded categorical features
numerical_features = ['age', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']
X_combined = np.hstack((data[numerical_features].values, categorical_encoded))

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y_encoded, test_size=0.2, random_state=42)

# Scale the numerical features in the training data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[:, :len(numerical_features)])
X_test_scaled = scaler.transform(X_test[:, :len(numerical_features)])

# Combine the scaled numerical features with the encoded categorical features
X_train_final = np.hstack((X_train_scaled, X_train[:, len(numerical_features):]))
X_test_final = np.hstack((X_test_scaled, X_test[:, len(numerical_features):]))

# Save the scaler and encoders for later use during prediction
joblib.dump(scaler, f'{MODEL_LOCAL_PATH}/scaler.pkl')
joblib.dump(label_encoder, f'{MODEL_LOCAL_PATH}/label_encoder.pkl')
joblib.dump(categorical_encoder, f'{MODEL_LOCAL_PATH}/categorical_encoder.pkl')


['./adult-income-cpr-model/categorical_encoder.pkl']

In [5]:
t = joblib.load(f'{MODEL_LOCAL_PATH}/scaler.pkl')


In [6]:
# Define the model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_final.shape[1],)),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_final, y_train, epochs=1, validation_split=0.2)

model.save(f"{MODEL_LOCAL_PATH}/{MODEL_NAME}.keras")
# tf.keras.models.save_model(model, f"{MODEL_LOCAL_PATH}/{MODEL_NAME}.keras")

t_model = tf.keras.models.load_model(f"{MODEL_LOCAL_PATH}/{MODEL_NAME}.keras")
print(tf.__version__)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8209 - loss: 0.3717 - val_accuracy: 0.8511 - val_loss: 0.3328
2.17.0


In [7]:
# Upload the model to Google Cloud Storage
def upload_directory_to_gcs(local_directory, bucket_name, gcs_destination):
    """
    Uploads a local directory to a Google Cloud Storage bucket.

    Args:
        local_directory (str): Path to the local directory.
        bucket_name (str): Name of the GCS bucket.
        gcs_destination (str): GCS destination path.
    """
    client = storage.Client()
    bucket = client.bucket(bucket_name)

    for root, dirs, files in os.walk(local_directory):
        for file in files:
            local_path = os.path.join(root, file)
            relative_path = os.path.relpath(local_path, local_directory)
            gcs_path = os.path.join(gcs_destination, relative_path)

            blob = bucket.blob(gcs_path)
            blob.upload_from_filename(local_path)
            print(f"Uploaded {local_path} to gs://{bucket_name}/{gcs_path}")

# Call the function to upload the directory
upload_directory_to_gcs(MODEL_LOCAL_PATH, BUCKET_NAME, GCS_FOLDER)

Uploaded ./adult-income-cpr-model/adult-income-cpr-model.keras to gs://jwd-test-sbcl/adult-income-cpr-model/adult-income-cpr-model.keras
Uploaded ./adult-income-cpr-model/categorical_encoder.pkl to gs://jwd-test-sbcl/adult-income-cpr-model/categorical_encoder.pkl
Uploaded ./adult-income-cpr-model/scaler.pkl to gs://jwd-test-sbcl/adult-income-cpr-model/scaler.pkl
Uploaded ./adult-income-cpr-model/label_encoder.pkl to gs://jwd-test-sbcl/adult-income-cpr-model/label_encoder.pkl


In [8]:
%%writefile $SRC_LOCAL_PATH/requirements.txt
fastapi
uvicorn
pandas
tensorflow
google-cloud-storage
google-cloud-aiplatform[prediction]
scikit-learn
dill

Overwriting ./source/requirements.txt


In [9]:
%%writefile $SRC_LOCAL_PATH/predictor.py

import pandas as pd
import numpy as np
import pickle
import joblib
import os
import shutil
import tensorflow as tf
from typing import Dict

from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

MODEL_NAME = "adult-income-cpr-model"

class CustomPredictor(Predictor):
    
    def __init__(self):
        return
    
    def load(self, artifacts_uri: str):
        prediction_utils.download_model_artifacts(artifacts_uri)
        files_and_dirs = os.listdir('.')
        for item in files_and_dirs:
            print(item)
        print(tf.__version__)
        temp = tf.keras.models.load_model(f"{MODEL_NAME}.keras")
        
#         try:
#             print('doing scaler')
#             with open(f"scaler.pkl", "rb") as f:
#                 scaler = joblib.load(f)
#             self._scaler = scaler
#         except Exception as e:
#             print(e)
    
#         try:
#             print('doing label')
#             with open(f"label_encoder.pkl", "rb") as f:
#                 label_encoder = joblib.load(f)
#             self._label_encoder = label_encoder
#         except Exception as e:
#             print(e)
        
#         try:
#             print('doing categories')
#             with open(f"categorical_encoder.pkl", "rb") as f:
#                 categorical_encoder = joblib.load(f)
#             self._categorical_encoder = categorical_encoder
#         except Exception as e:
#             print(e)
        
#         try:
#             print('doing model - take 1')
#             self._model = tf.keras.models.load_model(f"{MODEL_NAME}.keras")
#         except Exception as e:
#             print(e)
        

    def preprocess(self, prediction_input):
        instances = prediction_input
        # instances_numeric_features = np.array([[instance[0], instance[3], instance[9], instance[10], instance[11]] for instance in instances])
        # instances_scaled_numeric_features = scaler.transform(instances_numeric_features)        
        # instances_categorical_features = np.array([instance[1:3] + instance[4:9] + [instance[12]] for instance in instances])
        # instances_categorical_encoded = categorical_encoder.transform(instances_categorical_features)
        # instances_combined = np.hstack((instances_scaled_numeric_features, instances_categorical_encoded))
        # preprocessed_instances_as_list = instances_combined.tolist()
        # return preprocessed_instances_as_list
        return prediction_input

    def predict(self, instances):
        return self._model.predict(instances=instances)

    def postprocess(self, prediction_results):
        output = []
        for prediction in response.predictions:
            predicted_label = np.argmax(prediction)
            decoded_label = label_encoder.inverse_transform([predicted_label])[0]
            output_entry = {
                "predicted_label": deocded_label,
                "predicted_probabilities": prediction
            }
            output.append(output_entry)
        return output

Overwriting ./source/predictor.py


In [10]:
import importlib
from google.cloud.aiplatform.prediction import LocalModel
from source.predictor import CustomPredictor

REPOSITORY = "adult-income-cpr-repo"  # @param {type:"string"}
IMAGE = "adult-income-cpr-server"  # @param {type:"string"}

local_model = LocalModel.build_cpr_model(
    SRC_LOCAL_PATH,
    f"{LOCATION}-docker.pkg.dev/{PROJECT}/{REPOSITORY}/{IMAGE}",
    predictor=CustomPredictor,
    requirements_path=os.path.join(SRC_LOCAL_PATH, "requirements.txt"),
)
     

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [11]:
local_model.get_serving_container_spec()

image_uri: "us-central1-docker.pkg.dev/jwd-test-sbcl/adult-income-cpr-repo/adult-income-cpr-server"
predict_route: "/predict"
health_route: "/health"

In [12]:
request = """
[
    [39,"Private", "9th",5,"Married-civ-spouse","Other-service","Wife","Black","Female",3411,0,34,"United-States"],
    [77,"Private", "9th",5,"Married-civ-spouse","Priv-house-serv","Wife","Black","Female",0,0,10,"United-States"],
    [27,"Local-gov","HS-grad",9,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,80,"United-States"],
    [40,"Private","Masters",14,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,46,"United-States"]

]
"""
with local_model.deploy_to_local_endpoint(
    artifact_uri=f"gs://{BUCKET_NAME}/{GCS_FOLDER}"
) as local_endpoint:
    health_check_response = local_endpoint.run_health_check()
    print(health_check_response, health_check_response.content)

    predict_response = local_endpoint.predict(
        request=request,
        headers={"header-key": "header-value"},
    )
    print(predict_response, predict_response.content)

    local_endpoint.print_container_logs()

<Response [200]> b'{}'
<Response [400]> b'{"detail":"Unsupported content type of the request: None.\\nCurrently supported content-type in DefaultSerializer: \\"application/json\\"."}'


In [None]:
from google.cloud import artifactregistry_v1
from google.cloud.artifactregistry_v1 import Repository
from google.api_core.operation import Operation

def create_gcloud_repository(repository, region):
    client = artifactregistry_v1.ArtifactRegistryClient()
    parent = f"projects/{PROJECT}/locations/{region}"

    repo = Repository(
        name=f"{parent}/repositories/{repository}",
        format_=artifactregistry_v1.Repository.Format.DOCKER
    )

    try:
        operation: Operation = client.create_repository(
            parent=parent,
            repository_id=repository,
            repository=repo
        )
        print("Waiting for operation to complete...")
        response = operation.result()  # Wait for the operation to complete
        print(f"Repository created successfully: {response.name}")
    except Exception as e:
        print(f"Error creating repository: {e}")

create_gcloud_repository(REPOSITORY, REGION)
local_model.push_image()
print('done')

In [None]:
from google.cloud import aiplatform
aiplatform.init(project=PROJECT, location=REGION)
model = aiplatform.Model.upload(
    local_model=local_model,
    display_name=MODEL_NAME,
    artifact_uri=f"gs://{BUCKET_NAME}/{GCS_FOLDER}",
)
endpoint = model.deploy(machine_type="n1-standard-4")


In [None]:
health_check_response = le.run_health_check()
print(health_check_response, health_check_response.content)


In [None]:
%%writefile instances.json
[
    [39,"Private", "9th",5,"Married-civ-spouse","Other-service","Wife","Black","Female",3411,0,34,"United-States"],
    [77,"Private", "9th",5,"Married-civ-spouse","Priv-house-serv","Wife","Black","Female",0,0,10,"United-States"],
    [27,"Local-gov","HS-grad",9,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,80,"United-States"],
    [40,"Private","Masters",14,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,46,"United-States"]

]

In [None]:
predict_response = le.predict(
        request_file=INPUT_FILE,
        headers={"Content-Type": "application/json"},
    )

In [None]:
instances = [
    [39,"Private", "9th",5,"Married-civ-spouse","Other-service","Wife","Black","Female",3411,0,34,"United-States"],
    [77,"Private", "9th",5,"Married-civ-spouse","Priv-house-serv","Wife","Black","Female",0,0,10,"United-States"],
    [27,"Local-gov","HS-grad",9,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,80,"United-States"],
    [40,"Private","Masters",14,"Married-civ-spouse","Exec-managerial","Husband","White","Male",0,0,46,"United-States"]

]
response = endpoint.predict(instances=instances)
print(response.predictions)