In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sklearn

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

In [2]:
print('The scikit-learn version is {}.'.format(sklearn.__version__))

The scikit-learn version is 1.0.


In [24]:
from datetime import datetime

TIMESTAMP = str(datetime.now())
BUCKET_NAME = "automl-output-mlops"
MODEL_FILENAME = "model.pkl"
CSV_FILENAME = "test.csv"
LOCATION = "us-central1"
REGION = LOCATION
#pre-built containers
DOCKER_IMAGE_URI = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest" # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers

#TODO: change to actual project
PROJECT_ID = "mlops-demos-306914"

In [13]:
iris_data = load_iris(as_frame=True)
df = pd.DataFrame(data=iris_data.data)
df.to_csv(CSV_FILENAME, index=False)

data = iris_data.data
labels = iris_data.target
print(data.shape)
print(labels.shape)

(150, 4)
(150,)


In [4]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=13)

In [5]:
clf = Pipeline([
    
    # Scaler
    ('std_scaler', StandardScaler()),
    
    # Classifier
    ('gbtrees', GradientBoostingClassifier())

])
clf.fit(X=x_train, y=y_train)

Pipeline(steps=[('std_scaler', StandardScaler()),
                ('gbtrees', GradientBoostingClassifier())])

In [6]:
print(f'Model train accuracy:{accuracy_score(y_train, clf.predict(x_train))}')
print(f'Model test accuracy:{accuracy_score(y_test, clf.predict(x_test))}')

Model train accuracy:1.0
Model test accuracy:0.9


In [8]:
pickle.dump(clf, open(MODEL_FILENAME, "wb"))

In [9]:
loaded_model = pickle.load(open(MODEL_FILENAME, 'rb'))
result = loaded_model.score(x_test, y_test)
print(result)

0.9


In [10]:
#https://cloud.google.com/storage/docs/uploading-objects
from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

In [11]:
upload_blob(bucket_name=BUCKET_NAME, source_file_name=MODEL_FILENAME, destination_blob_name=MODEL_FILENAME)

In [14]:
upload_blob(bucket_name=BUCKET_NAME, source_file_name=CSV_FILENAME, destination_blob_name=CSV_FILENAME)

In [None]:
#https://cloud.google.com/vertex-ai/docs/model-registry/import-model#pre-built-container
#https://github.com/googleapis/python-aiplatform/blob/HEAD/samples/model-builder/upload_model_sample.py
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=LOCATION)

model = aiplatform.Model.upload(
        display_name=TIMESTAMP,
        artifact_uri="gs://"+BUCKET_NAME+"/",
        serving_container_image_uri=DOCKER_IMAGE_URI)

model.wait()

print(model.display_name)
print(model.resource_name)

In [26]:
learning_rate=0.01

In [27]:
print(str(learning_rate))

0.01


In [17]:
#https://github.com/googleapis/python-aiplatform/blob/2bc9b2b0d048c29ba43c8b4c3ea51370515d08c3/samples/model-builder/create_batch_prediction_job_sample.py
from google.cloud import aiplatform

#No need to re-init but I wanted the cells to be standalone
aiplatform.init(project=PROJECT_ID, location=LOCATION)

#TODO: add monitoring
#https://cloud.google.com/vertex-ai/docs/model-monitoring/model-monitoring-batch-predictions#console
#https://colab.sandbox.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_monitoring/batch_prediction_model_monitoring.ipynb
my_model = aiplatform.Model(model.resource_name)
source = "gs://"+BUCKET_NAME+"/test.csv"
print(source)
batch_prediction_job = my_model.batch_predict(
        job_display_name=TIMESTAMP,
        gcs_source=[source],
        gcs_destination_prefix="gs://"+BUCKET_NAME,
        machine_type = "n1-standard-32",
        starting_replica_count=1,
        max_replica_count=2,
        sync=False,
        instances_format="csv" #https://googleapis.dev/python/aiplatform/latest/aiplatform.html
)

batch_prediction_job.wait()

print(batch_prediction_job.display_name)
print(batch_prediction_job.resource_name)
print(batch_prediction_job.state)

gs://automl-output-mlops/test.csv
Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/8561588357602213888?project=983707479002
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/8561588357602213888 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/983707479002/location

In [None]:
#download CSV from bucket
#Write to database X