In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sklearn

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

### Check the version of the framework you are using because the pre built container used for batch prediction needs to match

In [2]:
print('The scikit-learn version is {}.'.format(sklearn.__version__))

The scikit-learn version is 1.0.2.


In [3]:
iris_data = load_iris(as_frame=True)
iris_data.data.to_csv("iris.csv")
data = iris_data.data
labels = iris_data.target
print(data.shape)
print(labels.shape)

(150, 4)
(150,)


In [4]:
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [5]:
labels.head()

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

In [6]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=13)

In [7]:
clf = Pipeline([
    
    # Scaler
    ('std_scaler', StandardScaler()),
    
    # Classifier
    ('gbtrees', GradientBoostingClassifier())

])
clf.fit(X=x_train, y=y_train)

Pipeline(steps=[('std_scaler', StandardScaler()),
                ('gbtrees', GradientBoostingClassifier())])

In [8]:
print(f'Model train accuracy:{accuracy_score(y_train, clf.predict(x_train))}')
print(f'Model test accuracy:{accuracy_score(y_test, clf.predict(x_test))}')

Model train accuracy:1.0
Model test accuracy:0.9


In [9]:
from datetime import datetime

# Getting the current date and time
TIMESTAMP = str(datetime.now())
BUCKET_NAME = "automl-output-mlops"
FILENAME = "demo_etl.pkl"
LOCATION = "us-central1"
#pre-built containers
DOCKER_IMAGE_URI = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest" # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers
#TODO: change to actual project
PROJECT_ID = "mlops-demos-306914"

In [10]:
pickle.dump(clf, open(FILENAME, "wb"))

In [11]:
loaded_model = pickle.load(open(FILENAME, 'rb'))
result = loaded_model.score(x_test, y_test)
print(result)

0.9


In [12]:
#https://cloud.google.com/storage/docs/uploading-objects
from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)
    
upload_blob(bucket_name=BUCKET_NAME, source_file_name=FILENAME, destination_blob_name=FILENAME)

In [13]:
#https://cloud.google.com/vertex-ai/docs/model-registry/import-model#pre-built-container
#https://github.com/googleapis/python-aiplatform/blob/HEAD/samples/model-builder/upload_model_sample.py
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=LOCATION)
#TODO: change display name
model = aiplatform.Model.upload(
        display_name=TIMESTAMP,
        artifact_uri="gs://"+BUCKET_NAME+"/",
        serving_container_image_uri=DOCKER_IMAGE_URI,
        sync=True)

model.wait()

print(model.display_name)
print(model.resource_name)

Creating Model
Create Model backing LRO: projects/983707479002/locations/us-central1/models/2574119448344526848/operations/9190294672642146304
Model created. Resource name: projects/983707479002/locations/us-central1/models/2574119448344526848@1
To use this Model in another session:
model = aiplatform.Model('projects/983707479002/locations/us-central1/models/2574119448344526848@1')
2022-09-14 07:23:28.394969
projects/983707479002/locations/us-central1/models/2574119448344526848


# <span style="color:red"> Problem:</span>
## <span style="color:orange">Batch_predict job hangs. runs for 40 minutes and then fails with the following message: </span>
## <span style="color:orange">Error: model server never became ready. Please validate that your model file or container configuration are valid.</span>
## <span style="color:blue">The sci kit version used to train the model 1.0.2 </span>
## <span style="color:blue">pre built prediction container version is 1.0</span>

In [15]:
#https://github.com/googleapis/python-aiplatform/blob/2bc9b2b0d048c29ba43c8b4c3ea51370515d08c3/samples/model-builder/create_batch_prediction_job_sample.py
from google.cloud import aiplatform

#No need to re-init but I wanted the cells to be standalone
aiplatform.init(project=PROJECT_ID, location=LOCATION)

my_model = aiplatform.Model(model.resource_name)
#TODO: I pre uploaded the CSV to a bucket. Alternatively this can be done on the fly
#TODO: Remove test2
batch_prediction_job = my_model.batch_predict(
        job_display_name=TIMESTAMP,
        gcs_source="gs://automl-output-mlops/iris.csv",
        gcs_destination_prefix="gs://automl-output-mlops/",
        machine_type = "n1-standard-2")

batch_prediction_job.wait()

print(batch_prediction_job.display_name)
print(batch_prediction_job.resource_name)
print(batch_prediction_job.state)

Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/983707479002/locations/us-central1/batchPredictionJobs/466455988333969408
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/983707479002/locations/us-central1/batchPredictionJobs/466455988333969408')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/466455988333969408?project=983707479002
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/466455988333969408 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/466455988333969408 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/466455988333969408 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/983707479002/locations/us-central1/batchPredictionJobs/466455

KeyboardInterrupt: 

In [None]:
#download CSV from bucket
#Write to database X