In [3]:
from google.cloud import aiplatform
from google.cloud.aiplatform_v1.types import SampledShapleyAttribution
from google.cloud.aiplatform_v1.types.explanation import ExplanationParameters

project_id = "udemy-mlops"
region = "us-central1"
bucket = "gs://sid-kubeflow-v1"

aiplatform.init(project=project_id, location=region, staging_bucket=bucket)

<h4>Custom Model Training

In [4]:

job = aiplatform.CustomTrainingJob(
    display_name="bikeshare-training-job",
    script_path="model-training-code.py",
    container_uri="us-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest",
    requirements=["gcsfs"]
)

job.run(
    replica_count=1,
    machine_type="n1-standard-4",
    sync=True
)

job.wait()

Training script copied to:
gs://sid-kubeflow-v1/aiplatform-2023-07-24-06:37:20.777-aiplatform_custom_trainer_script-0.1.tar.gz.
Training Output directory:
gs://sid-kubeflow-v1/aiplatform-custom-training-2023-07-24-06:37:21.141 
View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/3233475749520343040?project=1090925531874
CustomTrainingJob projects/1090925531874/locations/us-central1/trainingPipelines/3233475749520343040 current state:
PipelineState.PIPELINE_STATE_RUNNING
View backing custom job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/8562773424978526208?project=1090925531874
CustomTrainingJob projects/1090925531874/locations/us-central1/trainingPipelines/3233475749520343040 current state:
PipelineState.PIPELINE_STATE_RUNNING
CustomTrainingJob projects/1090925531874/locations/us-central1/trainingPipelines/3233475749520343040 current state:
PipelineState.PIPELINE_STATE_RUNNING
CustomTrainingJob projects/1090925531

<h4>Upload Trained Model to Model Registry 

In [5]:
display_name = "bikeshare-model-sdk"
artifact_uri = "gs://sid-kubeflow-v1/bikeshare-model/artifact/"
serving_container_image_uri = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest"

exp_metadata = {"inputs": {"input_features": {}}, "outputs": {"predicted_outcome": {}}}

model = aiplatform.Model.upload(
        display_name=display_name,
        artifact_uri=artifact_uri,
        serving_container_image_uri=serving_container_image_uri,
        explanation_metadata=exp_metadata,
        explanation_parameters=ExplanationParameters(
                sampled_shapley_attribution=SampledShapleyAttribution(path_count=25)
            ),
        sync=False
    )

model.wait()

Creating Model
Create Model backing LRO: projects/1090925531874/locations/us-central1/models/6594037313586593792/operations/6210128741255872512
Model created. Resource name: projects/1090925531874/locations/us-central1/models/6594037313586593792@1
To use this Model in another session:
model = aiplatform.Model('projects/1090925531874/locations/us-central1/models/6594037313586593792@1')


<h4>Deploy Model to Vertex Endpoint

In [6]:
deployed_model_display_name = "bikeshare-endpoint-exp-v1"
traffic_split = {"0": 100}
machine_type = "n1-standard-4"
min_replica_count = 1
max_replica_count = 1

endpoint = model.deploy(
        deployed_model_display_name=deployed_model_display_name,
        traffic_split=traffic_split,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count
    )

Creating Endpoint
Create Endpoint backing LRO: projects/1090925531874/locations/us-central1/endpoints/2961816044391366656/operations/1024233770338746368
Endpoint created. Resource name: projects/1090925531874/locations/us-central1/endpoints/2961816044391366656
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/1090925531874/locations/us-central1/endpoints/2961816044391366656')
Deploying model to Endpoint : projects/1090925531874/locations/us-central1/endpoints/2961816044391366656
Deploy Endpoint model backing LRO: projects/1090925531874/locations/us-central1/endpoints/2961816044391366656/operations/4480746484345602048
Endpoint model deployed. Resource name: projects/1090925531874/locations/us-central1/endpoints/2961816044391366656


In [None]:
endpoint = aiplatform.Endpoint('projects/1090925531874/locations/us-central1/endpoints/2961816044391366656')

<h4> Run Predictions

In [20]:
instances_list = [
    [0.24, 0.81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
    [0.8,0.27, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,0.0,0.0,0.0, 0.0, 0.0, 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0]
    ]

prediction = endpoint.predict(instances_list)
print(prediction)

Prediction(predictions=[4.113863795903288, 5.874693031755736], deployed_model_id='4375161276083470336', model_version_id='1', model_resource_name='projects/1090925531874/locations/us-central1/models/6594037313586593792', explanations=None)


<h4> Define the input features used for model training

In [21]:
features = ['time',
 'coupon',
 'expiration',
 'age',
 'education',
 'occupation',
 'income',
 'Bar',
 'CoffeeHouse',
 'CarryAway',
 'Restaurant20To50',
 'toCoupon_GEQ15min',
 'toCoupon_GEQ25min',
 'direction_same',
 'passanger_destination',
 'marital_hasChildren',
 'temperature_weather']

In [37]:
response = endpoint.explain(instances=instances_list)

for explanation in response.explanations:
    attributions = explanation.attributions
    for attribution in attributions:
            print("  attribution")
            print("   baseline_output_value:", attribution.baseline_output_value)
            print("   instance_output_value:", attribution.instance_output_value)
            print("   output_display_name:", attribution.output_display_name)
            print("   approximation_error:", attribution.approximation_error)
            print("   output_name:", attribution.output_name)
            output_index = attribution.output_index
            for output_index in output_index:
                # print("   output_index:", output_index)
                attrs = attribution.feature_attributions
                rows = {"feature_name": [], "attribution": []}
                for i, val in enumerate(features):
                    rows["feature_name"].append(val)
                    rows["attribution"].append(attrs["input_features"][i])
            
            print(rows)
            print("-------------------------------------")

  attribution
   baseline_output_value: 4.105136956929912
   instance_output_value: 4.1138637959032875
   output_display_name: 
   approximation_error: 0.0008908002980896847
   output_name: predicted_outcome
{'feature_name': ['time', 'coupon', 'expiration', 'age', 'education', 'occupation', 'income', 'Bar', 'CoffeeHouse', 'CarryAway', 'Restaurant20To50', 'toCoupon_GEQ15min', 'toCoupon_GEQ25min', 'direction_same', 'passanger_destination', 'marital_hasChildren', 'temperature_weather'], 'attribution': [0.3221664264934795, -0.2107985446977723, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
-------------------------------------
  attribution
   baseline_output_value: 4.105136956929912
   instance_output_value: 5.874693031755736
   output_display_name: 
   approximation_error: 0.00016727627601971653
   output_name: predicted_outcome
{'feature_name': ['time', 'coupon', 'expiration', 'age', 'education', 'occupation', 'income', 'Bar', 'CoffeeHouse', 'CarryAway', 'Re

In [None]:
# model = aiplatform.Model('projects/1090925531874/locations/us-central1/models/6594037313586593792')

gcs_input_uri = 'gs://sid-vertex-mlops/bike-share/batch.csv'
BUCKET_URI = "gs://sid-kubeflow-v1/bikeshare-model/bikeshare-batch-prediction-output"

batch_predict_job = model.batch_predict(
    job_display_name="bikeshare_batch_predict",
    gcs_source=gcs_input_uri,
    gcs_destination_prefix=BUCKET_URI,
    instances_format="csv",
    predictions_format="jsonl",
    machine_type="n1-standard-4",
    starting_replica_count=1,
    max_replica_count=1,
    generate_explanation=True,
    sync=False
)