In [64]:
!gcloud services enable aiplatform.googleapis.com

E0531 14:10:26.947290868     246 backup_poller.cc:136]       Run client channel backup poller: {"created":"@1654006226.947056850","description":"pollset_work","file":"src/core/lib/iomgr/ev_epoll1_linux.cc","file_line":247,"referenced_errors":[{"created":"@1654006226.947036106","description":"Bad file descriptor","errno":9,"file":"src/core/lib/iomgr/ev_epoll1_linux.cc","file_line":732,"os_error":"Bad file descriptor","syscall":"epoll_wait"}]}
Operation "operations/acat.p2-662407373696-b39fb5d4-514d-4bd4-a079-724563a2e6ca" finished successfully.


In [54]:
!gcloud config list

[core]
account = argolis-demo@senchan.altostrat.com
disable_usage_reporting = True
project = vertex-pipelines-handson

Your active configuration is: [default]


# Titanic tutorial

In [2]:
!pip3 install --user -U google-cloud-aiplatform 
!pip3 install --user -U kfp
!pip3 install --user -U google_cloud_pipeline_components

Collecting google_cloud_pipeline_components
  Downloading google_cloud_pipeline_components-1.0.7-py3-none-any.whl (460 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m460.6/460.6 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting google-cloud-notebooks>=0.4.0
  Downloading google_cloud_notebooks-1.3.1-py2.py3-none-any.whl (353 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m353.9/353.9 kB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: google-cloud-notebooks, google_cloud_pipeline_components
Successfully installed google-cloud-notebooks-1.3.1 google_cloud_pipeline_components-1.0.7


In [2]:
# kfpを新規installした場合こちらを実行
import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [84]:
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
from typing import NamedTuple
from kfp.v2 import dsl
from kfp.v2.dsl import (Artifact,
                        Dataset,
                        Input,
                        Model,
                        Output,
                        Metrics,
                        ClassificationMetrics,
                        pipeline,
                        component, 
                        OutputPath, 
                        InputPath)

from kfp.v2 import compiler
from google.cloud import bigquery
from google.cloud import aiplatform
# We'll use this namespace for metadata querying
from google.cloud import aiplatform_v1
from google.cloud import storage
from google.cloud.aiplatform import pipeline_jobs
from google_cloud_pipeline_components import aiplatform as gcc_aip

In [30]:
PROJECT_ID="vertex-pipelines-handson"

In [5]:
# BigQueryにDatasetを作成
client = bigquery.Client()

dataset = bigquery.Dataset("{}.vertexai_handson".format(PROJECT_ID))
dataset.location = "US"

dataset = client.create_dataset(dataset, timeout=30)

Conflict: 409 POST https://bigquery.googleapis.com/bigquery/v2/projects/vertex-pipelines-handson/datasets?prettyPrint=false: Already Exists: Dataset vertex-pipelines-handson:vertexai_handson

In [15]:
# BigQueryにデータを投入
client = bigquery.Client()
job_config = bigquery.LoadJobConfig(
    autodetect=True, source_format=bigquery.SourceFormat.CSV
)
uri = "gs://bigquery-handson/titanic/titanic.csv"
table_id = "{}.vertexai_handson.titanic".format(PROJECT_ID)
load_job = client.load_table_from_uri(
    uri, table_id, job_config=job_config
)  # Make an API request.
load_job.result()  # Waits for the job to complete.
destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))

Loaded 1309 rows.


In [77]:
BUCKET_NAME = "{}-pipelines".format(PROJECT_ID)
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root/"

In [78]:
client = storage.Client(
#    credentials=credentials,
    project=PROJECT_ID
)

bucket = client.create_bucket(BUCKET_NAME)
bucket.iam_configuration.uniform_bucket_level_access_enabled = True
bucket.patch()

In [79]:
REGION = "us-central1"  # @param {type: "string"}
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

In [85]:
@component(
    packages_to_install=["google-cloud-bigquery", "pandas", "pyarrow", "db-dtypes"],
    base_image="python:3.9",
    output_component_file="create_dataset.yaml"
)
def get_dataframe(
    bq_table: str,
    project: str,
    output_data_path: OutputPath("Dataset")
):
    from google.cloud import bigquery
    import pandas as pd

    bqclient = bigquery.Client(project=project)
    table = bigquery.TableReference.from_string(
        bq_table
    )
    rows = bqclient.list_rows(
        table
    )
    dataframe = rows.to_dataframe(
        create_bqstorage_client=True,
    )
    dataframe = dataframe.sample(frac=1, random_state=2)
    dataframe.to_csv(output_data_path, index=False)

In [86]:
@component(
    packages_to_install=["sklearn", "pandas", "joblib", "db-dtypes"],
    base_image="python:3.9",
    output_component_file="titanic_model_component.yaml",
)
def sklearn_train(
    dataset: Input[Dataset],
    metrics: Output[Metrics],
    model: Output[Model]
):
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import roc_curve
    from sklearn.model_selection import train_test_split
    from joblib import dump

    import pandas as pd
    df = pd.read_csv(dataset.path)
    df = df.drop(["name","ticket","cabin","boat","body","home_dest"], axis=1)
    df["sex"] = df["sex"].map({"male":0,"female":1})
    df = df[df.fare != '?']
    df = df[df.embarked != '?']
    df = df[df.age != '?']
    df = pd.get_dummies(df, columns=['embarked'])
    labels = df.pop("survived").tolist()
    data = df.values.tolist()
    x_train, x_test, y_train, y_test = train_test_split(data, labels)

    skmodel = DecisionTreeClassifier()
    skmodel.fit(x_train,y_train)
    score = skmodel.score(x_test,y_test)
    print('accuracy is:',score)

    metrics.log_metric("accuracy",(score * 100.0))
    metrics.log_metric("framework", "Scikit Learn")
    metrics.log_metric("dataset_size", len(df))
    dump(skmodel, model.path + ".joblib")

In [87]:
@component(
    packages_to_install=["google-cloud-aiplatform"],
    base_image="python:3.9",
    output_component_file="titanic_deploy_component.yaml",
)
def deploy_model(
    model: Input[Model],
    project: str,
    region: str,
    vertex_endpoint: Output[Artifact],
    vertex_model: Output[Model]
):
    from google.cloud import aiplatform

    aiplatform.init(project=project, location=region)

    deployed_model = aiplatform.Model.upload(
        display_name="titanic-model-pipeline",
        artifact_uri = model.uri.replace("model", ""),
        serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest"
    )
    endpoint = deployed_model.deploy(machine_type="n1-standard-4")

    # Save data to the output params
    vertex_endpoint.uri = endpoint.resource_name
    vertex_model.uri = deployed_model.resource_name

In [88]:
@pipeline(
    # Default pipeline root. You can override it when submitting the pipeline.
    pipeline_root=PIPELINE_ROOT,
    # A name for the pipeline.
    name="titanic-pipeline",
)
def pipeline(
    bq_table: str = "",
    output_data_path: str = "data.csv",
    project: str = PROJECT_ID,
    region: str = REGION
):
    dataset_task = get_dataframe(bq_table, project)

    model_task = sklearn_train(
        dataset_task.output
    )

    deploy_task = deploy_model(
        model=model_task.outputs["model"],
        project=project,
        region=region
    )

In [60]:
!gcloud auth list


No credentialed accounts.

To login, run:
  $ gcloud auth login `ACCOUNT`



In [50]:
!gcloud config set account argolis-demo@senchan.altostrat.com
#!gcloud auth login --no-launch-browser

Updated property [core/account].


You are running on a Google Compute Engine virtual machine.
It is recommended that you use service accounts for authentication.

You can run:

  $ gcloud config set account `ACCOUNT`

to switch accounts if necessary.

Your credentials may be visible to others with access to this
virtual machine. Are you sure you want to authenticate with
your personal account?

Do you want to continue (Y/n)?  ^C


Command killed by keyboard interrupt



In [89]:
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="titanic_pipeline.json"
)



In [90]:
titanic_run = aiplatform.PipelineJob(
    display_name="titanic-pipeline",
    template_path="titanic_pipeline.json",
    job_id="titanic-pipeline-{0}".format(TIMESTAMP),
    parameter_values={"bq_table": "{}.vertexai_handson.titanic".format(PROJECT_ID)},
    enable_caching=False,
)

In [91]:
titanic_run.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/662407373696/locations/us-central1/pipelineJobs/titanic-pipeline-20220531153824
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/662407373696/locations/us-central1/pipelineJobs/titanic-pipeline-20220531153824')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/titanic-pipeline-20220531153824?project=662407373696


In [72]:
ENDPOINT_NAME="titanic-model-pipeline_endpoint"
#ENDPOINT_NAME="test"
instance = [[3,0,18,0,0,8.6625,0,0,1]]
ENDPOINT_ID = !(gcloud ai endpoints list --region=$REGION \
              --format='value(ENDPOINT_ID)' \
              --filter=display_name=$ENDPOINT_NAME \
              --sort-by=creationTimeStamp | tail -1)
ENDPOINT_ID = ENDPOINT_ID[1]

In [70]:
print(ENDPOINT_ID)

7093336538875953152


In [266]:
# Public のみ対応

In [270]:
aiplatform.init(project=project, location=location)

NameError: name 'project' is not defined

In [73]:
def endpoint_predict(
    project: str, location: str, instances: list, endpoint: str
):
    aiplatform.init(project=project, location=location)

    endpoint = aiplatform.Endpoint(endpoint)

    prediction = endpoint.predict(instances=instances)
    return prediction

endpoint_predict(PROJECT_ID, REGION, instance, ENDPOINT_ID)

Prediction(predictions=[0.0], deployed_model_id='2403224555062558720', explanations=None)

In [74]:
%time endpoint_predict(PROJECT_ID, REGION, instance, ENDPOINT_ID)

CPU times: user 21.1 ms, sys: 33 ms, total: 54.1 ms
Wall time: 1.7 s


Prediction(predictions=[0.0], deployed_model_id='2403224555062558720', explanations=None)