In [1]:
! pip3 install --no-cache-dir --upgrade "kfp>2" \
                                        google-cloud-aiplatform



In [1]:
import sys

if "google.colab" in sys.modules:

    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [2]:
import sys

if "google.colab" in sys.modules:

    from google.colab import auth

    auth.authenticate_user()

In [3]:
PROJECT_ID = "henry-scien"  # @param {type:"string"}
LOCATION = "us-central1"
BQ_LOCATION = LOCATION.split("-")[0].upper()

In [4]:
BUCKET_URI = f"gs://xgboost_new_-{PROJECT_ID}-unique-custom"  

In [5]:
! gsutil mb -l $LOCATION -p $PROJECT_ID $BUCKET_URI

Creating gs://xgboost_new_-henry-scien-unique-custom/...


In [6]:
SERVICE_ACCOUNT = "523981946985-compute@developer.gserviceaccount.com" 

In [7]:
import sys

IS_COLAB = "google.colab" in sys.modules
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    else:  # IS_COLAB:
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

#### Set service account access for Vertex AI Pipelines

Run the following commands to grant your service account access to read and write pipeline artifacts in the bucket that you created in the previous step. You only need to run this step once per service account.

In [8]:
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

In [9]:
import google.cloud.aiplatform as aiplatform
import kfp
from kfp import compiler, dsl
from kfp.dsl import Artifact, Dataset, Input, Metrics, Model, Output, component

## Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [10]:
aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_URI)

In [11]:
PATH = %env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

KFP_ENDPOINT = (
    "https://720c5bc00c3d6089-dot-us-central1.pipelines.googleusercontent.com/"
)

PIPELINE_ROOT = f"{BUCKET_URI}/diabetes_pipeline"  # This is where all pipeline artifacts are sent. You'll need to ensure the bucket is created ahead of time
PIPELINE_ROOT
print(f"PIPELINE_ROOT: {PIPELINE_ROOT}")

env: PATH=/usr/local/cuda/bin:/opt/conda/bin:/opt/conda/condabin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/jupyter/.local/bin
PIPELINE_ROOT: gs://xgboost_new_-henry-scien-unique-custom/diabetes_pipeline


In [12]:
@component(
    packages_to_install=["pandas"]
)

def get_data(
    url: str,
    dataset: Output[Dataset],
):
    import pandas as pd
    df = pd.read_csv(url)
    df = df.dropna()
    df.to_csv(dataset.path, index=False)

  return component_factory.create_component_from_func(


In [13]:
@component(
    packages_to_install=[
        "xgboost==1.6.2",
        "pandas==1.3.5",
        "joblib==1.1.0",
        "scikit-learn==1.0.2",
    ],
)
def xgboost_training(
    dataset: Input[Dataset],
    model: Output[Model],
    metrics: Output[Metrics],
):
    import os
    import joblib
    import xgboost as xgb
    import pandas as pd
    from sklearn.model_selection import (RandomizedSearchCV, StratifiedKFold,
                                         train_test_split)
    from sklearn.metrics import (accuracy_score, precision_recall_curve,
                                 roc_auc_score)
    
    # Load the training census dataset
    with open(dataset.path, "r") as train_data:
        raw_data = pd.read_csv(train_data)
    
    # Separate features and target
    X = raw_data.drop(['Outcome'], axis=1)
    y = raw_data['Outcome']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    xgbc = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, gamma=0, subsample=0.5,colsample_bytree=1, max_depth=8)

    xgbc.fit(X_train,y_train)
    
    predictions =xgbc.predict(X_test)
    score = accuracy_score(y_test, predictions)
    auc = roc_auc_score(y_test, predictions)
    _ = precision_recall_curve(y_test, predictions)

    metrics.log_metric("accuracy", (score * 100.0))
    metrics.log_metric("framework", "xgboost")
    metrics.log_metric("dataset_size", len(raw_data))
    metrics.log_metric("AUC", auc)

    # Export the model to a file
    os.makedirs(model.path, exist_ok=True)
    joblib.dump(xgbc, os.path.join(model.path, "model.joblib"))
    

In [14]:
@component(
    packages_to_install=["google-cloud-aiplatform==1.25.0"],
)

def deploy_xgboost_model(
    model: Input[Model],
    project_id: str,
    vertex_endpoint: Output[Artifact],
    vertex_model: Output[Model],
):
    """Deploys an XGBoost model to Vertex AI Endpoint.

    Args:
        model: The model to deploy.
        project_id: The project ID of the Vertex AI Endpoint.

    Returns:
        vertex_endpoint: The deployed Vertex AI Endpoint.
        vertex_model: The deployed Vertex AI Model.
    """
    from google.cloud import aiplatform

    aiplatform.init(project=project_id)

    deployed_model = aiplatform.Model.upload(
        display_name="diabetes-demo-model",
        artifact_uri=model.uri,
        serving_container_image_uri="us-central1-docker.pkg.dev/henry-scien/custom-container-prediction-sdk/xgboost-server-sdk:latest",
    )
    endpoint = deployed_model.deploy(machine_type="n1-standard-4")

    vertex_endpoint.uri = endpoint.resource_name
    vertex_model.uri = deployed_model.resource_name

In [15]:
@dsl.pipeline(
    name="diabetes-demo-pipeline",
)
def pipeline():
    url: str = "https://raw.githubusercontent.com/prins2516/Dataset/main/datasets_228_482_diabetes.csv"
    """A demo pipeline."""

    data_op = get_data(url=url)  # Pass the url as a keyword argument

    training_task = xgboost_training(
        dataset=data_op.outputs["dataset"],
    )

    _ = deploy_xgboost_model(
        project_id=PROJECT_ID,
        model=training_task.outputs["model"],
    )

In [16]:
compiler.Compiler().compile(pipeline_func=pipeline, package_path="pipeline_xgboost.yaml")

In [17]:
job = aiplatform.PipelineJob(
    display_name="diabetes-demo-pipeline",
    template_path="pipeline_xgboost.yaml",
    pipeline_root=PIPELINE_ROOT,
)

job.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/523981946985/locations/us-central1/pipelineJobs/diabetes-demo-pipeline-20240717111007
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/523981946985/locations/us-central1/pipelineJobs/diabetes-demo-pipeline-20240717111007')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/diabetes-demo-pipeline-20240717111007?project=523981946985
PipelineJob projects/523981946985/locations/us-central1/pipelineJobs/diabetes-demo-pipeline-20240717111007 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/523981946985/locations/us-central1/pipelineJobs/diabetes-demo-pipeline-20240717111007 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/523981946985/locations/us-central1/pipelineJobs/diabetes-demo-pipeline-20240717111007 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/52398194

In [19]:
from google.auth import default
from google.auth.transport.requests import Request

# Get default credentials
credentials, project_id = default()

# Refresh the credentials to get the access token
credentials.refresh(Request())
token = credentials.token

print(f"Access Token: {token}")


Access Token: ya29.c.c0ASRK0GbfDBrdhsTbaIHVpSX3-Cj11_Rd-QRcv4joAspuPw4Al6g4wZ8ixVGS7GTgT5Z_aHy4ENnKnGCs6mHIFIRylzYVq-1sXzvAsGjjemokYXP05MfiYZCDieysySdMdkv25OpdyNdcFIdD4c1Y_avMWkc4INwAKAmzzuFaLGajP9vbIoCwV8djok7v64WAVHaD_7RTp2LEuEPojtpTW1ElZVc-9tYOsqyEguvKfjPXEyyaxvA9j-IaqJEh5NMf9lCera0FYWTq-i0ct_b-ldu8Qh97q5hN-7CyUnheRcdLfnP_ALICTZRSA9jE9xFHdzCAClxmyXKndEG_GE46OIUYGDAxtlDG3CzQj8f-kHFZ6xvQmVzLc4oNLm3qLBh_NBkG396AF0pyJy1YxxZh1g7dV2WafbzR6Iw3X9ZS3cQvw67qMRFRoly_1UcyScQZVBalrliQrjnmrjW8S0wYw96kX3to4UOanrxg483rReWMbJpk_yvff5F176tqyO_9R72ik54mjwRUlcX-qcFkivUsF7rsWvJu75qczbFx38Qgngisypm0krsuR5f36J4u4t8VZ-oB6dsoW6wz_RRsml0SWB2viJn_vXp3UBF2QrUJQf_Upz5Fhq8Q-kqyXl_rFognmrudZ5n6OSvlU4lnVbWB91Qv9OcZqv5cf5IXUi0h6tz1Ildje2l2U-kc_iM7noUO4Rh6zlfU2_ZgrScnMiYgZ5xIWaQqyn8guOawXMBjgSosbc7nQ5lOFsrc3ww2w2Or3iIV393RxfyVVU-vWmMomRBbWQbVzoilnsuR_9Zdl75f893M0QfmYtb6ecJzUXXx7Rw3kxOpfuq5orYv36ox11tVO4fF1k36fiO6oS0hZ2xmh10iqpioR8rtm3l8r7p7Ohu4w64i0v-5mcc-arvp_4X9duF4q2h3Onrexkqul9laM-h5oi7Mfh_tzm2xq15OrsWtr2fBuQxi0

In [20]:
import requests
project_id = PROJECT_ID
endpoint_id = '6051551470581448704'
region = LOCATION
endpoint_url = f'https://{region}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{region}/endpoints/{endpoint_id}:predict'

data = {"instances":[[6, 140, 72, 35, 0, 33.6, 0.62, 48]]}
headers = {
    'Authorization': f'Bearer {token}',
    'Content-Type': 'application/json'
}
# Send the request to the endpoint
response = requests.post(endpoint_url, headers=headers, json=data)

# Handle the response
if response.status_code == 200:
    predictions = response.json()
    print("Predictions:", predictions)
else:
    print("Error:", response.text)

Predictions: {'predictions': [1], 'deployedModelId': '1066175534733459456', 'model': 'projects/523981946985/locations/us-central1/models/2008228301318914048', 'modelDisplayName': 'diabetes-demo-model', 'modelVersionId': '1'}


In [20]:
import xgboost as xgb
import numpy as np

# Sample input data
input_data = {"instances":[[6, 140, 72, 35, 0, 33.6, 0.62, 48]]}


# Convert input data to DMatrix
d_matrix = xgb.DMatrix(input_data)

In [22]:
headers['Authorization']

'Bearer ya29.c.c0ASRK0GbfDBrdhsTbaIHVpSX3-Cj11_Rd-QRcv4joAspuPw4Al6g4wZ8ixVGS7GTgT5Z_aHy4ENnKnGCs6mHIFIRylzYVq-1sXzvAsGjjemokYXP05MfiYZCDieysySdMdkv25OpdyNdcFIdD4c1Y_avMWkc4INwAKAmzzuFaLGajP9vbIoCwV8djok7v64WAVHaD_7RTp2LEuEPojtpTW1ElZVc-9tYOsqyEguvKfjPXEyyaxvA9j-IaqJEh5NMf9lCera0FYWTq-i0ct_b-ldu8Qh97q5hN-7CyUnheRcdLfnP_ALICTZRSA9jE9xFHdzCAClxmyXKndEG_GE46OIUYGDAxtlDG3CzQj8f-kHFZ6xvQmVzLc4oNLm3qLBh_NBkG396AF0pyJy1YxxZh1g7dV2WafbzR6Iw3X9ZS3cQvw67qMRFRoly_1UcyScQZVBalrliQrjnmrjW8S0wYw96kX3to4UOanrxg483rReWMbJpk_yvff5F176tqyO_9R72ik54mjwRUlcX-qcFkivUsF7rsWvJu75qczbFx38Qgngisypm0krsuR5f36J4u4t8VZ-oB6dsoW6wz_RRsml0SWB2viJn_vXp3UBF2QrUJQf_Upz5Fhq8Q-kqyXl_rFognmrudZ5n6OSvlU4lnVbWB91Qv9OcZqv5cf5IXUi0h6tz1Ildje2l2U-kc_iM7noUO4Rh6zlfU2_ZgrScnMiYgZ5xIWaQqyn8guOawXMBjgSosbc7nQ5lOFsrc3ww2w2Or3iIV393RxfyVVU-vWmMomRBbWQbVzoilnsuR_9Zdl75f893M0QfmYtb6ecJzUXXx7Rw3kxOpfuq5orYv36ox11tVO4fF1k36fiO6oS0hZ2xmh10iqpioR8rtm3l8r7p7Ohu4w64i0v-5mcc-arvp_4X9duF4q2h3Onrexkqul9laM-h5oi7Mfh_tzm2xq15OrsWtr2fBuQxi0yJOoo7