In [1]:
import pandas as pd

In [1]:
# Google Cloud project ID. A following cell replaces it with the project
# reported by gcloud when it is empty, None, or still equal to this value.
PROJECT_ID = "algebraic-notch-313219"

In [2]:
# Cloud Storage URI passed to the SDK as its staging bucket. A later cell swaps
# it for a generated, timestamped name when it is empty, None, or unchanged.
BUCKET_NAME = "gs://my-first-bucket"

In [3]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "algebraic-notch-313219":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

Project ID: algebraic-notch-313219


In [4]:
from datetime import datetime

# Per-run suffix in YYYYMMDDHHMMSS form (local time), appended to resource
# names so that repeated runs do not collide.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [5]:
# Derive a run-specific bucket URI when none was supplied (empty, None, or
# still the value assigned in the configuration cell).
if BUCKET_NAME in ("", None, "gs://my-first-bucket"):
    BUCKET_NAME = f"gs://{PROJECT_ID}aip-{TIMESTAMP}"

In [6]:
import google.cloud.aiplatform as aip

In [7]:
# Initialise the aiplatform SDK with the resolved project and staging bucket.
aip.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME)

In [10]:
# Cloud Storage location of the adult-income data. The following cells read it
# with gsutil and use it as the source of the TabularDataset.
IMPORT_FILE = "gs://cloud-ai-platform-47ebb270-074a-4330-aaba-fff9e976aee4/datasets/adult_income"

In [11]:
count = ! gsutil cat $IMPORT_FILE | wc -l
print("Number of Examples", int(count[0]))

print("First 10 rows")
! gsutil cat $IMPORT_FILE | head

heading = ! gsutil cat $IMPORT_FILE | head -n1
label_column = str(heading).split(",")[-1].split("'")[0]
print("Label Column Name", label_column)
if label_column is None:
    raise Exception("label column missing")

Number of Examples 32562
First 10 rows
age,workclass,functional_weight,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
39, Private,297847, 9th,5, Married-civ-spouse, Other-service, Wife, Black, Female,3411,0,34, United-States, <=50K
72, Private,74141, 9th,5, Married-civ-spouse, Exec-managerial, Wife, Asian-Pac-Islander, Female,0,0,48, United-States, >50K
45, Private,178215, 9th,5, Married-civ-spouse, Machine-op-inspct, Wife, White, Female,0,0,40, United-States, >50K
31, Private,86958, 9th,5, Married-civ-spouse, Exec-managerial, Wife, White, Female,0,0,40, United-States, <=50K
55, Private,176012, 9th,5, Married-civ-spouse, Tech-support, Wife, White, Female,0,0,23, United-States, <=50K
30, Private,61272, 9th,5, Married-civ-spouse, Machine-op-inspct, Wife, White, Female,0,0,40, Portugal, <=50K
46, Self-emp-inc,161386, 9th,5, Married-civ-spouse, Adm-clerical, Wife, White, Female,0,0,50, United-St

In [12]:
# Register the CSV in Cloud Storage as a tabular dataset named after this run.
dataset = aip.TabularDataset.create(
    display_name=f"Income_{TIMESTAMP}",
    gcs_source=[IMPORT_FILE],
)

# Resource name of the dataset that was just created.
print(dataset.resource_name)


INFO:google.cloud.aiplatform.datasets.dataset:Creating TabularDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create TabularDataset backing LRO: projects/301473280362/locations/us-central1/datasets/3737965700484956160/operations/2473885288296873984
INFO:google.cloud.aiplatform.datasets.dataset:TabularDataset created. Resource name: projects/301473280362/locations/us-central1/datasets/3737965700484956160
INFO:google.cloud.aiplatform.datasets.dataset:To use this TabularDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.TabularDataset('projects/301473280362/locations/us-central1/datasets/3737965700484956160')
projects/301473280362/locations/us-central1/datasets/3737965700484956160


In [13]:
# Define (but do not start) an AutoML tabular classification job that
# optimises for log loss.
dag = aip.AutoMLTabularTrainingJob(
    optimization_prediction_type="classification",
    optimization_objective="minimize-log-loss",
    display_name=f"income_{TIMESTAMP}",
)

print(dag)

<google.cloud.aiplatform.training_jobs.AutoMLTabularTrainingJob object at 0x7fbde7c020d0>


In [14]:
# Run the training job on the dataset and keep a handle to the resulting model.
model = dag.run(
    dataset=dataset,
    target_column=label_column,
    model_display_name=f"income_{TIMESTAMP}",
    # Train / validation / test fractions (60% / 20% / 20%).
    training_fraction_split=0.6,
    validation_fraction_split=0.2,
    test_fraction_split=0.2,
    # Budget is expressed in milli node hours: 8000 == 8 node hours.
    budget_milli_node_hours=8000,
    disable_early_stopping=False,
)


INFO:google.cloud.aiplatform.training_jobs:No column transformations provided, so now retrieving columns from dataset in order to set default column transformations.
INFO:google.cloud.aiplatform.training_jobs:The column transformation of type 'auto' was set for the following columns: ['age', 'workclass', 'capital_gain', 'native_country', 'education_num', 'functional_weight', 'marital_status', 'race', 'sex', 'hours_per_week', 'occupation', 'capital_loss', 'relationship', 'education'].
INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/2506087716131700736?project=301473280362
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/301473280362/locations/us-central1/trainingPipelines/2506087716131700736 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/301473280362/locations/us-central1/trainingPipelines/2506

In [15]:
# Look up the model(s) whose display name matches this run's training job.
models = aip.Model.list(filter=f"display_name=income_{TIMESTAMP}")

In [17]:
# Region used to build the API endpoint of the model service client.
REGION = "us-central1"

In [18]:
# Point the low-level model service client at the regional API endpoint.
client_options = dict(api_endpoint=REGION + "-aiplatform.googleapis.com")
model_service_client = aip.gapic.ModelServiceClient(
    client_options=client_options,
)

In [19]:
# Request the evaluations attached to the first model returned by the lookup.
parent_model_name = models[0].resource_name
model_evaluations = model_service_client.list_model_evaluations(
    parent=parent_model_name,
)

In [20]:
# Materialise the evaluation results and keep the first one for inspection.
model_evaluation = list(model_evaluations).pop(0)
print(model_evaluation)

name: "projects/301473280362/locations/us-central1/models/4597669444196499456/evaluations/8520853186664803680"
metrics_schema_uri: "gs://google-cloud-aiplatform/schema/modelevaluation/classification_metrics_1.0.0.yaml"
metrics {
  struct_value {
    fields {
      key: "auPrc"
      value {
        number_value: 0.95041203
      }
    }
    fields {
      key: "auRoc"
      value {
        number_value: 0.9484026
      }
    }
    fields {
      key: "confidenceMetrics"
      value {
        list_value {
          values {
            struct_value {
              fields {
                key: "confidenceThreshold"
                value {
                  number_value: -0.01
                }
              }
              fields {
                key: "f1Score"
                value {
                  number_value: 0.6666667
                }
              }
              fields {
                key: "f1ScoreAt1"
                value {
                  number_value: 0.6666667
     

In [21]:
# Deploy the trained model for online serving on n1-standard-2 machines and
# keep the returned endpoint handle for the prediction request below.
endpoint = model.deploy(machine_type="n1-standard-2")

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/301473280362/locations/us-central1/endpoints/8212287532231032832/operations/6075076140333006848
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/301473280362/locations/us-central1/endpoints/8212287532231032832
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/301473280362/locations/us-central1/endpoints/8212287532231032832')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/301473280362/locations/us-central1/endpoints/8212287532231032832
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/301473280362/locations/us-central1/endpoints/8212287532231032832/operations/555914796990464000
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/301473280362

In [24]:
# One example row for online prediction: every feature column of the training
# data, without the label column. All values are passed as strings.
# NOTE(review): "relationship" is "Wife" while "sex" is "Male" — confirm this
# combination is intended for the sample.
INSTANCE = dict(
    age="43",
    workclass="Private",
    functional_weight="262439",
    education="HS-grad",
    education_num="9",
    marital_status="Married-civ-spouse",
    occupation="Other-service",
    relationship="Wife",
    race="White",
    sex="Male",
    capital_gain="0",
    capital_loss="0",
    hours_per_week="40",
    native_country="United-States",
)

In [25]:
# The endpoint takes a list of instances; send the single example row.
instances_list = [INSTANCE]

# explain() returns the predictions (scores and classes) together with
# per-feature attributions for each instance.
prediction = endpoint.explain(instances_list)
print(prediction)

Prediction(predictions=[{'scores': [0.855384111404419, 0.1446159183979034], 'classes': [' <=50K', ' >50K']}], deployed_model_id='1798757042576424960', explanations=[attributions {
  baseline_output_value: 0.46345165371894836
  instance_output_value: 0.855384111404419
  feature_attributions {
    struct_value {
      fields {
        key: "age"
        value {
          number_value: -0.01891795947001531
        }
      }
      fields {
        key: "capital_gain"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: "capital_loss"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: "education"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: "education_num"
        value {
          number_value: 0.08390970184252812
        }
      }
      fields {
        key: "functional_weight"
        value {
          number_value: 0.02690481910338769
        }
      }
      fields {


In [26]:
import numpy as np

In [27]:
# Report the most likely class of the first predicted instance and its score.
try:
    # prediction[0] is the list of per-instance results; take the first one.
    first_result = prediction[0][0]
    label = np.argmax(first_result["scores"])
    cls = first_result["classes"][label]
    print("Predicted Value:", cls, first_result["scores"][label])
except (IndexError, KeyError, TypeError, ValueError) as err:
    # Stay best-effort for an empty or unexpectedly shaped response, but say
    # so instead of silently hiding the problem (and every other error).
    print("Could not extract the predicted value:", repr(err))

Predicted Value:  <=50K 0.855384111404419
