In [1]:
import pandas as pd

In [2]:
# Google Cloud project ID; passed to aip.init(project=...) further down.
# NOTE(review): the resolution cell below compares against this exact literal,
# so leaving it unchanged means the ID is re-read from the gcloud configuration.
PROJECT_ID = "algebraic-notch-313219"

In [3]:
# Cloud Storage URI used as the staging bucket in aip.init(staging_bucket=...).
# When left at this exact value, a later cell replaces it with a name built
# from PROJECT_ID and TIMESTAMP.
BUCKET_NAME = "gs://my-first-bucket"

In [4]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "algebraic-notch-313219":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

Project ID: algebraic-notch-313219


In [5]:
from datetime import datetime

# Second-resolution timestamp (YYYYmmddHHMMSS) appended to resource names so
# that each run of the notebook creates distinctly named resources.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [6]:
# Fall back to a generated bucket name when BUCKET_NAME is empty, None, or
# still the value assigned in the configuration cell.
if BUCKET_NAME in ("", None, "gs://my-first-bucket"):
    BUCKET_NAME = f"gs://{PROJECT_ID}aip-{TIMESTAMP}"

In [7]:
import google.cloud.aiplatform as aip

In [8]:
# Configure the Vertex AI SDK with the project and staging bucket chosen above.
aip.init(
    project=PROJECT_ID,
    staging_bucket=BUCKET_NAME,
)

In [9]:
# Cloud Storage path of the source data; read with `gsutil cat` below and
# passed as the gcs_source of the TabularDataset.
# The preview printed by the next cell shows comma-separated rows with a header
# line whose last column is `income_bracket`.
IMPORT_FILE = "gs://cloud-ai-platform-47ebb270-074a-4330-aaba-fff9e976aee4/adult_income"

In [10]:
count = ! gsutil cat $IMPORT_FILE | wc -l
print("Number of Examples", int(count[0]))

print("First 10 rows")
! gsutil cat $IMPORT_FILE | head

heading = ! gsutil cat $IMPORT_FILE | head -n1
label_column = str(heading).split(",")[-1].split("'")[0]
print("Label Column Name", label_column)
if label_column is None:
    raise Exception("label column missing")

Number of Examples 32562
First 10 rows
age,workclass,functional_weight,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
39, Private,297847, 9th,5, Married-civ-spouse, Other-service, Wife, Black, Female,3411,0,34, United-States, <=50K
72, Private,74141, 9th,5, Married-civ-spouse, Exec-managerial, Wife, Asian-Pac-Islander, Female,0,0,48, United-States, >50K
45, Private,178215, 9th,5, Married-civ-spouse, Machine-op-inspct, Wife, White, Female,0,0,40, United-States, >50K
31, Private,86958, 9th,5, Married-civ-spouse, Exec-managerial, Wife, White, Female,0,0,40, United-States, <=50K
55, Private,176012, 9th,5, Married-civ-spouse, Tech-support, Wife, White, Female,0,0,23, United-States, <=50K
30, Private,61272, 9th,5, Married-civ-spouse, Machine-op-inspct, Wife, White, Female,0,0,40, Portugal, <=50K
46, Self-emp-inc,161386, 9th,5, Married-civ-spouse, Adm-clerical, Wife, White, Female,0,0,50, United-St

In [11]:
# Register the CSV in Cloud Storage as a Vertex AI tabular dataset; the
# timestamp suffix keeps the display name distinct between runs.
dataset = aip.TabularDataset.create(
    display_name=f"Income_{TIMESTAMP}",
    gcs_source=[IMPORT_FILE],
)

# Fully qualified resource name of the dataset that was just created.
print(dataset.resource_name)


INFO:google.cloud.aiplatform.datasets.dataset:Creating TabularDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create TabularDataset backing LRO: projects/301473280362/locations/us-central1/datasets/7899291756175294464/operations/4972535358022483968
INFO:google.cloud.aiplatform.datasets.dataset:TabularDataset created. Resource name: projects/301473280362/locations/us-central1/datasets/7899291756175294464
INFO:google.cloud.aiplatform.datasets.dataset:To use this TabularDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.TabularDataset('projects/301473280362/locations/us-central1/datasets/7899291756175294464')
projects/301473280362/locations/us-central1/datasets/7899291756175294464


In [12]:
# Define the AutoML tabular training job: a classification task optimised for
# minimum log loss. Nothing is trained until `dag.run(...)` is called.
dag = aip.AutoMLTabularTrainingJob(
    display_name=f"income_{TIMESTAMP}",
    optimization_objective="minimize-log-loss",
    optimization_prediction_type="classification",
)

print(dag)

<google.cloud.aiplatform.training_jobs.AutoMLTabularTrainingJob object at 0x7fbd8662ae10>


In [13]:
# Run the training job on the dataset, predicting the label column detected
# earlier, with a 60/20/20 train/validation/test split.
model = dag.run(
    dataset=dataset,
    target_column=label_column,
    model_display_name=f"income_{TIMESTAMP}",
    # Data split fractions (train / validation / test).
    training_fraction_split=0.6,
    validation_fraction_split=0.2,
    test_fraction_split=0.2,
    # Training budget in milli node hours; early stopping stays enabled.
    budget_milli_node_hours=8000,
    disable_early_stopping=False,
)


INFO:google.cloud.aiplatform.training_jobs:No column transformations provided, so now retrieving columns from dataset in order to set default column transformations.
INFO:google.cloud.aiplatform.training_jobs:The column transformation of type 'auto' was set for the following columns: ['sex', 'workclass', 'age', 'relationship', 'education_num', 'native_country', 'education', 'race', 'hours_per_week', 'marital_status', 'occupation', 'functional_weight', 'capital_gain', 'capital_loss'].
INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/8807292097892712448?project=301473280362
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/301473280362/locations/us-central1/trainingPipelines/8807292097892712448 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/301473280362/locations/us-central1/trainingPipelines/8807

KeyboardInterrupt: 

In [None]:
# Look up models by the display name used for this run's training job.
models = aip.Model.list(
    filter=f"display_name=income_{TIMESTAMP}",
)

In [None]:
# REGION was referenced here without ever being defined in the notebook, so the
# cell failed with NameError on a fresh kernel. The dataset and training
# pipeline resource names logged above are all under locations/us-central1,
# so that region is used for the service endpoint.
REGION = "us-central1"

# Low-level model service client bound to the regional API endpoint.
client_options = {"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}
model_service_client = aip.gapic.ModelServiceClient(client_options=client_options)

In [None]:
# Request the evaluations of the first model returned by the lookup above.
model_evaluations = model_service_client.list_model_evaluations(parent=models[0].resource_name)

In [None]:
# Materialise the returned evaluations and show the first one.
model_evaluation = [*model_evaluations][0]
print(model_evaluation)