In [1]:
# Install/upgrade the Vertex AI SDK (google-cloud-aiplatform) into the user
# site-packages. The kernel has to be restarted afterwards to pick it up.
%pip install --upgrade --user --quiet google-cloud-aiplatform

[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
# Restart the kernel so the packages installed above become importable.
import IPython
import time

# do_shutdown(True) asks the running kernel to shut down and restart.
IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [1]:
from google.cloud import aiplatform as vertex_ai
from google.cloud.aiplatform import Featurestore, EntityType, Feature

In [2]:
# ID of the Vertex AI Persistent Resource. Later cells look it up with
# `gcloud ai persistent-resources` and pass it to the CustomJob as
# `persistent_resource_id`.
PERSISTENT_RESOURCE_ID = "ai-takeoff"

In [15]:
# Dynamically retrieve Persistent Resource location
PERSISTENT_RESOURCE_REGION = ""
check_regions = ["us-central1", "asia-southeast1", "europe-west4"]

for region in check_regions:
    shell_output = !gcloud ai persistent-resources list --project=$PROJECT_ID --region=$region
    if "Listed 0 items." not in shell_output:
        print(f"Persistent Resource found in {region}")
        PERSISTENT_RESOURCE_REGION = region

Persistent Resource found in us-central1


In [16]:
# Resolve the active gcloud project and derive the lab bucket name from it.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
BUCKET_NAME = f"{PROJECT_ID}-fraudfinder"
# Download the shared notebook configuration (a Python file) from the bucket.
# `config` is an IPython SList; `.n` is its newline-joined text.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env_v02.py
print(config.n)
# SECURITY NOTE(review): exec() runs whatever text was downloaded from the
# bucket as Python in this notebook's namespace. Only use it with a bucket you
# trust. If `gsutil cat` fails, the error text is exec'd and raises instead.
# The executed file assigns names such as BUCKET_NAME, PROJECT, REGION and ID
# (see the printed output), overwriting any same-named variables set earlier.
exec(config.n)


BUCKET_NAME          = "fraud123-438914-fraudfinder"
PROJECT              = "fraud123-438914"
REGION               = "us-central1"
ID                   = "fipp8"
FEATURESTORE_ID      = "fraudfinder_fipp8"
MODEL_NAME           = "ff_model"
ENDPOINT_NAME        = "ff_model_endpoint"
TRAINING_DS_SIZE     = "1000"
DATA_DIR             = "data"
TRAIN_DATA_DIR       = "train"
CUSTOMER_ENTITY      = "customer"
TERMINAL_ENTITY      = "terminal"
TARGET               = "tx_fraud"



In [17]:
# Check if Service Account is enabled on Persistent Resource
SA_ENABLED = ""

DESCRIBE_PR_OUTPUT = !gcloud ai persistent-resources describe $PERSISTENT_RESOURCE_ID --project=$PROJECT_ID --region=$PERSISTENT_RESOURCE_REGION
PR_DETAILS = " ".join(DESCRIBE_PR_OUTPUT)

if "enableCustomServiceAccount: true" in PR_DETAILS:
    SA_ENABLED = True
    print(f"Service Account is ENABLED on Persistent Resource")
else:
    SA_ENABLED = False
    print(f"Service Account is NOT ENABLED on Persistent Resource")

Service Account is ENABLED on Persistent Resource


In [18]:
# Launch jobs in the same region as the Persistent Resource.
REGION = PERSISTENT_RESOURCE_REGION

print("Project ID:", PROJECT_ID)
print("Project Region:", REGION)

Project ID: fraud123-438914
Project Region: us-central1


In [19]:
# Gets the default BUCKET_URI and SERVICE_ACCOUNT if they were not specified by the user.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this default Service Account:", SERVICE_ACCOUNT)

Using this default Service Account: 520607199607-compute@developer.gserviceaccount.com


In [20]:
!gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME

CommandException: Incorrect public member type for binding fraud123-438914-fraudfinder:


In [21]:
# Initialize the Vertex AI SDK with the project, region and staging bucket.
print("Initializing Vertex AI API.")
# The SDK expects the staging bucket as a gs:// URI, while BUCKET_NAME holds
# the bare bucket name; add the scheme unless it is already present.
vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_NAME if BUCKET_NAME.startswith("gs://") else f"gs://{BUCKET_NAME}",
)

Initializing Vertex AI API.


In [22]:
from google.cloud import aiplatform

# Define the custom-container training job that runs on the Persistent Resource.
# PROJECT_ID, REGION and BUCKET_NAME come from earlier cells.

# Persistent resource details
PERSISTENT_RESOURCE_ID = "ai-takeoff"
IMAGE_NAME = "fraud-detection-image"

# Custom container image that runs the training code.
# NOTE(review): hard-coded to one project/repository. The parameterized form is
# f"{REGION}-docker.pkg.dev/{PROJECT_ID}/<repository>/{IMAGE_NAME}"; confirm the
# repository name before switching.
CONTAINER_URI = "us-central1-docker.pkg.dev/fraud123-438914/fraudfinder-fvde2/fraud-detection-image"
print(CONTAINER_URI)

# NOTE(review): the next four constants are not passed to the job below (it uses
# `machine_type` and no accelerator). Kept in case other cells reference them.
MACHINE_TYPE = "n1-standard-4"
ACCELERATOR_TYPE = "NVIDIA_TESLA_T4"  # Or None if not using
ACCELERATOR_COUNT = 1                # Or 0 if not using
TRAINING_SCRIPT_PATH = "app/main.py"

# The SDK expects the staging bucket as a gs:// URI, while BUCKET_NAME holds
# the bare bucket name; add the scheme unless it is already present.
staging_bucket_uri = BUCKET_NAME if BUCKET_NAME.startswith("gs://") else f"gs://{BUCKET_NAME}"

# Initialize Vertex AI client
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=staging_bucket_uri)

machine_type = "n1-standard-8"
replica_count = 1  # integer count of replicas (was the string "1")

# Define the custom training job
job = aiplatform.CustomJob(
    display_name="fraudfinder-job",
    worker_pool_specs=[
        {
            "machine_spec": {
                "machine_type": machine_type,
            },
            "replica_count": replica_count,
            "container_spec": {
                "image_uri": CONTAINER_URI,
                "args": [],
            },
        }
    ],
    staging_bucket=staging_bucket_uri,
    persistent_resource_id=PERSISTENT_RESOURCE_ID,
)


us-central1-docker.pkg.dev/fraud123-438914/fraudfinder-fvde2/fraud-detection-image


In [23]:
# Tell the user whether an extra authentication step is needed before training.
if SA_ENABLED:
    print("Please proceed to next step to start the training job!")
else:
    # Stray Markdown backticks removed from the end of the command to run.
    print("Please open the Terminal in a new tab and run the command -> gcloud auth login")
    print("----------------------------------------------------------------")
    print("Once done, continue on to start the training job")

Please proceed to next step to start the training job!


In [24]:
# Launch the training job. job.run() waits for the job and raises if it fails.
if SA_ENABLED:
    print("Running training job with service account")
    job.run(service_account=SERVICE_ACCOUNT)
else:
    print("Running training job with user account")
    job.run()

# The previous summary printed `output_dir` and `merged_model_output_dir`, which
# are never defined in this notebook and raised NameError after a successful
# run. Report the job itself instead.
print("Training job finished:", job.resource_name)
print("Final job state:", job.state)

Running training job with service account
Creating CustomJob
CustomJob created. Resource name: projects/520607199607/locations/us-central1/customJobs/5133871406450212864
To use this CustomJob in another session:
custom_job = aiplatform.CustomJob.get('projects/520607199607/locations/us-central1/customJobs/5133871406450212864')
View Custom Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/5133871406450212864?project=520607199607
CustomJob projects/520607199607/locations/us-central1/customJobs/5133871406450212864 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/520607199607/locations/us-central1/customJobs/5133871406450212864 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/520607199607/locations/us-central1/customJobs/5133871406450212864 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/520607199607/locations/us-central1/customJobs/5133871406450212864 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/52060719960

RuntimeError: Job failed with:
code: 3
message: "The replica workerpool0-0 exited with a non-zero status of 1. Termination reason: Error. To find out more about why your job exited please check the logs: https://console.cloud.google.com/logs/viewer?project=520607199607&resource=ml_job%2Fjob_id%2F5133871406450212864&advancedFilter=resource.type%3D%22ml_job%22%0Aresource.labels.job_id%3D%225133871406450212864%22"


In [None]:
import random
import string
from typing import Union

import pandas as pd
from google.cloud import bigquery

# Generate unique ID to help w/ unique naming of certain pieces
ID = "".join(random.choices(string.ascii_lowercase + string.digits, k=5))

# Replace Region here
REGION = "us-central1"

# static parameters
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
BUCKET_NAME = f"{PROJECT_ID}-fraudfinder"
UPLOAD_BUCKET = f"{PROJECT_ID}-model-upload"
AGENT_BUCKET = f"{PROJECT_ID}-ai-workshops"
TRAINING_DS_SIZE = 1000

In [None]:
# YAML-style configuration text built from the parameters defined above.
#
# Fixes relative to the previous version:
#   * ID now uses the generated {ID} instead of a hard-coded value, so it agrees
#     with FEATURESTORE_ID, FEATUREVIEW_ID, EXPERIMENT_NAME and READ_INSTANCES_TABLE.
#   * READ_INSTANCES_URI points at the same table as READ_INSTANCES_TABLE.
#   * Duplicate MODEL_NAME / ENDPOINT_NAME keys (same values) were removed.
#
# NOTE(review): several values are still hard-coded and look copied from another
# environment (the "fraud-finder-lab" project/bucket, names ending in "8wc8m",
# and SERVICE_ACCOUNT). Confirm them before using this configuration.
config = f"""
BUCKET_NAME: \"{BUCKET_NAME}\"
PROJECT: \"{PROJECT_ID}\"
PROJECT_ID: \"{PROJECT_ID}\"
REGION: \"{REGION}\"
ID: \"{ID}\"
FEATURESTORE_ID: \"fraudfinder_{ID}\"
MODEL_NAME: \"ff_model\"
ENDPOINT_NAME: \"ff_model_endpoint\"
TRAINING_DS_SIZE: \"{TRAINING_DS_SIZE}\"
DATA_DIR: "data"
TRAIN_DATA_DIR: "train"
CUSTOMER_ENTITY: "customer"
TERMINAL_ENTITY: "terminal"
TARGET: "tx_fraud"
CUSTOMER_ENTITY_ID: customer
CUSTOMER_ENTITY_ID_FIELD: customer_id
TERMINAL_ENTITY_ID: terminal
TERMINALS_ENTITY_ID_FIELD: terminal_id
FEATUREVIEW_ID: \"fraudfinder_view_{ID}\"
#NETWORK: fraud-finder-network
#SUBNET: https://www.googleapis.com/compute/v1/projects/fraud-finder-lab/regions/us-central1/subnetworks/us-central1
#MODEL_REGISTRY: ff_model
RAW_BQ_TRANSACTION_TABLE_URI: fraud-finder-lab.tx.tx
RAW_BQ_LABELS_TABLE_URI: fraud-finder-lab.tx.txlabels
FEATURES_BQ_TABLE_URI: fraud-finder-lab.tx.wide_features_table
FEATURE_TIME: feature_ts
ONLINE_STORAGE_NODES: 1
SUBSCRIPTION_NAME: ff-tx-for-feat-eng-sub
SUBSCRIPTION_PATH: projects/fraud-finder-lab/subscriptions/ff-tx-for-feat-eng-sub
DROP_COLUMNS:
- timestamp
- entity_type_customer
- entity_type_terminal
FEAT_COLUMNS:
- customer_id_avg_amount_14day_window
- customer_id_avg_amount_15min_window
- customer_id_avg_amount_1day_window
- customer_id_avg_amount_30min_window
- customer_id_avg_amount_60min_window
- customer_id_avg_amount_7day_window
- customer_id_nb_tx_14day_window
- customer_id_nb_tx_15min_window
- customer_id_nb_tx_1day_window
- customer_id_nb_tx_30min_window
- customer_id_nb_tx_60min_window
- customer_id_nb_tx_7day_window
- terminal_id_avg_amount_15min_window
- terminal_id_avg_amount_30min_window
- terminal_id_avg_amount_60min_window
- terminal_id_nb_tx_14day_window
- terminal_id_nb_tx_15min_window
- terminal_id_nb_tx_1day_window
- terminal_id_nb_tx_30min_window
- terminal_id_nb_tx_60min_window
- terminal_id_nb_tx_7day_window
- terminal_id_risk_14day_window
- terminal_id_risk_1day_window
- terminal_id_risk_7day_window
- tx_amount
TARGET_COLUMN: tx_fraud
DATA_SCHEMA:
  timestamp: object
  tx_amount: float64
  tx_fraud: Int64
  entity_type_customer: Int64
  customer_id_nb_tx_1day_window: Int64
  customer_id_nb_tx_7day_window: Int64
  customer_id_nb_tx_14day_window: Int64
  customer_id_avg_amount_1day_window: float64
  customer_id_avg_amount_7day_window: float64
  customer_id_avg_amount_14day_window: float64
  customer_id_nb_tx_15min_window: Int64
  customer_id_avg_amount_15min_window: float64
  customer_id_nb_tx_30min_window: Int64
  customer_id_avg_amount_30min_window: float64
  customer_id_nb_tx_60min_window: Int64
  customer_id_avg_amount_60min_window: float64
  entity_type_terminal: Int64
  terminal_id_nb_tx_1day_window: Int64
  terminal_id_nb_tx_7day_window: Int64
  terminal_id_nb_tx_14day_window: Int64
  terminal_id_risk_1day_window: float64
  terminal_id_risk_7day_window: float64
  terminal_id_risk_14day_window: float64
  terminal_id_nb_tx_15min_window: Int64
  terminal_id_avg_amount_15min_window: float64
  terminal_id_nb_tx_30min_window: Int64
  terminal_id_avg_amount_30min_window: float64
  terminal_id_nb_tx_60min_window: Int64
  terminal_id_avg_amount_60min_window: float64
EXPERIMENT_NAME: \"ff-experiment-{ID}\"
DATA_URI: \"gs://{BUCKET_NAME}/data\"
TRAIN_DATA_URI: \"gs://{BUCKET_NAME}/data/train\"
READ_INSTANCES_TABLE: \"ground_truth_{ID}\"
READ_INSTANCES_URI: \"bq://{PROJECT_ID}.tx.ground_truth_{ID}\"
DATASET_NAME: fraud_finder_dataset_8wc8m
#JOB_NAME: fraudfinder-train-xgb-8wc8m
#MODEL_SERVING_IMAGE_URI: "us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-7:latest"
#IMAGE_REPOSITORY: fraudfinder-8wc8m
#IMAGE_NAME: dask-xgb-classificator
IMAGE_TAG: latest
#IMAGE_URI: "us-central1-docker.pkg.dev/fraud-finder-lab/fraudfinder-8wc8m/dask-xgb-classificator:latest"
#TRAIN_COMPUTE: e2-standard-4
#DEPLOY_COMPUTE: n1-standard-4
BASE_IMAGE: "python:3.10"
PIPELINE_NAME: fraud-finder-xgb-pipeline-8wc8m
PIPELINE_ROOT: "gs://fraud-finder-lab-fraudfinder/pipelines"
BQ_DATASET: tx
METRICS_URI: "gs://fraud-finder-lab-fraudfinder/deliverables/metrics.json"
AVG_PR_THRESHOLD: 0.2
MODEL_THRESHOLD: 0.5
AVG_PR_CONDITION: avg_pr_condition
PERSISTENT_RESOURCE_ID: null
REPLICA_COUNT: 1
SERVICE_ACCOUNT: 949651794396-compute@developer.gserviceaccount.com
"""