# Import libraries

In [None]:
import glob
import os
import sys

from google.cloud import aiplatform

# Setup variables

In [None]:
# Name shown for the training job and the Python module it runs.
DISPLAY_NAME = "attributes_model"
MODULE_NAME = "trainer.train"

# Project, region and the identity the training job runs as.
PROJECT_ID = "airesearch-1409"
REGION = "europe-west4"
SERVICE_ACCOUNT = "vertex-ai-training@airesearch-1409.iam.gserviceaccount.com"

# Container image the training package is executed in.
IMAGE_URI = "europe-docker.pkg.dev/vertex-ai/training/pytorch-gpu.1-12:latest"

# GCS location used for staging and wheel upload; job outputs go under a
# sub-path named after the job.
BUCKET_NAME = "gs://attributes_models/base_model"
GCS_OUTPUT_URI_PREFIX = "/".join((BUCKET_NAME, DISPLAY_NAME))

# Existing TensorBoard instance, addressed by its full resource name.
TB_RESOURCE_NAME = (
    f"projects/184243724142/locations/{REGION}"
    "/tensorboards/4596222486894346240"
)

In [None]:
# Export the interpreter path and upload destination so the %%bash packaging
# cell can read them as $SYS_EXE_PY_CONDA and $BUCKET_NAME.
os.environ.update(
    {
        "SYS_EXE_PY_CONDA": sys.executable,
        "BUCKET_NAME": BUCKET_NAME,
    }
)

# Setup Vertex AI

In [None]:
# Set the SDK defaults (project, region, staging location) used by the
# Vertex AI calls that follow.
aiplatform.init(location=REGION, project=PROJECT_ID, staging_bucket=BUCKET_NAME)

In [None]:
# Handle to the existing TensorBoard instance; its resource name is passed to
# the training run later on.
tensorboard = aiplatform.Tensorboard(tensorboard_name=TB_RESOURCE_NAME)

# Make package

In [None]:
%%bash
# Build the package from the repository root and upload the resulting wheel(s)
# to the GCS location exported in $BUCKET_NAME.
# -e: stop at the first failing command; -u: fail if either exported variable
# is missing (i.e. the environment cell was not run first).
set -eu
cd ../
# Quoted so an interpreter path containing spaces is still a single word.
"$SYS_EXE_PY_CONDA" -m build
# The trailing slash makes gsutil treat the destination as a directory. Without
# it, copying a single wheel to a prefix that does not exist yet creates an
# object named after the prefix itself instead of <prefix>/<wheel name>.
gsutil cp ./dist/*.whl "$BUCKET_NAME/"

In [None]:
# Resolve the file name of the wheel to hand to the training job.
# glob returns matches in arbitrary order, so when older wheels are still in
# ../dist the first match is not necessarily the latest build; choose the most
# recently modified wheel instead.
wheel_paths = glob.glob("../dist/*.whl")
if not wheel_paths:
    # Fail with an actionable message rather than an IndexError.
    raise FileNotFoundError(
        "No wheel found in ../dist; run the package build cell first."
    )
package_name = os.path.basename(max(wheel_paths, key=os.path.getmtime))
package_name

# Custom Job

In [None]:
# Worker pool sizing: machine shape and attached accelerator, with counts.
machine_type, replica_count = "n1-standard-4", 1
accelerator_type, accelerator_count = "NVIDIA_TESLA_T4", 1

# Command-line arguments forwarded to the training module, as flag/value pairs.
args = [
    "--batch_size", "128",
    "--num_epochs", "20",
]

In [None]:
# Define the training job: which container to use, which module to run, and
# where the uploaded wheel lives in GCS.
custom_training_job = aiplatform.CustomPythonPackageTrainingJob(
    container_uri=IMAGE_URI,
    python_module_name=MODULE_NAME,
    python_package_gcs_uri=f"{BUCKET_NAME}/{package_name}",
    display_name=DISPLAY_NAME,
)

In [None]:
# Launch the job with the resources and arguments chosen above, writing
# outputs under GCS_OUTPUT_URI_PREFIX and attaching the TensorBoard instance.
custom_training_job.run(
    # Hardware
    machine_type=machine_type,
    replica_count=replica_count,
    accelerator_type=accelerator_type,
    accelerator_count=accelerator_count,
    # Execution
    service_account=SERVICE_ACCOUNT,
    args=args,
    # Outputs and monitoring
    base_output_dir=GCS_OUTPUT_URI_PREFIX,
    tensorboard=tensorboard.resource_name,
)