## Build custom container for Vertex training

In [None]:
# Show the notebook's working directory; the relative paths used below (e.g. `src/...`) resolve against it.
!pwd

## Load env config

In [None]:
# Naming convention shared by every cloud resource this notebook touches.
VERSION = "v1"  # TODO
PREFIX = "ndr-" + VERSION  # TODO

print("PREFIX = {}".format(PREFIX))

In [None]:
# staging GCS
# Assigning a `!command` captures its stdout as an IPython SList (one item per output line);
# the first line is taken as the active gcloud project ID.
# NOTE(review): if gcloud prints nothing, GCP_PROJECTS[0] raises IndexError — confirm a project is configured.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Download the shared environment config from the bucket; `.n` is the captured output
# joined into a single newline-separated string.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
# NOTE(review): exec() runs whatever Python is stored at that GCS path inside this kernel.
# Only run this against a bucket you control, and read the printed config before trusting it.
exec(config.n)

In [None]:
# Identifiers for the ranking-model training image.
DOCKERNAME = 'rank'

VERSION = "v11"
MODEL_TYPE = 'rank'
# APP and FRAMEWORK are not assigned in this cell (the overrides below stay disabled),
# so both must already exist in the kernel when this cell runs.
# APP = 'sp'
# FRAMEWORK = 'tfrs'
MODEL_ROOT_NAME = "{}-{}-{}-{}".format(APP, MODEL_TYPE, FRAMEWORK, VERSION)

# Artifact Registry coordinates: <region>-docker.pkg.dev/<project>/<repository>/<image>
REPOSITORY = "tfrs-ranking"  # f'{APP}-{FRAMEWORK}'
IMAGE_NAME = "{}".format(MODEL_ROOT_NAME)
REMOTE_IMAGE_NAME = "{}-docker.pkg.dev/{}/{}/{}".format(
    REGION, PROJECT_ID, REPOSITORY, IMAGE_NAME
)

# Echo the resolved names, one per line.
print(
    f"DOCKERNAME      : {DOCKERNAME}",
    f"MODEL_ROOT_NAME : {MODEL_ROOT_NAME}",
    f"REPOSITORY        = {REPOSITORY}",
    f"IMAGE_NAME        = {IMAGE_NAME}",
    f"REMOTE_IMAGE_NAME = {REMOTE_IMAGE_NAME}",
    sep="\n",
)

In [None]:
# List the top-level contents of the staging bucket at BUCKET_URI.
! gsutil ls $BUCKET_URI

## Imports

In [None]:
import os

# Set TensorFlow's C++ log threshold to '2' through the process environment.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')

## Create training package

In [None]:
# Source root and, inside it, the sub-folder that holds the training package.
REPO_SRC, TRAIN_SUBFOLDER = 'src', 'ranking'

# Disabled scaffold for recreating the package folder from scratch:
# ! rm -rf $REPO_DOCKER_PATH_PREFIX/$TRAIN_SUBFOLDER
# ! mkdir -p $REPO_DOCKER_PATH_PREFIX/$TRAIN_SUBFOLDER
# ! touch $REPO_DOCKER_PATH_PREFIX/$TRAIN_SUBFOLDER/__init__.py

# Build Image

In [None]:
# Recap of the image settings used by the build steps that follow.
print(
    f"DOCKERNAME        = {DOCKERNAME}",
    f"REPOSITORY        = {REPOSITORY}",
    f"IMAGE_NAME        = {IMAGE_NAME}",
    f"REMOTE_IMAGE_NAME = {REMOTE_IMAGE_NAME}",
    sep="\n",
)

## Create Artifact Repository
If you don't have an existing artifact repository, create one using the gcloud command below.

In [None]:
# Create the Docker repository in REGION — the same region embedded in REMOTE_IMAGE_NAME
# (`{REGION}-docker.pkg.dev/...`) and used for Docker auth — so the image pushed later
# targets a repository that actually exists in that region.
! gcloud artifacts repositories create $REPOSITORY --repository-format=docker --location=$REGION

## Local Docker build
Provide a name for your Dockerfile and make sure Docker is authenticated with Artifact Registry.

In [None]:
# Register gcloud as Docker's credential helper for this region's Artifact Registry host;
# --quiet suppresses the interactive confirmation prompt.
! gcloud auth configure-docker $REGION-docker.pkg.dev --quiet

### Requirements & Config files

In [None]:
%%writefile {REPO_SRC}/{TRAIN_SUBFOLDER}/requirements.txt
# Python dependencies for the training image; the Dockerfile installs them with
# `pip install -r ./requirements.txt` from inside the copied `ranking` folder.
google-cloud-aiplatform[cloud_profiler]>=1.26.1
tensorflow-recommenders==0.7.2
tensorboard==2.13.0
tensorboard-data-server==0.7.1
tensorboard-plugin-profile==2.13.1
tensorboard-plugin-wit==1.8.1
tensorboardX==2.6
# NOTE(review): tensorflow-io 0.27.x appears to target TF 2.10, while the Dockerfile's
# base image is tf-gpu.2-11 — confirm this pairing against the tensorflow-io compatibility table.
tensorflow-io==0.27.0
protobuf==3.19.6

In [None]:
# Generate train_config.py from the notebook's PROJECT_ID instead of a hard-coded project,
# so the packaged trainer targets the same project as the rest of this notebook.
# %%writefile only expands {variables} in the file name, not in the cell body, hence plain Python.
train_config_path = f"{REPO_SRC}/{TRAIN_SUBFOLDER}/train_config.py"
train_config_lines = [
    f"PROJECT_ID={PROJECT_ID!r}",
    "TRACK_HISTORY = 5",
]
with open(train_config_path, "w") as train_config_file:
    train_config_file.write("\n".join(train_config_lines) + "\n")
print(f"Wrote {train_config_path}")

## Create Dockerfile

In [None]:
%%writefile {REPO_SRC}/Dockerfile_{DOCKERNAME}

# Training image for the ranking model.
# Written to <REPO_SRC>/Dockerfile_<DOCKERNAME>, the path the build commands pass to `docker build -f`.

# FROM tensorflow/tensorflow:2.10.1-gpu
FROM gcr.io/deeplearning-platform-release/tf-gpu.2-11

# key=value form; the space-separated `ENV key value` form is legacy syntax.
ENV PYTHONUNBUFFERED=True

# Copies the trainer code to the docker image.
# `ranking` is resolved relative to the build context, so the context must be the folder that contains it.
COPY ranking /ranking

WORKDIR /ranking

RUN pip install --upgrade pip
RUN pip install -r ./requirements.txt

# apt-get rather than apt: apt warns that its CLI is not stable for scripted use.
RUN apt-get update && apt-get -y install nvtop

# RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/

## Build Image Locally
Run the commands printed below in a terminal window.

In [None]:
# Print the shell commands for building the image from a terminal.
# The build context is $REPO_SRC (not `.`): the Dockerfile's `COPY ranking /ranking` is resolved
# relative to the context, and the `ranking` folder lives under REPO_SRC.
print("copy these commands into terminal:\n")
print(f"export REPO_SRC={REPO_SRC}")
print(f"export REMOTE_IMAGE_NAME={REMOTE_IMAGE_NAME}")
print(f"export DOCKERNAME={DOCKERNAME}")
# The `$NAME` tokens are printed literally and expanded by the terminal shell.
print("docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME $REPO_SRC")

# !docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME $REPO_SRC

## Push container to Registry

In [None]:
# Print the push command for manual use; `$REMOTE_IMAGE_NAME` is emitted literally
# and is expanded by the terminal shell, not by Python.
print(
    "copy this command into terminal:\n",
    "docker push $REMOTE_IMAGE_NAME",
    sep="\n",
)

# ! docker push $REMOTE_IMAGE_NAME

## (Optional) Build Image with Cloud Build

**TODO**

In [None]:
# Docker definitions for training
IMAGE_NAME               = f'{MODEL_ROOT_NAME}'
IMAGE_URI                = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}'

# Keep the bare suffix here: the build commands and the cloudbuild.yaml template prepend
# `Dockerfile_` themselves, so 'Dockerfile_rank' would resolve to `Dockerfile_Dockerfile_rank`
# and would also clobber the DOCKERNAME value that the earlier cells rely on.
DOCKERNAME               = 'rank'
MACHINE_TYPE             = 'e2-highcpu-32'
# Folder used as the Cloud Build docker context and as the location of the Dockerfile.
FILE_LOCATION            = './src'

print(f"DOCKERNAME       : {DOCKERNAME}")
print(f"IMAGE_URI        : {IMAGE_URI}")
print(f"MACHINE_TYPE     : {MACHINE_TYPE}")
print(f"FILE_LOCATION    : {FILE_LOCATION}")

In [None]:
# %%writefile {REPO_SRC}/cloudbuild.yaml

# steps:
# - name: 'gcr.io/cloud-builders/docker'
#   args: ['build', '-t', '$_IMAGE_URI', '$_FILE_LOCATION', '-f', '$_FILE_LOCATION/Dockerfile_$_DOCKERNAME']
# images:
# - '$_IMAGE_URI'

In [None]:
# print(f"DOCKERNAME: {DOCKERNAME}")
# print(f"IMAGE_URI: {IMAGE_URI}")
# print(f"FILE_LOCATION: {FILE_LOCATION}")
# print(f"MACHINE_TYPE: {MACHINE_TYPE}")

### set gcloudignore

In [None]:
# ! gcloud config set gcloudignore/enabled true

In [None]:
# !gcloud meta list-files-for-upload

In [None]:
# ! gcloud builds submit --config src/cloudbuild.yaml \
#     --substitutions _DOCKERNAME=$DOCKERNAME,_IMAGE_URI=$IMAGE_URI,_FILE_LOCATION=$FILE_LOCATION \
#     --timeout=2h \
#     --machine-type=$MACHINE_TYPE

**Finished**