## Build custom container for Vertex training

In [1]:
# Confirm the kernel's working directory; the relative paths used later in
# this notebook (e.g. `src/...`) resolve against it.
!pwd

/home/jupyter/jw-repo2/spotify_mpd_two_tower


In [2]:
# Naming convention: every cloud resource created for this project is
# prefixed with PREFIX, which is derived from VERSION.
VERSION = "v1"  # TODO
PREFIX = "ndr-" + VERSION  # TODO

print("PREFIX = " + PREFIX)

PREFIX = ndr-v1


In [3]:
# Resolve the active gcloud project. `!cmd` on the right-hand side captures the
# command's output as a list of lines; the first line is used as the project id.
# NOTE(review): if gcloud prints a notice before the value, [0] would pick up
# that line instead of the project id — confirm the output is a single line.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# Staging bucket name and URI, derived from PREFIX and the project id.
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Fetch the shared environment file from the bucket, echo it, then execute it
# in this kernel's namespace (`config.n` is the captured output joined with
# newlines). The echoed text shows that it re-assigns PROJECT_ID, PREFIX,
# VERSION, BUCKET_NAME and BUCKET_URI and defines further constants; names such
# as REPO_SRC, DOCKERNAME and REMOTE_IMAGE_NAME used by later cells are
# presumably defined there too — confirm.
# NOTE(security): exec() runs whatever is stored at that GCS path; only use it
# with a bucket you control.
# NOTE(review): nothing checks that `gsutil cat` succeeded before exec() runs
# — TODO consider validating the download first.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "ndr-v1"
VERSION                  = "v1"

APP                      = "sp"
MODEL_TYPE               = "2tower"
FRAMEWORK                = "tfrs"
DATA_VERSION             = "v1"
TRACK_HISTORY            = "5"

BUCKET_NAME              = "ndr-v1-hybrid-vertex-bucket"
BUCKET_URI               = "gs://ndr-v1-hybrid-vertex-bucket"
SOURCE_BUCKET            = "spotify-million-playlist-dataset"

DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://ndr-v1-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

CANDIDATE_PREFIX         = "candidates"
TRAIN_DIR_PREFIX      

In [4]:
! gsutil ls $BUCKET_URI

gs://ndr-v1-hybrid-vertex-bucket/config/
gs://ndr-v1-hybrid-vertex-bucket/data/
gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/
gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/


## Imports

In [5]:
import os

# Set TF_CPP_MIN_LOG_LEVEL to "2" in this kernel's environment
# (presumably to quiet TensorFlow's native logging — confirm).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Build Image

In [6]:
# Echo the image/repository settings used by the build and push steps. None of
# these names is assigned in a visible cell; they are presumably supplied by
# the exec'd notebook_env.py — confirm.
print(
    f"DOCKERNAME        = {DOCKERNAME}\n"
    f"REPOSITORY        = {REPOSITORY}\n"
    f"IMAGE_NAME        = {IMAGE_NAME}\n"
    f"REMOTE_IMAGE_NAME = {REMOTE_IMAGE_NAME}"
)

DOCKERNAME        = tfrs
REPOSITORY        = ndr-v1-spotify
IMAGE_NAME        = train-v1
REMOTE_IMAGE_NAME = us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1


## Create Artifact Repository
If you don't already have an Artifact Registry repository, create one by uncommenting and running the gcloud command below.

In [8]:
# ! gcloud artifacts repositories create $REPOSITORY --repository-format=docker --location=$LOCATION

## Local Docker build
Provide a name for your Dockerfile and make sure Docker is authenticated to push to Artifact Registry.

In [9]:
! gcloud auth configure-docker $REGION-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


### Requirements & Config files

In [10]:
%%writefile {REPO_SRC}/two_tower_jt/train_config.py
PROJECT_ID='hybrid-vertex'
TRACK_HISTORY = 5

Overwriting src/two_tower_jt/train_config.py


In [11]:
%%writefile {REPO_SRC}/two_tower_jt/requirements.txt
# Python dependencies for the training image; installed by the Dockerfile's
# `pip install -r .../requirements.txt` step.
# NOTE(review): the Dockerfile's base image is tensorflow 2.11.0, while
# tensorboard is pinned to 2.10.1 and tensorflow-io to 0.27.0 — confirm pip
# resolves these without conflict against the base image's TensorFlow.
google-cloud-aiplatform[cloud_profiler]>=1.26.1
tensorflow-recommenders==0.7.2
tensorboard==2.10.1
# tensorboard==2.11.2 
tensorboard-data-server==0.6.1
tensorboard-plugin-profile==2.11.1
tensorflow-io==0.27.0
# NOTE(review): tensorboard-plugin-wit is the only unpinned entry.
tensorboard-plugin-wit
protobuf==3.19.6

Overwriting src/two_tower_jt/requirements.txt


In [12]:
# tensorboard==2.13.0
# tensorboard-data-server==0.7.1
# tensorboard-plugin-profile==2.13.1
# tensorboard-plugin-wit==1.8.1
# tensorboardX==2.6
# tensorflow-io==0.27.0

In [13]:

# tensorboard==2.10.1
# # tensorboard==2.11.2 
# tensorboard-data-server==0.6.1
# tensorboard-plugin-profile==2.11.1
# tensorflow-io==0.27.0

## Create Dockerfile

In [14]:
%%writefile {REPO_SRC}/Dockerfile_{DOCKERNAME}
# Training image for the two-tower model. Build with the repository root as
# the context so that `src/two_tower_jt` is available to COPY.
FROM tensorflow/tensorflow:2.11.0-gpu
# FROM gcr.io/deeplearning-platform-release/tf-gpu.2-11

# ENV PYTHONUNBUFFERED True
ENV APP_HOME=/workspace

WORKDIR $APP_HOME

COPY src/two_tower_jt $APP_HOME/src/two_tower_jt

# RUN pip install --upgrade pip

RUN pip install -r $APP_HOME/src/two_tower_jt/requirements.txt

# apt-get (rather than apt) is the stable CLI for scripted use; remove the
# package lists afterwards so they do not bloat the layer.
RUN apt-get update \
    && apt-get install -y nvtop \
    && rm -rf /var/lib/apt/lists/*

# Put $APP_HOME on the import path of the running container. This has to be
# ENV: `RUN export ...` only affects the shell of that one build step and is
# discarded when the step finishes, so it never reaches the final image.
# Any PYTHONPATH inherited from the base image is kept in front.
ENV PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${APP_HOME}"

Overwriting src/Dockerfile_tfrs


## Build Image Locally

Run the commands printed by the next cell in a terminal window.

In [15]:
# Print the shell commands for a local image build. The `export` lines carry
# the notebook's values into the terminal; the final line is emitted verbatim
# so the terminal's shell expands the variables.
print(
    "copy these commands into terminal:\n",
    f"export REPO_SRC={REPO_SRC}",
    f"export REMOTE_IMAGE_NAME={REMOTE_IMAGE_NAME}",
    f"export DOCKERNAME={DOCKERNAME}",
    "docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME .",
    sep="\n",
)

# Alternative: run the build from the notebook instead of a terminal.
# !docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME .

copy these commands into terminal:

export REPO_SRC=src
export REMOTE_IMAGE_NAME=us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1
export DOCKERNAME=tfrs
docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME .


## Push container to Registry

In [16]:
# Print the push command; it is emitted verbatim, so $REMOTE_IMAGE_NAME is
# expanded by the terminal's shell, not by the notebook.
print(
    "copy this command into terminal:\n",
    "docker push $REMOTE_IMAGE_NAME",
    sep="\n",
)

# Alternative: push from the notebook instead of a terminal.
# ! docker push $REMOTE_IMAGE_NAME

copy this command into terminal:

docker push $REMOTE_IMAGE_NAME


## (Optional) Build Image with Cloud Build

In [None]:
# VERSION= "trainerv7"
# APP='sp'
# MODEL_TYPE='2tower'
# FRAMEWORK = 'tfrs'
# MODEL_ROOT_NAME = f'{APP}-{MODEL_TYPE}-{FRAMEWORK}-{VERSION}'

# print(f"MODEL_ROOT_NAME: {MODEL_ROOT_NAME}")

In [None]:
# # Docker definitions for training
# IMAGE_NAME = f'{MODEL_ROOT_NAME}-tr'
# IMAGE_URI = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}'

# DOCKERNAME = 'tfrs'
# REPO_DOCKER_PATH_PREFIX = 'src'
# MACHINE_TYPE ='e2-highcpu-32'
# FILE_LOCATION = './src'

# print(f"IMAGE_URI: {IMAGE_URI}")

In [None]:
# %%writefile {REPO_SRC}/cloudbuild.yaml

# steps:
# - name: 'gcr.io/cloud-builders/docker'
#   args: ['build', '-t', '$_IMAGE_URI', '$_FILE_LOCATION', '-f', '$_FILE_LOCATION/Dockerfile_$_DOCKERNAME']
# images:
# - '$_IMAGE_URI'

In [None]:
# print(f"DOCKERNAME: {DOCKERNAME}")
# print(f"IMAGE_URI: {IMAGE_URI}")
# print(f"FILE_LOCATION: {FILE_LOCATION}")
# print(f"MACHINE_TYPE: {MACHINE_TYPE}")

### set `gcloudignore`

In [None]:
# ! gcloud config set gcloudignore/enabled true

In [None]:
# %%writefile .gcloudignore
# .gcloudignore
# /local_files/
# img/*
# *.pkl
# *.png
# *.ipynb
# .git
# .github
# .ipynb_checkpoints/*
# *__pycache__
# *cpython-37.pyc
# spotipy_secret_creds.py
# custom_pipeline_spec.json
# /WIP/*
# beam_candidates/*
# beam_training/*
# learning/*
# src/vocab_pipes/*
# src/train_pipes/*
# src/feature_pipes/*
# test_root/*
# custom_track_meta_pipeline_spec.json
# pip_freeze.txt
# README.md
# .gitignore
# .DS_Store

In [None]:
# !gcloud meta list-files-for-upload

### submit job to Cloud Build

In [None]:
# ! gcloud builds submit --config src/cloudbuild.yaml \
#     --substitutions _DOCKERNAME=$DOCKERNAME,_IMAGE_URI=$IMAGE_URI,_FILE_LOCATION=$FILE_LOCATION \
#     --timeout=2h \
#     --machine-type=$MACHINE_TYPE

**Finished**