## Build custom container for Vertex training

In [1]:
!pwd

/home/jupyter/jw-repo2/spotify_mpd_two_tower


In [2]:
# naming convention for all cloud resources
VERSION        = "v1"                  # TODO
PREFIX         = f'ndr-{VERSION}'      # TODO

print(f"PREFIX = {PREFIX}")

PREFIX = ndr-v1


In [3]:
# staging GCS
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "ndr-v1"
VERSION                  = "v1"

APP                      = "sp"
MODEL_TYPE               = "2tower"
FRAMEWORK                = "tfrs"
DATA_VERSION             = "v1"
TRACK_HISTORY            = "5"

BUCKET_NAME              = "ndr-v1-hybrid-vertex-bucket"
BUCKET_URI               = "gs://ndr-v1-hybrid-vertex-bucket"
SOURCE_BUCKET            = "spotify-million-playlist-dataset"

DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://ndr-v1-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

CANDIDATE_PREFIX         = "candidates"
TRAIN_DIR_PREFIX      

In [4]:
! gsutil ls $BUCKET_URI

gs://ndr-v1-hybrid-vertex-bucket/config/
gs://ndr-v1-hybrid-vertex-bucket/data/
gs://ndr-v1-hybrid-vertex-bucket/endpoint-tests/
gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/
gs://ndr-v1-hybrid-vertex-bucket/local-train-v2/
gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/
gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/


## Imports

In [5]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Build Image

In [6]:
print(f"DOCKERNAME        = {DOCKERNAME}")
print(f"REPOSITORY        = {REPOSITORY}")
print(f"IMAGE_NAME        = {IMAGE_NAME}")
print(f"REMOTE_IMAGE_NAME = {REMOTE_IMAGE_NAME}")

DOCKERNAME        = tfrs
REPOSITORY        = ndr-v1-spotify
IMAGE_NAME        = train-v1
REMOTE_IMAGE_NAME = us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1


In [84]:
!pwd

/home/jupyter/jw-repo2/spotify_mpd_two_tower


In [88]:
!tree src

[01;34msrc[00m
├── Dockerfile_rank
├── Dockerfile_tfrs
├── cloudbuild.yaml
├── [01;34mfeature_pipes[00m
│   ├── call_spotify_api_artist.py
│   └── call_spotify_api_audio.py
├── [01;34mfeatures[00m
│   └── feature_store_configs.py
├── [01;34mranking[00m
│   ├── __init__.py
│   ├── build_audio_ranker.py
│   ├── feature_sets.py
│   ├── requirements.txt
│   ├── task.py
│   ├── tf_ranking_model.py
│   ├── train_config.py
│   └── train_utils.py
├── [01;34mtrain_pipes[00m
│   ├── create_ann_index.py
│   ├── create_ann_index_endpoint_vpc.py
│   ├── create_brute_force_index.py
│   ├── create_brute_index_endpoint_vpc.py
│   ├── create_tensorboard.py
│   ├── deploy_ann_index.py
│   ├── deploy_brute_index.py
│   ├── generate_candidates.py
│   ├── model_monitoring_config.py
│   ├── pipeline_config.py
│   ├── test_model_endpoint.py
│   ├── test_model_index_endpoint.py
│   └── train_custom_model.py
├── [01;34mtwo_tower_jt[00m
│   ├── __init__.py
│   ├── feature_sets.py
│   ├── interactive

## Create Artifact Repository
If you don't have an existing artifact repository, create one using the gcloud command below

In [7]:
! gcloud artifacts repositories create $REPOSITORY --repository-format=docker --location=$LOCATION

## Local Docker build
Provide a name for your dockerfile and make sure you are authenticated

In [8]:
! gcloud auth configure-docker $REGION-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


### Requirements & Config files

In [8]:
%%writefile {REPO_SRC}/two_tower_jt/train_config.py
PROJECT_ID='hybrid-vertex'
TRACK_HISTORY = 5

Overwriting src/two_tower_jt/train_config.py


In [9]:
%%writefile {REPO_SRC}/two_tower_jt/requirements.txt
google-cloud-aiplatform[cloud_profiler]>=1.26.1
tensorflow-recommenders==0.7.2
tensorboard==2.10.1
# tensorboard==2.11.2 
tensorboard-data-server==0.6.1
tensorboard-plugin-profile==2.11.1
tensorflow-io==0.27.0
tensorboard-plugin-wit
protobuf==3.19.6

Overwriting src/two_tower_jt/requirements.txt


## Create Dockerfile

* do this : `COPY /dir /dir`
* not this: `COPY /dir/* /dir/`

commands
* `ENV PYTHONDONTWRITEBYTECODE 1`
* alt_base_image: `FROM gcr.io/deeplearning-platform-release/tf-gpu.2-11`
* `COPY /src/two_tower_jt $APP_HOME/src/two_tower_jt`

In [113]:
%%writefile {REPO_SRC}/Dockerfile_{DOCKERNAME}

FROM tensorflow/tensorflow:2.11.0-gpu

ENV PYTHONUNBUFFERED True

ENV APP_HOME /workspace

WORKDIR $APP_HOME

COPY /two_tower_jt $APP_HOME/src/two_tower_jt

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r $APP_HOME/src/two_tower_jt/requirements.txt

RUN apt update && apt -y install nvtop

RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/

Overwriting src/Dockerfile_tfrs


## .dockerignore

In [114]:
# %%writefile {REPO_SRC}/.dockerignore
# **/__pycache__
# **/*.pyc
# ranking/*
# feature_pipes/*
# vocab_pipes/*

## Build Image Locally

run commands in terminal window

In [115]:
# # set variables if running in terminal
print("copy these commands into terminal:\n")
print(f"export REPO_SRC={REPO_SRC}")
print(f"export REMOTE_IMAGE_NAME={REMOTE_IMAGE_NAME}")
print(f"export DOCKERNAME={DOCKERNAME}")
print(f"docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME .")

# !docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME .

copy these commands into terminal:

export REPO_SRC=src
export REMOTE_IMAGE_NAME=us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1
export DOCKERNAME=tfrs
docker build -t $REMOTE_IMAGE_NAME -f $REPO_SRC/Dockerfile_$DOCKERNAME .


## Push container to Registry

In [116]:
print("copy this command into terminal:\n")
print(f"docker push $REMOTE_IMAGE_NAME")

# ! docker push $REMOTE_IMAGE_NAME

copy this command into terminal:

docker push $REMOTE_IMAGE_NAME


```
sudo apt-get autoclean
sudo apt-get autoremove
```

## (Optional) Build Image with Cloud Build

In [117]:
import os

# root_path = '..'
root_path = '/home/jupyter/jw-repo2/spotify_mpd_two_tower'
os.chdir(root_path)
os.getcwd()

'/home/jupyter/jw-repo2/spotify_mpd_two_tower'

In [118]:
# ! gcloud beta artifacts repositories create {REPOSITORY} \
#     --repository-format=docker \
#     --location=$LOCATION

### set `gcloudignore`

In [119]:
! gcloud config set gcloudignore/enabled true

Updated property [gcloudignore/enabled].


In [129]:
%%writefile .gcloudignore
.gcloudignore
img/*
*.pkl
*.png
*.ipynb
.git
.github
.ipynb_checkpoints/*
candidate_embeddings.json
spotipy_secret_creds.py
pip_freeze.txt
custom_pipeline_spec.json
prediction_featurestore_fetch_config.yaml
README.md
.gitignore
.DS_Store
WIP/two_tower_lite.py
WIP/ranking/ranking.md
beam_candidates/*
beam_training/*
learning/*
train_pipes/*
vocab_pipes/*
ARCHIVED/*
ranking/*
util/__pycache__/*
src/.dockerignore

Overwriting .gcloudignore


In [134]:
!gcloud meta list-files-for-upload

src/Dockerfile_tfrs
src/cloudbuild.yaml
src/Dockerfile_rank
src/features/feature_store_configs.py
src/two_tower_jt/__init__.py
src/two_tower_jt/train_utils.py
src/two_tower_jt/interactive_train.py
src/two_tower_jt/test_instances.py
src/two_tower_jt/train_config.py
src/two_tower_jt/two_tower.py
src/two_tower_jt/requirements.txt
src/two_tower_jt/feature_sets.py
src/two_tower_jt/task.py
src/feature_pipes/call_spotify_api_audio.py
src/feature_pipes/call_spotify_api_artist.py
util/local_utils.py
util/feature_set_utils.py
util/two_tower_16_bit_edition.py
util/train_utils.py
util/install-nvtop.sh
util/accelerators.py
util/workerpool_specs.py
util/test_instances.py


### submit job to Cloud Build

In [124]:
%%writefile {REPO_SRC}/cloudbuild.yaml

steps:
- name: 'gcr.io/cloud-builders/docker'
  args: ['build', '-t', '$_IMAGE_URI', '$_FILE_LOCATION', '-f', '$_FILE_LOCATION/Dockerfile_$_DOCKERNAME']
images:
- '$_IMAGE_URI'

Overwriting src/cloudbuild.yaml


In [132]:
# !pwd

In [126]:
MODEL_ROOT_NAME         = f'{APP}-{MODEL_TYPE}-{FRAMEWORK}-{VERSION}'

# Docker definitions for training
IMAGE_NAME              = f'{MODEL_ROOT_NAME}-tr'
IMAGE_URI               = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}'

DOCKERNAME              = 'tfrs'
MACHINE_TYPE            ='e2-highcpu-32'
FILE_LOCATION           = "src" # './src'

print(f"MODEL_ROOT_NAME   : {MODEL_ROOT_NAME}")
print(f"IMAGE_URI         : {IMAGE_URI}")
print(f"DOCKERNAME        : {DOCKERNAME}")
print(f"IMAGE_URI         : {IMAGE_URI}")
print(f"FILE_LOCATION     : {FILE_LOCATION}")
print(f"MACHINE_TYPE      : {MACHINE_TYPE}")

MODEL_ROOT_NAME   : sp-2tower-tfrs-v1
IMAGE_URI         : gcr.io/hybrid-vertex/sp-2tower-tfrs-v1-tr
DOCKERNAME        : tfrs
IMAGE_URI         : gcr.io/hybrid-vertex/sp-2tower-tfrs-v1-tr
FILE_LOCATION     : src
MACHINE_TYPE      : e2-highcpu-32


In [131]:
# !tree src

In [133]:
! gcloud builds submit --config src/cloudbuild.yaml \
    --substitutions _DOCKERNAME=$DOCKERNAME,_IMAGE_URI=$IMAGE_URI,_FILE_LOCATION=$FILE_LOCATION \
    --timeout=2h \
    --machine-type=$MACHINE_TYPE

**Finished**