# Build custom container for Vertex training

In [1]:
!pwd

/home/jupyter/tf_vertex_agents/04-perarm-features-bandit


## Load env config

* use the prefix from `00-env-setup`

In [2]:
PREFIX = 'mabv1'

In [3]:
# staging GCS
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "mabv1"
VERSION                  = "v1"

BUCKET_NAME              = "mabv1-hybrid-vertex-bucket"
BUCKET_URI               = "gs://mabv1-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://mabv1-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_ID      = "hybrid-vertex.movielens_dataset_mabv1"
BIGQUERY_TABLE_ID        = "hybrid-vertex.movielens_dataset_mabv1.training_dataset"

REPO_DOCKER_PATH_PREFIX  = "src"
RL_SUB_DIR     

In [4]:
! gsutil ls $BUCKET_URI

gs://mabv1-hybrid-vertex-bucket/acc-paf-v2/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-13-22:02:47.458/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-14-12:09:06.643/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-14-12:46:07.509/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-14-13:48:08.938/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-17-18:35:00.675/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-17-18:50:42.146/
gs://mabv1-hybrid-vertex-bucket/archived/
gs://mabv1-hybrid-vertex-bucket/banditos-2/
gs://mabv1-hybrid-vertex-bucket/baseline-bandit-v1/
gs://mabv1-hybrid-vertex-bucket/config/
gs://mabv1-hybrid-vertex-bucket/custom-neural-bandits-a100/
gs://mabv1-hybrid-vertex-bucket/custom-neural-bandits-v1/
gs://mabv1-hybrid-vertex-bucket/data/
gs://mabv1-hybrid-vertex-bucket/linear-bandit-v1/
gs://mabv1-hybrid-vertex-bucket/mab-local-v4/
gs://mabv1-hybrid-vert

## imports

In [5]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Build Image

## Container Image Variables

In [6]:
print(f"DOCKERNAME        = {DOCKERNAME}")
print(f"REPOSITORY        = {REPOSITORY}")
print(f"IMAGE_NAME        = {IMAGE_NAME}")
print(f"REMOTE_IMAGE_NAME = {REMOTE_IMAGE_NAME}")

DOCKERNAME        = Dockerfile_perarm_feats
REPOSITORY        = rl-movielens-mabv1
IMAGE_NAME        = train-perarm-feats-v1
REMOTE_IMAGE_NAME = us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-mabv1/train-perarm-feats-v1


## Create Artifact Repository

If you don't have an existing artifact repository, create one using the gcloud command below

In [7]:
# ! gcloud artifacts repositories create $REPOSITORY --repository-format=docker --location=$LOCATION

## Local Docker build

Provide a name for your dockerfile and make sure you are authenticated

In [8]:
! gcloud auth configure-docker $REGION-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


### Create Dockerfile

In [9]:
import os

root_path = '..'
os.chdir(root_path)
os.getcwd()

'/home/jupyter/tf_vertex_agents'

### Create train image

see [example Dockerfile for GPU](https://github.com/GoogleCloudPlatform/cloudml-samples/blob/main/pytorch/containers/quickstart/mnist/Dockerfile-gpu) jobs in Vertex AI

In [10]:
gpu_profiling = True # True | False

print(f"gpu_profiling : {gpu_profiling}")

gpu_profiling : True


In [11]:
# TRAIN_BASE_IMAGE = 'us-docker.pkg.dev/vertex-ai/training/tf-gpu.2-11:latest'
# docker pull tensorflow/tensorflow:2.13.0-gpu

if gpu_profiling:
    TRAIN_BASE_IMAGE = 'tensorflow/tensorflow:2.13.0-gpu'
    NVTOP_RUN = 'RUN apt update && apt -y install nvtop'
    # NVTOP_RUN = 'RUN apt-get update && apt-get -y install nvtop'
else:
    TRAIN_BASE_IMAGE = 'python:3.10'
    NVTOP_RUN = None
    
RUN_EXPORT = "RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/"
    
print(f"TRAIN_BASE_IMAGE : {TRAIN_BASE_IMAGE}")
print(f"NVTOP_RUN        : {NVTOP_RUN}")
print(f"RUN_EXPORT       : {RUN_EXPORT}")

TRAIN_BASE_IMAGE : tensorflow/tensorflow:2.13.0-gpu
NVTOP_RUN        : RUN apt update && apt -y install nvtop
RUN_EXPORT       : RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/


In [12]:
dockerfile = f'''
FROM {TRAIN_BASE_IMAGE}

ENV PYTHONUNBUFFERED True

ENV APP_HOME /workspace

WORKDIR $APP_HOME

COPY /requirements.txt $APP_HOME/requirements.txt

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt

RUN ls $APP_HOME

COPY src/perarm_features $APP_HOME/src/perarm_features
COPY src/per_arm_rl $APP_HOME/src/per_arm_rl

{NVTOP_RUN}

RUN ls $APP_HOME

{RUN_EXPORT}

# Sets up the entry point to invoke the task.
ENTRYPOINT ["python3", "-m", "src.perarm_features.task"]
'''
print(dockerfile)


FROM tensorflow/tensorflow:2.13.0-gpu

ENV PYTHONUNBUFFERED True

ENV APP_HOME /workspace

WORKDIR $APP_HOME

COPY /requirements.txt $APP_HOME/requirements.txt

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt

RUN ls $APP_HOME

COPY src/perarm_features $APP_HOME/src/perarm_features
COPY src/per_arm_rl $APP_HOME/src/per_arm_rl

RUN apt update && apt -y install nvtop

RUN ls $APP_HOME

RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/

# Sets up the entry point to invoke the task.
ENTRYPOINT ["python3", "-m", "src.perarm_features.task"]



In [13]:
with open(f'{DOCKERNAME}', 'w') as f:
    f.write(dockerfile)

In [14]:
# %%writefile {DOCKERNAME}

# FROM python:3.10
# ENV PYTHONUNBUFFERED True

# ENV APP_HOME /workspace
# WORKDIR $APP_HOME

# COPY /requirements.txt $APP_HOME/requirements.txt

# RUN pip install --upgrade pip
# RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt

# RUN ls $APP_HOME

# COPY src/perarm_features $APP_HOME/src/perarm_features
# COPY src/per_arm_rl $APP_HOME/src/per_arm_rl

# RUN ls $APP_HOME

# RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/

# # Sets up the entry point to invoke the task.
# ENTRYPOINT ["python3", "-m", "src.perarm_features.task"]

### Build Image Locally

In [15]:
!docker build -t $REMOTE_IMAGE_NAME -f $DOCKERNAME .

Sending build context to Docker daemon  13.29MB
Step 1/14 : FROM tensorflow/tensorflow:2.13.0-gpu
 ---> 6bdca089cc38
Step 2/14 : ENV PYTHONUNBUFFERED True
 ---> Using cache
 ---> d3da8d2bd662
Step 3/14 : ENV APP_HOME /workspace
 ---> Using cache
 ---> 077e5e85305c
Step 4/14 : WORKDIR $APP_HOME
 ---> Using cache
 ---> b597986a6c66
Step 5/14 : COPY /requirements.txt $APP_HOME/requirements.txt
 ---> Using cache
 ---> 7e1d8aeab948
Step 6/14 : RUN pip install --upgrade pip
 ---> Using cache
 ---> 5b9233eb7d24
Step 7/14 : RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt
 ---> Using cache
 ---> d1d12f5180ae
Step 8/14 : RUN ls $APP_HOME
 ---> Using cache
 ---> 759471a524ec
Step 9/14 : COPY src/perarm_features $APP_HOME/src/perarm_features
 ---> 3975a9ecd356
Step 10/14 : COPY src/per_arm_rl $APP_HOME/src/per_arm_rl
 ---> 94ccca194af2
Step 11/14 : RUN apt update && apt -y install nvtop
 ---> Running in 08c0e7332d29
[91m

[0mGet:1 http://archive.ubuntu.com/ubuntu focal InRelease [26

### Push container to Registry

In [16]:
# ### push the container to registry
!docker push $REMOTE_IMAGE_NAME

Using default tag: latest
The push refers to repository [us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-mabv1/train-perarm-feats-v1]

[1Bc3c98e11: Preparing 
[1Bf86ff904: Preparing 
[1B81339efa: Preparing 
[1B30ff1fbf: Preparing 
[1Bdd35e987: Preparing 
[1B8e604636: Preparing 
[1B14dc7caa: Preparing 
[1Ba40e4dcd: Preparing 
[1Bb5695a98: Preparing 
[1Bf0d116f4: Preparing 
[1B2813a979: Preparing 
[1B6e868aa5: Preparing 
[1B136c7d36: Preparing 
[1B891e0e76: Preparing 
[1Bed848ac5: Preparing 
[1B18b47754: Preparing 
[1B91e05b94: Preparing 
[1Be103257c: Preparing 
[1Bb25399cb: Preparing 
[1Bb667a965: Preparing 
[1B6ad9c95e: Preparing 
[1Bb4e1ecd1: Preparing 
[1B5c845fcf: Preparing 
[1Ba7216f78: Preparing 
[25B3c98e11: Pushed   223.9MB/223.8MB[23A[2K[19A[2K[25A[2K[16A[2K[23A[2K[10A[2K[4A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A[2K[25A

### GPU profiling

> enter these commands in the Vertex interactive terminal:

```bash
sudo apt update
sudo apt -y install nvtop
```