# Build custom container for Vertex training

In [1]:
!pwd

/home/jupyter/tf_vertex_agents/02-perarm-features-bandit


## Load env config

* use the prefix from `00-env-setup`

In [2]:
PREFIX = 'mabv1'

In [3]:
# staging GCS
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "mabv1"
VERSION                  = "v1"

BUCKET_NAME              = "mabv1-hybrid-vertex-bucket"
BUCKET_URI               = "gs://mabv1-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://mabv1-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_ID      = "hybrid-vertex.movielens_dataset_mabv1"
BIGQUERY_TABLE_ID        = "hybrid-vertex.movielens_dataset_mabv1.training_dataset"

REPO_DOCKER_PATH_PREFIX  = "src"
RL_SUB_DIR     

In [4]:
# ! gsutil ls $BUCKET_URI

## imports

In [5]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Build Image

In [6]:
# !tree src

## Container Image Variables

In [7]:
print(f"DOCKERNAME        = {DOCKERNAME}")
print(f"REPOSITORY        = {REPOSITORY}")
print(f"IMAGE_NAME        = {IMAGE_NAME}")
print(f"REMOTE_IMAGE_NAME = {REMOTE_IMAGE_NAME}")

DOCKERNAME        = Dockerfile_perarm_feats
REPOSITORY        = rl-movielens-mabv1
IMAGE_NAME        = train-perarm-feats-v1
REMOTE_IMAGE_NAME = us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-mabv1/train-perarm-feats-v1


## Create Artifact Repository

If you don't have an existing artifact repository, create one using the gcloud command below

In [8]:
# ! gcloud artifacts repositories create $REPOSITORY --repository-format=docker --location=$LOCATION

## Local Docker build

Provide a name for your dockerfile and make sure you are authenticated

In [9]:
! gcloud auth configure-docker $REGION-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


To take a quick anonymous survey, run:
  $ gcloud survey



### Create Dockerfile

In [10]:
import os

root_path = '..'
os.chdir(root_path)
os.getcwd()

'/home/jupyter/tf_vertex_agents'

### Create train image

* see [example Dockerfile for GPU](https://github.com/GoogleCloudPlatform/cloudml-samples/blob/main/pytorch/containers/quickstart/mnist/Dockerfile-gpu) jobs in Vertex AI
* see deep learning container [example here](https://cloud.google.com/deep-learning-containers/docs/derivative-container), and here for [available DL containers](https://cloud.google.com/deep-learning-containers/docs/choosing-container#versions)

In [11]:
gpu_profiling = True # True | False

print(f"gpu_profiling : {gpu_profiling}")

gpu_profiling : True


In [12]:
# TRAIN_BASE_IMAGE = 'us-docker.pkg.dev/vertex-ai/training/tf-gpu.2-11:latest'
# docker pull tensorflow/tensorflow:2.13.0-gpu

if gpu_profiling:
    # TRAIN_BASE_IMAGE = 'tensorflow/tensorflow:2.13.0-gpu'
    TRAIN_BASE_IMAGE = 'gcr.io/deeplearning-platform-release/tf2-gpu.2-12.py310'
    NVTOP_RUN = 'RUN apt update && apt -y install nvtop'
    # NVTOP_RUN = 'RUN apt-get update && apt-get -y install nvtop'
else:
    TRAIN_BASE_IMAGE = 'python:3.10'
    NVTOP_RUN = None
    
RUN_EXPORT = "RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/"
    
print(f"TRAIN_BASE_IMAGE : {TRAIN_BASE_IMAGE}")
print(f"NVTOP_RUN        : {NVTOP_RUN}")
print(f"RUN_EXPORT       : {RUN_EXPORT}")

TRAIN_BASE_IMAGE : gcr.io/deeplearning-platform-release/tf2-gpu.2-12.py310
NVTOP_RUN        : RUN apt update && apt -y install nvtop
RUN_EXPORT       : RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/


In [13]:
dockerfile = f'''
FROM {TRAIN_BASE_IMAGE}

ENV PYTHONUNBUFFERED True

ENV APP_HOME /workspace

WORKDIR $APP_HOME

COPY /requirements.txt $APP_HOME/requirements.txt

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt

RUN ls $APP_HOME

COPY src/perarm_features $APP_HOME/src/perarm_features
COPY src/per_arm_rl $APP_HOME/src/per_arm_rl

{NVTOP_RUN}

RUN ls $APP_HOME

{RUN_EXPORT}

# Sets up the entry point to invoke the task.
ENTRYPOINT ["python3", "-m", "src.perarm_features.task"]
'''
print(dockerfile)


FROM gcr.io/deeplearning-platform-release/tf2-gpu.2-12.py310

ENV PYTHONUNBUFFERED True

ENV APP_HOME /workspace

WORKDIR $APP_HOME

COPY /requirements.txt $APP_HOME/requirements.txt

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r $APP_HOME/requirements.txt

RUN ls $APP_HOME

COPY src/perarm_features $APP_HOME/src/perarm_features
COPY src/per_arm_rl $APP_HOME/src/per_arm_rl

RUN apt update && apt -y install nvtop

RUN ls $APP_HOME

RUN export PYTHONPATH=${PYTHONPATH}:${APP_HOME}/

# Sets up the entry point to invoke the task.
ENTRYPOINT ["python3", "-m", "src.perarm_features.task"]



In [14]:
with open(f'{DOCKERNAME}', 'w') as f:
    f.write(dockerfile)

### Build Image Locally

In [15]:
!docker build -t $REMOTE_IMAGE_NAME -f $DOCKERNAME .

Sending build context to Docker daemon   79.4MB
Step 1/14 : FROM gcr.io/deeplearning-platform-release/tf2-gpu.2-12.py310
latest: Pulling from deeplearning-platform-release/tf2-gpu.2-12.py310

[1B1dcae6ca: Pulling fs layer 
[1Bc00479c6: Pulling fs layer 
[1Bfa233a80: Pulling fs layer 
[1Bf70b397f: Pulling fs layer 
[1Ba42bc712: Pulling fs layer 
[1B1e6a2dfe: Pulling fs layer 
[1Be6c74f13: Pulling fs layer 
[1B03310895: Pulling fs layer 
[1Ba5c35cf0: Pulling fs layer 
[1Bb5cae66b: Pulling fs layer 
[1Bc15a6664: Pulling fs layer 
[1Ba8ea64b5: Pulling fs layer 
[1B7e334275: Pulling fs layer 
[1Bdeece3a2: Pulling fs layer 
[1Bb700ef54: Pulling fs layer 
[1B82b52526: Pulling fs layer 
[1B6f03fe15: Pulling fs layer 
[1B20986992: Pulling fs layer 
[1B5634e631: Pulling fs layer 
[1Bfd77f613: Pulling fs layer 
[1B5ccf2133: Pulling fs layer 
[1B8fb8d7a7: Pulling fs layer 
[1B5d33e4c4: Pulling fs layer 
[13B8ea64b5: Waiting fs layer 
[1B3d172136: Pulling fs layer 
[22B42b

### Push container to Registry

In [16]:
# ### push the container to registry
!docker push $REMOTE_IMAGE_NAME

Using default tag: latest
The push refers to repository [us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-mabv1/train-perarm-feats-v1]

[1B4588a995: Preparing 
[1B1be53798: Preparing 
[1Ba4275e68: Preparing 
[1B888d60b4: Preparing 
[1B660aaeb4: Preparing 
[1Beb5dce44: Preparing 
[1B5e48771c: Preparing 
[1B66a42995: Preparing 
[1Be50f8e92: Preparing 
[1Be289181a: Preparing 
[1Bb5a27b90: Preparing 
[1B492dc9b6: Preparing 
[1B1042b0da: Preparing 
[1B03634fe6: Preparing 
[1Bb1b74e71: Preparing 
[1Bf31f5d7e: Preparing 
[1B6195ee3c: Preparing 
[1Bffbd43ec: Preparing 
[1B8feea017: Preparing 
[1Bbdb31fb7: Preparing 
[1B6623bd9c: Preparing 
[1B3903d06c: Preparing 
[1Bc3339ceb: Preparing 
[1B0b71d36c: Preparing 
[1B979ef0d7: Preparing 
[1B16d8d213: Preparing 
[1Bca06987e: Preparing 
[1B4c28b064: Preparing 
[1B82c8dc78: Preparing 
[1B3b5920e8: Preparing 
[1B5d7e0e9a: Preparing 
[27Bb5dce44: Waiting g 
[1Bcdbeba29: Preparing 
[1B82e04ede: Preparing 
[29Be487

### GPU profiling

> enter these commands in the Vertex interactive terminal:

```bash
sudo apt update
sudo apt -y install nvtop
```

**Finished**