# Environment Setup for training TF-Agents

## Install Packages
Run `pip requirements.txt` in either (1) the notebook cell below or (2) in a notebook terminal window

In [1]:
# !pwd

In [2]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

# !pip install --no-cache-dir -r ./requirements.txt --user -q

## Set vars

In [3]:
# naming convention for all cloud resources
VERSION        = "v2"              # TODO
PREFIX         = f'rec-bandits-{VERSION}'   # TODO

print(f"PREFIX = {PREFIX}")

PREFIX = rec-bandits-v2


### GCP project

In [4]:
# creds, PROJECT_ID = google.auth.default()
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

PROJECT_NUM              = !gcloud projects describe $PROJECT_ID --format="value(projectNumber)"
PROJECT_NUM              = PROJECT_NUM[0]

VERTEX_SA                = f'{PROJECT_NUM}-compute@developer.gserviceaccount.com'

VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

# locations / regions for cloud resources
LOCATION                 = 'us-central1'        
REGION                   = LOCATION
BQ_LOCATION              = 'US'

print(f"PROJECT_ID       = {PROJECT_ID}")
print(f"PROJECT_NUM      = {PROJECT_NUM}")
print(f"VPC_NETWORK_NAME = {VPC_NETWORK_NAME}")
print(f"LOCATION         = {LOCATION}")
print(f"REGION           = {REGION}")
print(f"BQ_LOCATION      = {BQ_LOCATION}")

PROJECT_ID       = hybrid-vertex
PROJECT_NUM      = 934903580331
VPC_NETWORK_NAME = ucaip-haystack-vpc-network
LOCATION         = us-central1
REGION           = us-central1
BQ_LOCATION      = US


### Define Cloud Resource Names and Args

In [5]:
# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Location to write TF-Records for MovieLens 100K dataset
DATA_GCS_PREFIX          = "data"
DATA_PATH                = f"{BUCKET_URI}/{DATA_GCS_PREFIX}"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = 'vocab_dict.pkl'

VPC_NETWORK_FULL         = f"projects/{PROJECT_NUM}/global/networks/{VPC_NETWORK_NAME}"

# BigQuery parameters (used for the Generator, Ingester, Logger)
BIGQUERY_DATASET_ID      = f"{PROJECT_ID}.movielens_ds_{PREFIX}".replace("-","_")
BIGQUERY_TABLE_ID        = f"{BIGQUERY_DATASET_ID}.training_dataset"

# container registry
REPOSITORY                = f'rl-movielens-{PREFIX}'
IMAGE_NAME                = f'train-perarm-feats-{VERSION}'
DOCKERNAME                = f'Dockerfile_perarm_feats'

# cloud build
IMAGE_URI_01              = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}-01'
IMAGE_URI_02              = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}-02'

# docker (local build)
REMOTE_IMAGE_NAME         = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE_NAME}"

print(f"BUCKET_NAME          : {BUCKET_NAME}")
print(f"BUCKET_URI           : {BUCKET_URI}")

print(f"DATA_GCS_PREFIX      : {DATA_GCS_PREFIX}")
print(f"DATA_PATH            : {DATA_PATH}")
print(f"VOCAB_SUBDIR         : {VOCAB_SUBDIR}")
print(f"VOCAB_FILENAME       : {VOCAB_FILENAME}")

print(f"VPC_NETWORK_FULL     : {VPC_NETWORK_FULL}")

print(f"BIGQUERY_DATASET_ID  : {BIGQUERY_DATASET_ID}")
print(f"BIGQUERY_TABLE_ID    : {BIGQUERY_TABLE_ID}")

print(f"REPOSITORY           : {REPOSITORY}")
print(f"IMAGE_NAME           : {IMAGE_NAME}")
print(f"DOCKERNAME           : {DOCKERNAME}")

print(f"IMAGE_URI_01         : {IMAGE_URI_01}")
print(f"IMAGE_URI_02         : {IMAGE_URI_02}")
print(f"REMOTE_IMAGE_NAME    : {REMOTE_IMAGE_NAME}")

BUCKET_NAME          : rec-bandits-v2-hybrid-vertex-bucket
BUCKET_URI           : gs://rec-bandits-v2-hybrid-vertex-bucket
DATA_GCS_PREFIX      : data
DATA_PATH            : gs://rec-bandits-v2-hybrid-vertex-bucket/data
VOCAB_SUBDIR         : vocabs
VOCAB_FILENAME       : vocab_dict.pkl
VPC_NETWORK_FULL     : projects/934903580331/global/networks/ucaip-haystack-vpc-network
BIGQUERY_DATASET_ID  : hybrid_vertex.movielens_ds_rec_bandits_v2
BIGQUERY_TABLE_ID    : hybrid_vertex.movielens_ds_rec_bandits_v2.training_dataset
REPOSITORY           : rl-movielens-rec-bandits-v2
IMAGE_NAME           : train-perarm-feats-v2
DOCKERNAME           : Dockerfile_perarm_feats
IMAGE_URI_01         : gcr.io/hybrid-vertex/train-perarm-feats-v2-01
IMAGE_URI_02         : gcr.io/hybrid-vertex/train-perarm-feats-v2-02
REMOTE_IMAGE_NAME    : us-central1-docker.pkg.dev/hybrid-vertex/rl-movielens-rec-bandits-v2/train-perarm-feats-v2


In [8]:
# # create bucket
# ! gsutil mb -l $REGION $BUCKET_URI

In [9]:
# ! gsutil iam ch serviceAccount:{VERTEX_SA}:roles/storage.objects.get $BUCKET_URI
# ! gsutil iam ch serviceAccount:{VERTEX_SA}:roles/storage.objects.get $BUCKET_URI

## Repo structure

* these variables are used to structure the repo
* this means they are required for correctly building Dockerfile's, importing classes, etc.

In [10]:
REPO_DOCKER_PATH_PREFIX          = 'src'
RL_SUB_DIR                       = 'per_arm_rl'

print(f"REPO_DOCKER_PATH_PREFIX  : {REPO_DOCKER_PATH_PREFIX}")
print(f"RL_SUB_DIR               : {RL_SUB_DIR}")

REPO_DOCKER_PATH_PREFIX  : src
RL_SUB_DIR               : per_arm_rl


## Save Notebook Configuration Data
If you want to avoid having to re-enter these across notebooks

In [11]:
config = f"""
PROJECT_ID               = \"{PROJECT_ID}\"
PROJECT_NUM              = \"{PROJECT_NUM}\"
LOCATION                 = \"{LOCATION}\"

REGION                   = \"{REGION}\"
BQ_LOCATION              = \"{BQ_LOCATION}\"
VPC_NETWORK_NAME         = \"{VPC_NETWORK_NAME}\"

VERTEX_SA                = \"{VERTEX_SA}\"

PREFIX                   = \"{PREFIX}\"
VERSION                  = \"{VERSION}\"

BUCKET_NAME              = \"{BUCKET_NAME}\"
BUCKET_URI               = \"{BUCKET_URI}\"
DATA_GCS_PREFIX          = \"{DATA_GCS_PREFIX}\"
DATA_PATH                = \"{DATA_PATH}\"
VOCAB_SUBDIR             = \"{VOCAB_SUBDIR}\"
VOCAB_FILENAME           = \"{VOCAB_FILENAME}\"

VPC_NETWORK_FULL         = \"{VPC_NETWORK_FULL}\"

BIGQUERY_DATASET_ID      = \"{BIGQUERY_DATASET_ID}\"
BIGQUERY_TABLE_ID        = \"{BIGQUERY_TABLE_ID}\"

REPO_DOCKER_PATH_PREFIX  = \"{REPO_DOCKER_PATH_PREFIX}\"
RL_SUB_DIR               = \"{RL_SUB_DIR}\"

REPOSITORY               = \"{REPOSITORY}\"
IMAGE_NAME               = \"{IMAGE_NAME}\"
DOCKERNAME               = \"{DOCKERNAME}\"

IMAGE_URI_01             = \"{IMAGE_URI_01}\"
IMAGE_URI_02             = \"{IMAGE_URI_02}\"

REMOTE_IMAGE_NAME        = \"{REMOTE_IMAGE_NAME}\"
"""
print(config)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "rec-bandits-v2"
VERSION                  = "v2"

BUCKET_NAME              = "rec-bandits-v2-hybrid-vertex-bucket"
BUCKET_URI               = "gs://rec-bandits-v2-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://rec-bandits-v2-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_ID      = "hybrid_vertex.movielens_ds_rec_bandits_v2"
BIGQUERY_TABLE_ID        = "hybrid_vertex.movielens_ds_rec_bandits_v2.training_dataset"

REPO

In [12]:
!echo '{config}' | gsutil cp - {BUCKET_URI}/config/notebook_env.py

Copying from <STDIN>...
/ [1 files][    0.0 B/    0.0 B]                                                
Operation completed over 1 objects.                                              


In [13]:
!gsutil ls $BUCKET_URI

gs://rec-bandits-v2-hybrid-vertex-bucket/acc-paf-v3/
gs://rec-bandits-v2-hybrid-vertex-bucket/baseline-perarm-local-v1/
gs://rec-bandits-v2-hybrid-vertex-bucket/config/
gs://rec-bandits-v2-hybrid-vertex-bucket/data/
gs://rec-bandits-v2-hybrid-vertex-bucket/env-ranker-rec-bandits-v2/
gs://rec-bandits-v2-hybrid-vertex-bucket/local-ranker-rec-bandits-v2/
gs://rec-bandits-v2-hybrid-vertex-bucket/mab-local-classy-v3/
gs://rec-bandits-v2-hybrid-vertex-bucket/scale-perarm-hpt-v3/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-acc-cuda-alloc/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-acc-input-data-v1/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-acc-local-one-device/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-acc-local-oned-thdc-4/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-acc-local-v1/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-acc-no-summaries/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-local-2a-v1/
gs://rec-bandits-v2-hybrid-vertex-bucket/v2-scale-mod-v1/
gs://rec-bandits-v2-hybrid-vert

# gitignore

In [16]:
%%writefile .gitignore
*.cpython-310.pyc
*checkpoint*
*.ipynb_checkpoints/*
*WIP*
# .gcloudignore
# .git
# .github
# *__pycache__
# *cpython-37.pyc
# .gitignore
# .DS_Store

Overwriting .gitignore


# gcloudignore

In [5]:
! gcloud config set gcloudignore/enabled true

Updated property [gcloudignore/enabled].


In [2]:
%%writefile .gcloudignore
.gcloudignore
/WIP/
# /img/
*.pkl
*.png
*.ipynb
.git
.github
.ipynb_checkpoints/*
*__pycache__
*cpython-37.pyc
/hptuning/*
/imgs/*
README.md
.gitignore
.DS_Store
*.tfrecord
src/archive/*
00-archived/*
learning/*
04-pipelines/*

Overwriting .gcloudignore


In [13]:
# check eligble files
!gcloud meta list-files-for-upload

Dockerfile_perarm_feats
requirements.txt
cloudbuild.yaml
src/per_arm_rl/perarm_task.py
src/per_arm_rl/__init__.py
src/per_arm_rl/my_per_arm_py_env.py
src/per_arm_rl/policy_util.py
src/per_arm_rl/train_utils.py
src/per_arm_rl/trainer_baseline.py
src/per_arm_rl/data_utils.py
src/per_arm_rl/data_config.py
src/perarm_features/train_perarm.py
src/perarm_features/reward_factory.py
src/perarm_features/emb_features.py
src/perarm_features/agent_factory.py
src/perarm_features/__init__.py
src/perarm_features/trainer_common.py
src/perarm_features/task.py
src/perarm_features/eval_perarm.py
src/perarm_features/ranking_bandit_policy.py


**Finished**