# Environment Setup for training TF-Agents

## Install Packages
Run `pip install -r requirements.txt` in either (1) the notebook cell below or (2) a notebook terminal window.

In [2]:
# !pwd

In [3]:
import os

# Detect a Vertex AI Workbench environment:
#   - IS_WORKBENCH_NOTEBOOK holds the value of DL_ANACONDA_HOME (None when unset)
#   - IS_USER_MANAGED_WORKBENCH_NOTEBOOK is True when the env_version metadata file exists
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# On Workbench, pip dependencies must be installed with '--user'; elsewhere no flag is needed.
USER_FLAG = "--user" if IS_WORKBENCH_NOTEBOOK else ""

# Uncomment to install the dependencies from this notebook:
# !pip install --no-cache-dir -r ./requirements.txt --user -q

## Set vars

In [20]:
# Shared naming convention: PREFIX is built from VERSION and is reused
# when naming the cloud resources created by these notebooks.
VERSION = "v1"  # TODO: set the version tag
PREFIX = f'mab{VERSION}'  # TODO: set the resource-name prefix

print(f"PREFIX = {PREFIX}")

PREFIX = mabv1


### GCP project

In [21]:
# creds, PROJECT_ID = google.auth.default()
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

PROJECT_NUM              = !gcloud projects describe $PROJECT_ID --format="value(projectNumber)"
PROJECT_NUM              = PROJECT_NUM[0]

VERTEX_SA                = f'{PROJECT_NUM}-compute@developer.gserviceaccount.com'

VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

# locations / regions for cloud resources
LOCATION                 = 'us-central1'        
REGION                   = LOCATION
BQ_LOCATION              = 'US'

print(f"PROJECT_ID       = {PROJECT_ID}")
print(f"PROJECT_NUM      = {PROJECT_NUM}")
print(f"VPC_NETWORK_NAME = {VPC_NETWORK_NAME}")
print(f"LOCATION         = {LOCATION}")
print(f"REGION           = {REGION}")
print(f"BQ_LOCATION      = {BQ_LOCATION}")

PROJECT_ID       = hybrid-vertex
PROJECT_NUM      = 934903580331
VPC_NETWORK_NAME = ucaip-haystack-vpc-network
LOCATION         = us-central1
REGION           = us-central1
BQ_LOCATION      = US


### Define Cloud Resource Names and Args

In [22]:
# --- Cloud Storage: bucket name and URI derived from PREFIX and PROJECT_ID ---
BUCKET_NAME = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI = f'gs://{BUCKET_NAME}'

# --- Where the MovieLens 100K TF-Records are written inside the bucket ---
DATA_GCS_PREFIX = "data"
DATA_PATH = f"{BUCKET_URI}/{DATA_GCS_PREFIX}"

# --- Fully qualified VPC network resource path ---
VPC_NETWORK_FULL = f"projects/{PROJECT_NUM}/global/networks/{VPC_NETWORK_NAME}"

# --- BigQuery dataset/table IDs (used for the Generator, Ingester, Logger) ---
BIGQUERY_DATASET_ID = f"{PROJECT_ID}.movielens_dataset_{PREFIX}"
BIGQUERY_TABLE_ID = f"{BIGQUERY_DATASET_ID}.training_dataset"

# Echo the resolved values so they can be checked at a glance.
print(f"BUCKET_NAME          : {BUCKET_NAME}")
print(f"BUCKET_URI           : {BUCKET_URI}")
print(f"DATA_GCS_PREFIX      : {DATA_GCS_PREFIX}")
print(f"DATA_PATH            : {DATA_PATH}")
print(f"VPC_NETWORK_FULL     : {VPC_NETWORK_FULL}")
print(f"BIGQUERY_DATASET_ID  : {BIGQUERY_DATASET_ID}")
print(f"BIGQUERY_TABLE_ID    : {BIGQUERY_TABLE_ID}")

BUCKET_NAME          : mabv1-hybrid-vertex-bucket
BUCKET_URI           : gs://mabv1-hybrid-vertex-bucket
DATA_GCS_PREFIX      : data
DATA_PATH            : gs://mabv1-hybrid-vertex-bucket/data
VPC_NETWORK_FULL     : projects/934903580331/global/networks/ucaip-haystack-vpc-network
BIGQUERY_DATASET_ID  : hybrid-vertex.movielens_dataset_mabv1
BIGQUERY_TABLE_ID    : hybrid-vertex.movielens_dataset_mabv1.training_dataset


In [13]:
# create bucket
! gsutil mb -l $REGION $BUCKET_URI

In [24]:
! gsutil ls $BUCKET_URI

## Repo structure

* These variables describe how the repo is structured.
* They are required for correctly building the Dockerfiles, importing classes, etc.

In [31]:
# Directory names that describe the repo layout.
REPO_DOCKER_PATH_PREFIX = 'src'
RL_SUB_DIR = 'per_arm_rl'

# Echo both values for a quick visual check.
print(f"REPO_DOCKER_PATH_PREFIX  : {REPO_DOCKER_PATH_PREFIX}")
print(f"RL_SUB_DIR               : {RL_SUB_DIR}")

REPO_DOCKER_PATH_PREFIX  : src
RL_SUB_DIR               : per_arm_rl


## Save Notebook Configuration Data
Save these values to a config file in the bucket so that you do not have to re-enter them in other notebooks.

In [32]:
# Render every setting as a `NAME = "value"` line of Python source text.
# Double quotes need no escaping inside a triple-quoted string.
config = f"""
PROJECT_ID               = "{PROJECT_ID}"
PROJECT_NUM              = "{PROJECT_NUM}"
LOCATION                 = "{LOCATION}"

REGION                   = "{REGION}"
BQ_LOCATION              = "{BQ_LOCATION}"
VPC_NETWORK_NAME         = "{VPC_NETWORK_NAME}"

VERTEX_SA                = "{VERTEX_SA}"

PREFIX                   = "{PREFIX}"
VERSION                  = "{VERSION}"

BUCKET_NAME              = "{BUCKET_NAME}"
DATA_GCS_PREFIX          = "{DATA_GCS_PREFIX}"
DATA_PATH                = "{DATA_PATH}"
BUCKET_URI               = "{BUCKET_URI}"

VPC_NETWORK_FULL         = "{VPC_NETWORK_FULL}"

BIGQUERY_DATASET_ID      = "{BIGQUERY_DATASET_ID}"
BIGQUERY_TABLE_ID        = "{BIGQUERY_TABLE_ID}"

REPO_DOCKER_PATH_PREFIX  = "{REPO_DOCKER_PATH_PREFIX}"
RL_SUB_DIR               = "{RL_SUB_DIR}"
"""
# Show the rendered text for review.
print(config)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "mabv1"
VERSION                  = "v1"

BUCKET_NAME              = "mabv1-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://mabv1-hybrid-vertex-bucket/data"
BUCKET_URI               = "gs://mabv1-hybrid-vertex-bucket"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_ID      = "hybrid-vertex.movielens_dataset_mabv1"
BIGQUERY_TABLE_ID        = "hybrid-vertex.movielens_dataset_mabv1.training_dataset"

REPO_DOCKER_PATH_PREFIX  = "src"
RL_SUB_DIR               = "per_arm_rl"



In [33]:
# Pipe the rendered `config` text into gsutil ("-" makes `cp` read from stdin)
# and store it at {BUCKET_URI}/config/notebook_env.py.
# NOTE(review): `config` is interpolated inside single quotes in the shell
# command, so a value containing a single quote would presumably break the
# command — confirm that all values stay quote-free.
!echo '{config}' | gsutil cp - {BUCKET_URI}/config/notebook_env.py

Copying from <STDIN>...
/ [1 files][    0.0 B/    0.0 B]                                                
Operation completed over 1 objects.                                              


In [34]:
!gsutil ls $BUCKET_URI

gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-13-22:02:47.458/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-14-12:09:06.643/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-14-12:46:07.509/
gs://mabv1-hybrid-vertex-bucket/aiplatform-custom-training-2023-07-14-13:48:08.938/
gs://mabv1-hybrid-vertex-bucket/archived/
gs://mabv1-hybrid-vertex-bucket/config/
gs://mabv1-hybrid-vertex-bucket/data/
gs://mabv1-hybrid-vertex-bucket/data_stats/
gs://mabv1-hybrid-vertex-bucket/perarm-local-test/
gs://mabv1-hybrid-vertex-bucket/scale-perarm-hpt/
