## Merlin Setup

The goal of this notebook is to build a user-managed container image that can be used in Vertex AI Workbench.

### Setup variables, paths, and create artifact registry

In [None]:
REPO_NAME="workbench"
REGION="us-central1"
PROJECT="hybrid-vertex" # TODO: update with your project_id
IMAGE_ID="training"
MERLIN_IMAGE_NAME=f"{REGION}-docker.pkg.dev/{PROJECT}/{REPO_NAME}/merlin-{IMAGE_ID}"

MERLIN_CONTAINER="merlin_container"

!mkdir {MERLIN_CONTAINER}

!gcloud beta artifacts repositories create {REPO_NAME} \
    --repository-format=docker \
    --location=$REGION

In [None]:
!gcloud config set project $PROJECT

#### Find the service account for your tenant project - usually `<project_number>-compute@developer.gserviceaccount.com`

In [None]:
# Optional, left commented out on purpose: binds roles/artifactregistry.admin to the
# compute service account on the project.
# Before uncommenting, replace `hybrid-vertex` with your project id and the
# `xxxxxxxx` prefix with the prefix of your own service account.
# !gcloud projects add-iam-policy-binding hybrid-vertex --member=serviceAccount:xxxxxxxx-compute@developer.gserviceaccount.com --role=roles/artifactregistry.admin

In [None]:
!gcloud auth configure-docker us-central1-docker.pkg.dev --quiet

### Create Docker image derived from Deeplearning containers

In [None]:
%%writefile {MERLIN_CONTAINER}/jupyter_notebook_config.py
c.NotebookApp.ip = '*'
c.NotebookApp.token = ''
c.NotebookApp.password = ''
c.NotebookApp.open_browser = False
c.NotebookApp.port = 8080
c.NotebookApp.terminado_settings = {'shell_command': ['/bin/bash']}
c.NotebookApp.allow_origin_pat = (
'(^https://8080-dot-[0-9]+-dot-devshell\.appspot\.com$)|'
'(^https://colab\.research\.google\.com$)|'
'((https?://)?[0-9a-z]+-dot-(?:us|asia|europe|northamerica|southamerica)-?[0-9a-z]+\.notebooks\.googleusercontent.com)')
c.NotebookApp.allow_remote_access = True
c.NotebookApp.disable_check_xsrf = False

In [None]:
%%writefile {MERLIN_CONTAINER}/Dockerfile
# Image for Vertex Workbench, built on top of the NVIDIA Merlin training container.
FROM nvcr.io/nvidia/merlin/merlin-training:22.04

# Install the Google Cloud SDK from Google's apt repository (fetched over HTTPS).
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
    && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - \
    && apt-get update -y \
    && apt-get install google-cloud-sdk -y

# Python client libraries used from the notebooks, plus ipykernel.
RUN pip install google-cloud-aiplatform google-cloud-pipeline-components google-cloud-bigquery-storage kfp ipykernel

# Port the Jupyter server listens on.
EXPOSE 8080

# The trailing slash marks the destination as a directory: the config file lands at
# /root/.jupyter/jupyter_notebook_config.py whether or not the directory already
# exists in the base image (it is created if missing).
# The source path is relative to the build context (the notebook's working directory).
COPY merlin_container/jupyter_notebook_config.py /root/.jupyter/

# `pwd` supplies both the token and the password passed to jupyter-lab below;
# it is empty by default.
ENV pwd=""
ENTRYPOINT exec jupyter-lab --ip=0.0.0.0 --port=8080 --no-browser --allow-root --ServerApp.allow_origin="*" --NotebookApp.token="$pwd" --NotebookApp.password="$pwd"

In [None]:
!docker build . -f $MERLIN_CONTAINER/Dockerfile -t $MERLIN_IMAGE_NAME
!docker push $MERLIN_IMAGE_NAME