In [11]:
%%writefile requirements.txt
kfp==1.8.14
google-cloud-pipeline-components==1.0.33
google-cloud-storage>=2.2.1
google-cloud-aiplatform>=1.21.0
fastapi
keras-tuner
uvicorn==0.17.6
joblib~=1.0
numpy~=1.20
scikit-learn~=0.24

Overwriting requirements.txt


In [2]:
%pip install -U --user -r requirements.txt

Collecting fastapi
  Using cached fastapi-0.89.1-py3-none-any.whl (55 kB)
Collecting starlette==0.22.0
  Using cached starlette-0.22.0-py3-none-any.whl (64 kB)
Installing collected packages: starlette, fastapi
Successfully installed fastapi-0.89.1 starlette-0.22.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import kfp
print('KFP SDK version: {}'.format(kfp.__version__))

KFP SDK version: 2.0.0-beta.10


## https://cloud.google.com/vertex-ai/docs/pipelines/gcpc-list

In [3]:
import google_cloud_pipeline_components
print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))

google_cloud_pipeline_components version: 1.0.33


### Auxiliary variables

In [4]:
PREFIX = "ccc"

In [5]:
BIGQUERY_PROJECT_ID = "datafusionsbox"
BIGQUERY_DATASET = "dataset4ccc"
BIGQUERY_TABLE = "df_for_model_ccc_with_weights"  ###"df_for_model_ccc_total"
BIGQUERY_DATASET_REGION = "us"
BIGQUERY_PROJECT_ID, BIGQUERY_DATASET, BIGQUERY_DATASET_REGION, BIGQUERY_TABLE

('datafusionsbox', 'dataset4ccc', 'us', 'df_for_model_ccc_with_weights')

In [6]:
shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
VERTEXAI_PROJECT_ID = shell_output[0]
VERTEXAI_REGION = "us-central1"
VERTEXAI_PROJECT_ID, VERTEXAI_REGION 

('datafusionsbox', 'us-central1')

In [7]:
BUCKET_NAME = f"gcp-demo-{PREFIX}-vertexai"
BUCKET_URI = f"gs://{BUCKET_NAME}"

BUCKET_NAME, BUCKET_URI

('gcp-demo-ccc-vertexai', 'gs://gcp-demo-ccc-vertexai')

In [10]:
! gsutil mb -l $VERTEXAI_REGION -p $VERTEXAI_PROJECT_ID $BUCKET_URI

Creating gs://gcp-demo-ccc-vertexai/...
ServiceException: 409 A Cloud Storage bucket named 'gcp-demo-ccc-vertexai' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


## Dotenv is a zero-dependency module that loads environment variables from a .env file

In [8]:
f = open(".env", "w")
f.write("BIGQUERY_PROJECT_ID="+str(BIGQUERY_PROJECT_ID)+"\n")
f.write("BIGQUERY_DATASET="+str(BIGQUERY_DATASET)+"\n")
f.write("BIGQUERY_DATASET_REGION="+str(BIGQUERY_DATASET_REGION)+"\n")
f.write("BIGQUERY_TABLE="+str(BIGQUERY_TABLE)+"\n")

f.write("VERTEXAI_PROJECT_ID="+str(VERTEXAI_PROJECT_ID)+"\n")
f.write("VERTEXAI_REGION="+str(VERTEXAI_REGION)+"\n")

f.write("BUCKET_NAME="+str(BUCKET_NAME)+"\n")
f.write("BUCKET_URI="+str(BUCKET_URI)+"\n")
f.write("BUCKET_REGION="+str(VERTEXAI_REGION)+"\n")

f.write("PREFIX="+str(PREFIX)+"\n")
f.close()

In [9]:
from dotenv import load_dotenv
load_dotenv(dotenv_path='.env', verbose=True)

True

In [10]:
import os
BIGQUERY_PROJECT_ID = os.environ.get('BIGQUERY_PROJECT_ID')
BIGQUERY_DATASET = os.environ.get('BIGQUERY_DATASET')
BIGQUERY_DATASET_REGION = os.environ.get('BIGQUERY_DATASET_REGION')
BIGQUERY_TABLE = os.environ.get('BIGQUERY_TABLE')

VERTEXAI_PROJECT_ID = os.environ.get('VERTEXAI_PROJECT_ID')
VERTEXAI_REGION = os.environ.get('VERTEXAI_REGION')

BUCKET_NAME = os.environ.get('BUCKET_NAME')
BUCKET_URI = os.environ.get('BUCKET_URI')
BUCKET_REGION = os.environ.get('BUCKET_REGION')

PREFIX = os.environ.get('PREFIX')

print("BIGQUERY_PROJECT_ID: ",BIGQUERY_PROJECT_ID)
print("BIGQUERY_DATASET: ",BIGQUERY_DATASET)
print("BIGQUERY_DATASET_REGION: ",BIGQUERY_DATASET_REGION)
print("BIGQUERY_TABLE: ",BIGQUERY_TABLE)

print("VERTEXAI_PROJECT_ID: ",VERTEXAI_PROJECT_ID)
print("VERTEXAI_REGION: ",VERTEXAI_REGION)

print("BUCKET_NAME: ",BUCKET_NAME)
print("BUCKET_URI: ",BUCKET_NAME)
print("BUCKET_REGION: ",VERTEXAI_REGION)

BIGQUERY_PROJECT_ID:  datafusionsbox
BIGQUERY_DATASET:  dataset4ccc
BIGQUERY_DATASET_REGION:  us
BIGQUERY_TABLE:  df_for_model_ccc_with_weights
VERTEXAI_PROJECT_ID:  datafusionsbox
VERTEXAI_REGION:  us-central1
BUCKET_NAME:  gcp-demo-ccc-vertexai
BUCKET_URI:  gcp-demo-ccc-vertexai
BUCKET_REGION:  us-central1
