Follow the steps laid out in the accompanying Medium story, then clone the repository.
# Write a DataFrame to BigQuery. NOTE: to_gbq is a DataFrame *method*
# (df.to_gbq) -- pandas has no top-level pd.to_gbq function, so the
# original `pd.to_gbq(...)` raises AttributeError.
# if_exists must be one of 'fail', 'replace' or 'append'.
df.to_gbq('table_name', if_exists='append')
# Read query results back into a DataFrame; dialect is 'legacy' or 'standard'.
df = pd.read_gbq(sql, dialect='legacy')
# Submit a SQL query via the BigQuery client library.
# NOTE(review): assumes `bigquery_client = bigquery.Client()` was created
# earlier (not shown in this snippet) -- confirm against the full script.
query_job = bigquery_client.query("""[SQL CODE]""")
# Block until the query job completes and fetch its result set.
results = query_job.result()
# --- Project / cluster configuration ---------------------------------------
# Set BUCKET_NAME in your environment before running these exports.
export PROJECT_ID='covid-jul25'
gcloud config set project "$PROJECT_ID"
export REGION=us-west3
export ZONE=us-west3-a
# Must be ${BUCKET_NAME} -- a bare {BUCKET_NAME} is taken literally by the
# shell and never expanded.
export BUCKET_LINK="gs://us-west3-${BUCKET_NAME}"
export BUCKET="us-west3-${BUCKET_NAME}"
export TEMPLATE_ID=daily_update_template
# NOTE(review): lowercase name kept because later commands reference
# $cluster_name; exported vars are conventionally UPPER_SNAKE_CASE.
export cluster_name=covid-cluster
# Create an (empty) workflow template in the target region.
# The original split this command over two lines WITHOUT a trailing
# backslash, so the second line tried to execute $TEMPLATE_ID as a command.
gcloud dataproc workflow-templates create \
  "$TEMPLATE_ID" --region "$REGION"
# To remove a template, delete it by ID. The original used a literal
# {TEMPLATE_NAME} placeholder (never expanded) and a hard-coded region.
gcloud dataproc workflow-templates delete "$TEMPLATE_ID" --region="$REGION"
# Attach a managed (ephemeral) cluster to the template: it is created when
# the workflow is instantiated and torn down when the workflow finishes.
gcloud dataproc workflow-templates set-managed-cluster \
  "$TEMPLATE_ID" \
  --region "$REGION" \
  --zone "$ZONE" \
  --cluster-name "$cluster_name" \
  --optional-components=ANACONDA \
  --master-machine-type n1-standard-4 \
  --master-boot-disk-size 20 \
  --worker-machine-type n1-standard-4 \
  --worker-boot-disk-size 20 \
  --num-workers 2 \
  --image-version 1.4 \
  --metadata='PIP_PACKAGES=pandas google.cloud pandas-gbq' \
  --initialization-actions "$BUCKET_LINK/pip-install.sh"
# NOTE(review): 'google.cloud' is an unusual pip package name -- the client
# libraries are published as google-cloud-<service>; verify pip-install.sh
# installs what daily_update.py actually imports.
# The original hard-coded gs://us-west3-{BUCKET_NAME} here, where the braces
# are literal; $BUCKET_LINK (exported above) is the expanded equivalent.
# Register the PySpark script as a step of the workflow template.
export STEP_ID=arima_update
gcloud dataproc workflow-templates add-job pyspark \
  "$BUCKET_LINK/daily_update.py" \
  --step-id "$STEP_ID" \
  --workflow-template "$TEMPLATE_ID" \
  --region "$REGION"
# Confirm the template (and its attached job) was registered.
gcloud dataproc workflow-templates list --region "$REGION"
# NOTE(review): REGION and TEMPLATE_ID are re-exported here with values
# (us-east4 / daily_update) that differ from those used to build the
# template above (us-west3 / daily_update_template) -- confirm this targets
# the intended, pre-existing template.
export REGION=us-east4
export TEMPLATE_ID=daily_update
# Run the workflow; blocks until completion (uncomment --async to return
# immediately). 'time' reports how long the whole run took.
time gcloud dataproc workflow-templates instantiate \
  "$TEMPLATE_ID" --region "$REGION" #--async