Prune unused Airflow code related to AWS EMR and Databricks #1240

Merged: 8 commits, Feb 1, 2021
14 changes: 1 addition & 13 deletions Dockerfile
@@ -42,22 +42,10 @@ USER 10001

ENV PYTHONUNBUFFERED=1 \
PORT=8000
# AWS_REGION= \
# AWS_ACCESS_KEY_ID= \
# AWS_SECRET_ACCESS_KEY= \
# SPARK_BUCKET= \
# AIRFLOW_BUCKET= \
# PRIVATE_OUTPUT_BUCKET= \
# PUBLIC_OUTPUT_BUCKET= \
# EMR_KEY_NAME= \
# EMR_FLOW_ROLE= \
# EMR_SERVICE_ROLE= \
# EMR_INSTANCE_TYPE= \
# DEPLOY_ENVIRONMENT = \
# DEPLOY_TAG = \
# ARTIFACTS_BUCKET = \
# DATABRICKS_DEFAULT_IAM \

# DEPLOY_TAG =

ENV AIRFLOW_HOME=/app \
AIRFLOW_EMAIL_BACKEND="airflow.utils.email.send_email_smtp"
14 changes: 1 addition & 13 deletions Dockerfile.dev
@@ -37,22 +37,10 @@ COPY ./config/ /app
USER 10001

ENV PYTHONUNBUFFERED=1 \
AWS_REGION=us-west-2 \
SPARK_BUCKET=telemetry-spark-emr-2 \
AIRFLOW_BUCKET=telemetry-airflow \
PRIVATE_OUTPUT_BUCKET=telemetry-test-bucket \
PUBLIC_OUTPUT_BUCKET=telemetry-test-bucket \
EMR_KEY_NAME=20161025-dataops-dev \
EMR_FLOW_ROLE=telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3 \
EMR_SERVICE_ROLE=EMR_DevRole \
EMR_INSTANCE_TYPE=c3.4xlarge \
PORT=8000 \
DEPLOY_ENVIRONMENT=dev \
DEVELOPMENT=1 \
DEPLOY_TAG=master \
ARTIFACTS_BUCKET=net-mozaws-data-us-west-2-ops-ci-artifacts \
DATABRICKS_DEFAULT_IAM=arn:aws:iam::144996185633:instance-profile/databricks-ec2
# DEV_USERNAME=
DEPLOY_TAG=master
# AWS_ACCESS_KEY_ID=
# AWS_SECRET_ACCESS_KEY=

39 changes: 1 addition & 38 deletions README.md
@@ -39,16 +39,6 @@ An Airflow container can be built with
make build
```

### Export Credentials

For now, DAGs that use the Databricks operator won't parse until the following environment variables are set (see issue #501):

```
AWS_SECRET_ACCESS_KEY
AWS_ACCESS_KEY_ID
DB_TOKEN
```
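A minimal sketch of exporting these in your shell before starting the containers (the values below are placeholders, not real credentials):

```bash
# Placeholder values for illustration only.
export AWS_ACCESS_KEY_ID="<your-access-key-id>"
export AWS_SECRET_ACCESS_KEY="<your-secret-access-key>"
export DB_TOKEN="<your-databricks-token>"
```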

### Migrate Database

Airflow database migration is no longer a separate step for dev; the web container runs it on first start if necessary. That means you should start the web container (and the database container, of course) and wait for the migrations to complete before running the individual test commands below. The easiest way to do this is to run `make up` and let it run until the migrations complete.
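A minimal sketch of that workflow, assuming `make up` wraps docker-compose as described and the Airflow 1.x CLI used elsewhere in this README:

```bash
# Start the database and web containers; the web container applies migrations on first run.
make up
# In a second shell, once the logs show the migrations have finished, run test commands, e.g.:
docker-compose exec web airflow list_dags
```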
@@ -109,24 +99,6 @@ sed -i "s/10001/$(id -u)/g" Dockerfile.dev

```

### Testing Databricks Jobs

To run a job on Databricks, run `make up` in the background. Follow
[this guide on generating a
token](https://docs.databricks.com/api/latest/authentication.html#generate-a-token)
and save the token to a secure location. Export it to an environment
variable:

```bash
export DB_TOKEN=<TOKEN>
```

Finally, run the testing command using docker-compose directly:

```bash
docker-compose exec web airflow test example spark 20180101
```

### Testing GKE Jobs (including BigQuery-etl changes)

For now, follow the steps outlined here to create a service account: https://bugzilla.mozilla.org/show_bug.cgi?id=1553559#c1.
@@ -173,14 +145,6 @@ variables:

- `AWS_ACCESS_KEY_ID` -- The AWS access key ID to spin up the Spark clusters
- `AWS_SECRET_ACCESS_KEY` -- The AWS secret access key
- `SPARK_BUCKET` -- The AWS S3 bucket where Spark-related files are stored,
e.g. `telemetry-spark-emr-2`
- `AIRFLOW_BUCKET` -- The AWS S3 bucket where airflow-specific files are stored,
e.g. `telemetry-airflow`
- `PUBLIC_OUTPUT_BUCKET` -- The AWS S3 bucket where public job results are
stored, e.g. `telemetry-public-analysis-2`
- `PRIVATE_OUTPUT_BUCKET` -- The AWS S3 bucket where private job results are
stored, e.g. `telemetry-parquet`
- `AIRFLOW_DATABASE_URL` -- The connection URI for the Airflow database, e.g.
`mysql://username:password@hostname:port/database`
- `AIRFLOW_BROKER_URL` -- The connection URI for the Airflow worker queue, e.g.
@@ -207,8 +171,7 @@ variables:
`master` or `tags`. You can specify the tag or travis build exactly as well, e.g.
`master/42.1` or `tags/v2.2.1`. Not specifying the exact tag or build will
use the latest from that branch, or the latest tag.
- `ARTIFACTS_BUCKET` -- The s3 bucket where the build artifacts can be found, e.g.
`net-mozaws-data-us-west-2-ops-ci-artifacts`
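
A hedged sketch of setting a few of these variables in a shell (illustrative placeholders only, reusing the example values from the list above where given):

```bash
export AWS_ACCESS_KEY_ID="<access-key-id>"
export AWS_SECRET_ACCESS_KEY="<secret-access-key>"
export AIRFLOW_DATABASE_URL="mysql://username:password@hostname:port/database"
export DEPLOY_TAG="master"
```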


Also, please set

8 changes: 8 additions & 0 deletions bin/export_aws_credentials
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Exports credentials from an AWS credentials file into the environment so they
# can be passed into the airflow container.
# Usage: `source bin/export_aws_credentials`

keys=$(grep default -A2 < ~/.aws/credentials | tail -n2 | awk '{print $3}')
export AWS_ACCESS_KEY_ID=$(echo "$keys" | head -n1)
export AWS_SECRET_ACCESS_KEY=$(echo "$keys" | tail -n1)
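
A usage sketch for this helper, assuming a standard `~/.aws/credentials` file with a `[default]` profile:

```bash
# Source the helper into the current shell from the repository root.
source bin/export_aws_credentials
# Confirm the access key ID was picked up (avoid echoing the secret):
printenv AWS_ACCESS_KEY_ID
```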
32 changes: 0 additions & 32 deletions bin/mozetl_runner.py

This file was deleted.

36 changes: 0 additions & 36 deletions dags/example.py

This file was deleted.

27 changes: 6 additions & 21 deletions dags/mozaggregator_release.py
@@ -1,7 +1,7 @@
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.moz_databricks import MozDatabricksSubmitRunOperator
from utils.mozetl import mozetl_envvar

from airflow.operators.dummy_operator import DummyOperator

default_args = {
"owner": "frank@mozilla.com",
@@ -20,26 +20,11 @@
schedule_interval="@daily",
)

release_telemetry_aggregate_view = MozDatabricksSubmitRunOperator(
# See mozaggregator_prerelease and mozaggregator_mobile for functional
# implementations using dataproc operator. This is not implemented due to the
# migration to GCP and https://bugzilla.mozilla.org/show_bug.cgi?id=1517018
release_telemetry_aggregate_view = DummyOperator(
task_id="release_telemetry_aggregate_view",
job_name="Release Telemetry Aggregate View",
release_label="6.1.x-scala2.11",
instance_count=40,
execution_timeout=timedelta(hours=12),
env=mozetl_envvar(
"aggregator",
{
"date": "{{ ds_nodash }}",
"channels": "release",
"credentials-bucket": "telemetry-spark-emr-2",
"credentials-prefix": "aggregator_database_envvars.json",
"num-partitions": 40 * 32,
},
dev_options={"credentials-prefix": "aggregator_dev_database_envvars.json"},
other={
"MOZETL_GIT_PATH": "https://github.com/mozilla/python_mozaggregator.git",
"MOZETL_EXTERNAL_MODULE": "mozaggregator",
},
),
dag=dag,
)
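
A hedged way to sanity-check the placeholder task locally, using the Airflow 1.x `airflow test` form shown earlier in the README; the DAG id `mozaggregator_release` is assumed from the file name, while the task id comes from the diff above:

```bash
docker-compose exec web airflow test mozaggregator_release release_telemetry_aggregate_view 20210101
```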
72 changes: 0 additions & 72 deletions dags/operators/email_schema_change_operator.py

This file was deleted.
