Add automated testing for the media player model (Close #10)
agnessnowplow committed Aug 4, 2022
1 parent 002e755 commit f6410ab
Showing 49 changed files with 5,010 additions and 182 deletions.
146 changes: 146 additions & 0 deletions .github/workflows/pr_tests.yml
@@ -0,0 +1,146 @@
name: pr_tests

on:
  pull_request:
    branches:
      - main
      - 'release/**'

env:
  # Set profiles.yml directory
  DBT_PROFILES_DIR: ./ci

  # Redshift Connection
  REDSHIFT_TEST_HOST: ${{ secrets.REDSHIFT_TEST_HOST }}
  REDSHIFT_TEST_USER: ${{ secrets.REDSHIFT_TEST_USER }}
  REDSHIFT_TEST_PASS: ${{ secrets.REDSHIFT_TEST_PASS }}
  REDSHIFT_TEST_DBNAME: ${{ secrets.REDSHIFT_TEST_DBNAME }}
  REDSHIFT_TEST_PORT: ${{ secrets.REDSHIFT_TEST_PORT }}

  # BigQuery Connection
  BIGQUERY_SERVICE_KEYFILE: ${{ secrets.BIGQUERY_SERVICE_KEYFILE }}
  BIGQUERY_SERVICE_KEY_PATH: ./dbt-service-account.json
  BIGQUERY_TEST_DATABASE: ${{ secrets.BIGQUERY_TEST_DATABASE }}
  BIGQUERY_LOCATION: ${{ secrets.BIGQUERY_LOCATION }}

  # Snowflake Connection
  SNOWFLAKE_TEST_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT }}
  SNOWFLAKE_TEST_USER: ${{ secrets.SNOWFLAKE_TEST_USER }}
  SNOWFLAKE_TEST_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD }}
  SNOWFLAKE_TEST_ROLE: ${{ secrets.SNOWFLAKE_TEST_ROLE }}
  SNOWFLAKE_TEST_DATABASE: ${{ secrets.SNOWFLAKE_TEST_DATABASE }}
  SNOWFLAKE_TEST_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_WAREHOUSE }}

  # Postgres Connection
  POSTGRES_TEST_HOST: ${{ secrets.POSTGRES_TEST_HOST }}
  POSTGRES_TEST_USER: ${{ secrets.POSTGRES_TEST_USER }}
  POSTGRES_TEST_PASS: ${{ secrets.POSTGRES_TEST_PASS }}
  POSTGRES_TEST_PORT: ${{ secrets.POSTGRES_TEST_PORT }}
  POSTGRES_TEST_DBNAME: ${{ secrets.POSTGRES_TEST_DBNAME }}

  # Databricks Connection
  DATABRICKS_TEST_HOST: ${{ secrets.DATABRICKS_TEST_HOST }}
  DATABRICKS_TEST_HTTP_PATH: ${{ secrets.DATABRICKS_TEST_HTTP_PATH }}
  DATABRICKS_TEST_TOKEN: ${{ secrets.DATABRICKS_TEST_TOKEN }}
  DATABRICKS_TEST_ENDPOINT: ${{ secrets.DATABRICKS_TEST_ENDPOINT }}

jobs:
  pr_tests:
    name: pr_tests
    runs-on: ubuntu-latest
    defaults:
      run:
        # Run tests from integration_tests sub dir
        working-directory: ./integration_tests
    strategy:
      matrix:
        dbt_version: ["1.*"]
        warehouse: ["postgres", "bigquery", "snowflake", "databricks"] # TODO: Add RS self-hosted runner

    services:
      postgres:
        image: postgres:latest
        env:
          POSTGRES_DB: ${{ secrets.POSTGRES_TEST_DBNAME }}
          POSTGRES_USER: ${{ secrets.POSTGRES_TEST_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_TEST_PASS }}
        # Set health checks to wait until postgres has started
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Maps tcp port 5432 on service container to the host
          - 5432:5432

    steps:
      - name: Check out
        uses: actions/checkout@v2

      # Remove '*' and replace '.' with '_' in DBT_VERSION & set as SCHEMA_SUFFIX.
      # SCHEMA_SUFFIX allows us to run multiple versions of dbt in parallel without overwriting the output tables
      - name: Set SCHEMA_SUFFIX env
        run: echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
        env:
          DBT_VERSION: ${{ matrix.dbt_version }}

      - name: Set DEFAULT_TARGET env
        run: |
          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> $GITHUB_ENV

      - name: Write BigQuery creds to json file
        run: |
          echo "$BIGQUERY_SERVICE_KEYFILE" > ./dbt-service-account.json

      - name: Python setup
        uses: actions/setup-python@v2
        with:
          python-version: "3.8.x"

      - name: Pip cache
        uses: actions/cache@v2
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ matrix.warehouse }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ matrix.warehouse }}

      # Install latest patch version. Upgrade if cache contains old patch version.
      - name: Install dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-${{ matrix.warehouse }}"==${{ matrix.dbt_version }} --upgrade
          dbt deps
        if: ${{matrix.warehouse != 'spark'}}

      - name: Install spark dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-${{ matrix.warehouse }}[ODBC]"==${{ matrix.dbt_version }} --upgrade
          dbt deps
        if: ${{matrix.warehouse == 'spark'}}

      - name: Block concurrent executions tests
        uses: softprops/turnstyle@v1
        with:
          poll-interval-seconds: 20
          same-branch-only: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: "Connect to database"
        run: |
          dbt debug

      - name: "Pre-test: Drop ci schemas"
        run: |
          dbt run-operation post_ci_cleanup --target ${{ matrix.warehouse }}

      - name: Run tests
        run: ./.scripts/integration_test.sh -d ${{ matrix.warehouse }}

      # post_ci_cleanup sits in utils package
      - name: "Post-test: Drop ci schemas"
        run: |
          dbt run-operation post_ci_cleanup --target ${{ matrix.warehouse }}
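
One note on the `Set SCHEMA_SUFFIX env` step above: the `${DBT_VERSION%.*}` expansion strips the final `.`-suffix from the matrix version before `tr` swaps any remaining dots for underscores, which is what keeps each dbt version writing to its own schema. A quick worked example of the same expression (the pinned `1.3.0` value is hypothetical, just to show the general case):

```bash
DBT_VERSION="1.*"
echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)"   # prints SCHEMA_SUFFIX=1

DBT_VERSION="1.3.0"   # hypothetical pinned version, not used by this workflow
echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)"   # prints SCHEMA_SUFFIX=1_3
```
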
4 changes: 4 additions & 0 deletions integration_tests/.gitignore
@@ -0,0 +1,4 @@

target/
dbt_modules/
logs/
47 changes: 47 additions & 0 deletions integration_tests/.scripts/integration_test.sh
@@ -0,0 +1,47 @@
#!/bin/bash

# Expected input:
# -d (database) target database for dbt

while getopts 'd:' opt
do
  case $opt in
    d) DATABASE=$OPTARG
  esac
done

declare -a SUPPORTED_DATABASES=("bigquery" "databricks" "postgres" "redshift" "snowflake")

# set to lower case
DATABASE="$(echo $DATABASE | tr '[:upper:]' '[:lower:]')"

if [[ $DATABASE == "all" ]]; then
  DATABASES=( "${SUPPORTED_DATABASES[@]}" )
else
  DATABASES=$DATABASE
fi

for db in ${DATABASES[@]}; do

  echo "Snowplow media player integration tests: Seeding data"

  eval "dbt seed --target $db --full-refresh" || exit 1;

  echo "Snowplow media player integration tests: Execute models - run 1/3"

  eval "dbt run --target $db --full-refresh" || exit 1;

  for i in {2..3}
  do
    echo "Snowplow media player integration tests: Execute models - run $i/3"

    eval "dbt run --target $db" || exit 1;
  done

  echo "Snowplow media player integration tests: Test models"

  eval "dbt test --target $db" || exit 1;

  echo "Snowplow media player integration tests: All tests passed"

done
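
For reference, the workflow above calls this script from the `integration_tests` directory, one warehouse per matrix job. Running it locally looks the same, assuming the relevant connection environment variables are exported:

```bash
cd integration_tests
./.scripts/integration_test.sh -d postgres   # or `-d all` to loop over every supported warehouse
```
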
24 changes: 24 additions & 0 deletions integration_tests/README.md
@@ -0,0 +1,24 @@
# snowplow-media-player-integration-tests

Integration test suite for the snowplow-media-player dbt package.

The `./.scripts` directory contains two scripts:

- `integration_tests.sh`: Tests the standard modules of the snowplow-media-player package. It runs the package 4 times to replicate incremental loading of events, then performs an equality test between the actual and expected output.

- `integration_tests_w_custom_module.sh`: Tests the standard modules of the snowplow-media-player package as well as the back-filling of custom modules. In total the package is run 6 times: runs 1-2 execute the standard modules, runs 3-4 back-fill the newly introduced custom module, and runs 5-6 run both the standard and custom modules. Once complete, equality checks are performed on the actual vs. expected output of the standard modules (see the sketch below).
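
A minimal sketch of that 6-run sequence, assuming the custom-module script drives dbt the same way `integration_test.sh` does; the `--select` model name is purely illustrative, not a real model in this package:

```bash
db=postgres                                       # example target
dbt seed --target $db --full-refresh
dbt run --target $db --full-refresh               # run 1: standard modules
dbt run --target $db                              # run 2: standard modules
dbt run --target $db --select my_custom_model     # runs 3-4: back-fill the new custom module (hypothetical name)
dbt run --target $db --select my_custom_model
dbt run --target $db                              # runs 5-6: standard and custom modules together
dbt run --target $db
dbt test --target $db                             # equality checks on the standard-module output
```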

Run the scripts using:

```bash
bash integration_tests.sh -d {warehouse}
```

Supported warehouses:

- bigquery
- databricks
- postgres
- redshift
- snowflake
- all (iterates through all supported warehouses)
68 changes: 68 additions & 0 deletions integration_tests/ci/profiles.yml
@@ -0,0 +1,68 @@

# HEY! This file is used in the Snowplow dbt Media Player integration tests.
# You should __NEVER__ check credentials into version control. Thanks for reading :)

config:
  send_anonymous_usage_stats: False
  use_colors: True

integration_tests:
  target: "{{ env_var('DEFAULT_TARGET') }}"
  outputs:
    postgres:
      type: postgres
      host: "{{ env_var('POSTGRES_TEST_HOST') }}"
      user: "{{ env_var('POSTGRES_TEST_USER') }}"
      pass: "{{ env_var('POSTGRES_TEST_PASS') }}"
      port: "{{ env_var('POSTGRES_TEST_PORT') | as_number }}"
      dbname: "{{ env_var('POSTGRES_TEST_DBNAME') }}"
      schema: "github_snwplow_media_player_dbt_{{ env_var('SCHEMA_SUFFIX') }}"
      threads: 4

    redshift:
      type: redshift
      host: "{{ env_var('REDSHIFT_TEST_HOST') }}"
      user: "{{ env_var('REDSHIFT_TEST_USER') }}"
      pass: "{{ env_var('REDSHIFT_TEST_PASS') }}"
      dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}"
      port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}"
      schema: "github_snwplow_media_player_dbt_{{ env_var('SCHEMA_SUFFIX') }}"
      threads: 4

    bigquery:
      type: bigquery
      method: service-account
      keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}"
      project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}"
      location: "{{ env_var('BIGQUERY_LOCATION') }}"
      schema: "github_snwplow_media_player_dbt_{{ env_var('SCHEMA_SUFFIX') }}"
      threads: 4

    snowflake:
      type: snowflake
      account: "{{ env_var('SNOWFLAKE_TEST_ACCOUNT') }}"
      user: "{{ env_var('SNOWFLAKE_TEST_USER') }}"
      password: "{{ env_var('SNOWFLAKE_TEST_PASSWORD') }}"
      role: "{{ env_var('SNOWFLAKE_TEST_ROLE') }}"
      database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}"
      warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}"
      schema: "github_snwplow_media_player_dbt_{{ env_var('SCHEMA_SUFFIX') }}"
      threads: 4

    databricks:
      type: databricks
      schema: "github_snwplow_media_player_dbt_{{ env_var('SCHEMA_SUFFIX') }}"
      host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
      http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
      token: "{{ env_var('DATABRICKS_TEST_TOKEN') }}"
      threads: 4

    spark:
      type: spark
      method: odbc
      driver: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
      schema: "github_snwplow_web_dbt_{{ env_var('SCHEMA_SUFFIX') }}"
      host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
      token: "{{ env_var('DATABRICKS_TEST_TOKEN') }}"
      endpoint: "{{ env_var('DATABRICKS_TEST_ENDPOINT') }}"
      threads: 4
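
For context: the workflow consumes this profile by exporting the connection secrets as environment variables and pointing `DBT_PROFILES_DIR` at `./ci`. A rough sketch of reproducing the postgres target locally (all connection values are placeholders):

```bash
cd integration_tests
export DBT_PROFILES_DIR=./ci
export DEFAULT_TARGET=postgres SCHEMA_SUFFIX=1
export POSTGRES_TEST_HOST=localhost POSTGRES_TEST_PORT=5432          # placeholder connection details
export POSTGRES_TEST_USER=dbt POSTGRES_TEST_PASS=dbt POSTGRES_TEST_DBNAME=dbt

dbt debug --target postgres   # verify the connection, as the workflow's "Connect to database" step does
```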
