Skip to content

Commit

Permalink
Add BigQuery support to snowplow_incremental (Close #7)
Browse files Browse the repository at this point in the history
  • Loading branch information
bill-warner committed Aug 20, 2021
1 parent 5c3574c commit 5d9941b
Show file tree
Hide file tree
Showing 14 changed files with 442 additions and 47 deletions.
38 changes: 38 additions & 0 deletions integration_tests/.scripts/test_materializations.sh
@@ -0,0 +1,38 @@
#!/bin/bash

# Runs the dbt integration tests for the `materializations` models.
#
# Usage: test_materializations.sh -d <database> [-s <true|false>]
#
# Expected input:
# -d (database) target database for dbt. Required (a DATABASE variable
#    inherited from the environment is used when -d is not given).
# -s (seed) boolean of whether to seed test data. Default true.
#
# Steps: optionally seed the test data, run the models with --full-refresh,
# run them a second time, then run the dbt tests. The script exits with
# status 1 as soon as any step fails.

SEED_DATA=true

# Print a short usage message to stderr and abort.
usage() {
  printf 'Usage: %s -d <database> [-s <true|false>]\n' "${0##*/}" >&2
  exit 1
}

while getopts 'd:s:' opt; do
  case "$opt" in
    d) DATABASE=$OPTARG ;;
    s) SEED_DATA=$OPTARG ;;
    # getopts has already reported the offending option on stderr.
    *) usage ;;
  esac
done

# Fail fast with a clear message instead of handing dbt an empty --target.
if [ -z "${DATABASE:-}" ]; then
  echo "Error: no target database given (-d)" >&2
  usage
fi

# NOTE: "$DATABASE" is passed to dbt as a single quoted argument rather than
# through eval, so its value is never re-parsed as shell code or word-split.

if [ "$SEED_DATA" = true ]; then

  echo "Snowplow-utils integration tests: Seeding data"

  dbt seed --target "$DATABASE" --full-refresh || exit 1

fi

echo "Test materializations: Refresh models"

dbt run --models materializations --target "$DATABASE" --full-refresh || exit 1

echo "Test materializations: Execute models"

dbt run --models materializations --target "$DATABASE" || exit 1

echo "Test materializations: Test models"

dbt test --models materializations --target "$DATABASE" || exit 1

echo "Test materializations: All tests passed"
@@ -0,0 +1,8 @@
id,start_tstamp
1,2021-01-01 00:00:00
1,2021-03-05 00:00:00
2,2021-03-02 00:00:00
2,2021-03-02 00:00:00
3,2021-03-07 00:00:00
4,2021-03-04 00:00:00
5,2021-03-08 00:00:00
@@ -0,0 +1,9 @@
id,start_tstamp
1,2021-01-01 00:00:00
1,2021-03-05 00:00:00
2,2021-03-01 00:00:00
2,2021-03-02 00:00:00
3,2021-03-07 00:00:00
3,2021-03-07 00:00:00
4,2021-03-04 00:00:00
5,2021-03-08 00:00:00
@@ -0,0 +1,10 @@
run,id,start_tstamp
1,1,2021-01-01 00:00:00
1,2,2021-03-01 00:00:00
1,2,2021-03-03 00:00:00
1,3,2021-03-03 00:00:00
1,4,2021-03-04 00:00:00
2,1,2021-03-05 00:00:00
2,2,2021-03-02 00:00:00
2,3,2021-03-07 00:00:00
2,5,2021-03-08 00:00:00
@@ -0,0 +1,7 @@
id,start_tstamp
1,2021-01-01 00:00:00
4,2021-03-04 00:00:00
5,2021-03-08 00:00:00
1,2021-03-05 00:00:00
2,2021-03-02 00:00:00
3,2021-03-07 00:00:00
@@ -0,0 +1,8 @@
id,start_tstamp
2,2021-03-01 00:00:00
3,2021-03-07 00:00:00
2,2021-03-02 00:00:00
1,2021-01-01 00:00:00
4,2021-03-04 00:00:00
1,2021-03-05 00:00:00
5,2021-03-08 00:00:00
27 changes: 27 additions & 0 deletions integration_tests/dbt_project.yml
Expand Up @@ -51,3 +51,30 @@ seeds:
last_success: timestamp
collector_tstamp: timestamp
expected_last_success: timestamp

# Seed settings for the materializations integration tests.
materializations:
# Input seed read by the test models; column types are set explicitly.
data_snowplow_incremental:
+column_types:
run: integer
id: integer
start_tstamp: timestamp
# Expected-result seeds for Redshift, enabled when target.type is 'redshift'.
# NOTE(review): Jinja filters bind tighter than `==`, so as_bool() is applied
# to the string literal rather than to the comparison result. Confirm this is
# intended; `(target.type == 'redshift') | as_bool` is the explicit form.
redshift:
enabled: "{{ target.type == 'redshift' | as_bool() }}"
data_snowplow_incremental_expected:
+column_types:
id: integer
start_tstamp: timestamp
data_snowplow_incremental_w_lookback_disabled_expected:
+column_types:
id: integer
start_tstamp: timestamp
# Expected-result seeds for BigQuery, enabled when target.type is 'bigquery'.
# NOTE(review): the same filter-precedence caveat applies to this expression.
bigquery:
enabled: "{{ target.type == 'bigquery' | as_bool() }}"
data_snowplow_incremental_expected:
+column_types:
id: integer
start_tstamp: timestamp
data_snowplow_incremental_w_lookback_disabled_expected:
+column_types:
id: integer
start_tstamp: timestamp
12 changes: 12 additions & 0 deletions integration_tests/models/materializations/materializations.yml
@@ -0,0 +1,12 @@
version: 2

# Each test model carries a dbt_utils.equality test whose compare_model is the
# matching *_expected seed.
models:
# snowplow_incremental with its default configuration.
- name: test_snowplow_incremental
tests:
- dbt_utils.equality:
compare_model: ref('data_snowplow_incremental_expected')
# snowplow_incremental with disable_upsert_lookback set on the model.
- name: test_snowplow_incremental_w_lookback_disabled
tests:
- dbt_utils.equality:
compare_model: ref('data_snowplow_incremental_w_lookback_disabled_expected')

@@ -0,0 +1,43 @@
{#
  Integration test model for the snowplow_incremental materialization, covering
  both RS (delete/insert) and BQ (merge).
    upsert_date_key: RS only. Key used to limit the table scan.
    partition_by: BQ only. Key used to limit the table scan.
  A non-incremental run selects the seed rows with run = 1; an incremental run
  selects the seed rows with run = 2.
  TODO: Add tests that change the granularity of the partition
#}

{{
  config(
    materialized='snowplow_incremental',
    unique_key='id',
    upsert_date_key='start_tstamp',
    partition_by={
      "field": "start_tstamp",
      "data_type": "timestamp",
      "granularity": "day"
    },
  )
}}

with data as (
  select * from {{ ref('data_snowplow_incremental') }}
)

{# Pick which batch of seed rows this invocation should load. #}
{% set seed_run = 2 if snowplow_utils.snowplow_is_incremental() else 1 %}

select
  id,
  start_tstamp

from data
where run = {{ seed_run }}


@@ -0,0 +1,43 @@
{#
  Integration test model for the snowplow_incremental materialization with the
  upsert lookback disabled, covering both RS (delete/insert) and BQ (merge).
    upsert_date_key: RS only. Key used to limit the table scan.
    partition_by: BQ only. Key used to limit the table scan.
  A non-incremental run selects the seed rows with run = 1; an incremental run
  selects the seed rows with run = 2.
#}

{{
  config(
    materialized='snowplow_incremental',
    unique_key='id',
    upsert_date_key='start_tstamp',
    disable_upsert_lookback=true,
    partition_by={
      "field": "start_tstamp",
      "data_type": "timestamp",
      "granularity": "day"
    },
  )
}}

with data as (
  select * from {{ ref('data_snowplow_incremental') }}
)

{# Pick which batch of seed rows this invocation should load. #}
{% set seed_run = 2 if snowplow_utils.snowplow_is_incremental() else 1 %}

select
  id,
  start_tstamp

from data
where run = {{ seed_run }}


0 comments on commit 5d9941b

Please sign in to comment.