Skip to content

Commit

Permalink
Merge pull request #33 from kgmcquate/develop
Browse files Browse the repository at this point in the history
Added Databricks Support
  • Loading branch information
kgmcquate committed Jan 6, 2024
2 parents 37de5b6 + d121ea3 commit 2fcd6cc
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 4 deletions.
37 changes: 36 additions & 1 deletion .github/workflows/dbt_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,39 @@ jobs:
- name: dbt test seeds
run: dbt test --target ${DBT_TARGET} --select config.materialized:seed --exclude "dbt_utils_recency*" --exclude tag:dataset-colnames_with_spaces


test-databricks:
  # Integration-test job: installs dbt with the Databricks adapter, seeds the
  # test data, then runs the package's tests against the `databricks` target.
  name: Test on Databricks
  runs-on: ubuntu-latest

  env:
    # DBT_TARGET is reused by every step below, both as the adapter package
    # suffix (dbt-<target>) and as the value passed to `--target`.
    DBT_TARGET: databricks
    # OAuth client credentials taken from repository secrets; the databricks
    # profile target reads them through env_var().
    DATABRICKS_CLIENT_ID: ${{ secrets.DATABRICKS_CLIENT_ID }}
    DATABRICKS_CLIENT_SECRET: ${{ secrets.DATABRICKS_CLIENT_SECRET }}

  container:
    # Quoted so the tag is always read as a string.
    image: "python:3.11"

  steps:
    - uses: actions/checkout@v4

    - name: pip install
      run: pip install dbt-core dbt-${DBT_TARGET}

    - name: Install DBT deps
      run: dbt deps --target ${DBT_TARGET}

    # NOTE(review): the colnames_with_spaces seed (and tests tagged for it) is
    # excluded throughout this job; the reason is not visible here. Confirm.
    - name: load test data
      run: dbt seed --target ${DBT_TARGET} --exclude colnames_with_spaces

    - name: dbt test macros
      run: dbt test --target ${DBT_TARGET} --exclude tag:dataset-colnames_with_spaces

    # Compiles an inline call to testgen.get_test_suggestions for the `users`
    # seed and redirects the output into a YAML file under seeds/, which the
    # final step then runs as tests. Presumably `-q` keeps dbt log output out
    # of the redirected file; verify against the dbt CLI docs.
    - name: Create seed tests
      run: |
        dbt compile --target ${DBT_TARGET} -q \
        --inline "{{ testgen.get_test_suggestions(ref('users'), resource_type='seeds', column_config={'quote': true, 'tags': ['dataset-users']} ) }}" \
        > seeds/users_test_suggestions.yml

    - name: dbt test seeds
      run: dbt test --target ${DBT_TARGET} --select config.materialized:seed --exclude "dbt_utils_recency*" --exclude tag:dataset-colnames_with_spaces

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ packages:
# Supported Databases
The following databases are supported:
- Snowflake
- Databricks
- Redshift
- BigQuery
- Postgres
Expand Down
15 changes: 13 additions & 2 deletions integration_tests/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,16 @@ integration_tests:
method: service-account
project: dbt-testgen
dataset: dbt_testgen
threads: 1
keyfile: gcp_keyfile.json
threads: 4
keyfile: gcp_keyfile.json

databricks:
  # dbt profile target used when running the integration tests on Databricks.
  type: databricks
  # catalog: [optional catalog name if you are using Unity Catalog]
  # NOTE(review): `hive_metastore` is normally the name of the legacy catalog,
  # not a schema; confirm that a schema with this name is what is intended.
  schema: hive_metastore  # Required
  host: dbc-ce3894e5-7741.cloud.databricks.com  # Required
  http_path: /sql/1.0/warehouses/f16c9d04325bef22  # Required
  threads: 4
  # OAuth credentials are read from the environment at run time, so no secret
  # value is stored in this file.
  auth_type: oauth
  client_id: "{{ env_var('DATABRICKS_CLIENT_ID') }}"
  client_secret: "{{ env_var('DATABRICKS_CLIENT_SECRET') }}"
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
{{ return("array_agg(CAST(" ~ adapter.quote(colname) ~ " AS STRING))") }}
{% endmacro %}

{% macro databricks__sql_agg_array(colname) %}
    {#
        Databricks variant of sql_agg_array.
        Returns a SQL expression string that casts the adapter-quoted column
        to STRING, collects the values with array_agg, and wraps the array in
        to_json. Presumably the JSON form is what the caller expects to parse
        on this adapter; confirm against the code that reads the result.
    #}
    {% set quoted_col = adapter.quote(colname) %}
    {% set agg_expr = "to_json(array_agg(CAST(" ~ quoted_col ~ " AS STRING)))" %}
    {{ return(agg_expr) }}
{% endmacro %}


{% macro get_accepted_values_test_suggestions(
table_relation,
Expand Down Expand Up @@ -62,10 +66,13 @@
{% set columns = testgen.exclude_column_types(columns, exclude_types) %}
{% set columns = testgen.exclude_column_names(columns, exclude_cols) %}

{# {{ print(columns) }} #}
{% if columns|length == 0 %}
{{ return(dbt_config) }}
{% endif %}

{# {{ print(columns) }} #}

{% set count_distinct_exprs = [] %}
{% for column in columns %}
{# Use capitals for colnames because of snowflake #}
Expand Down Expand Up @@ -124,6 +131,7 @@
]
}
%}

{% for k,v in column_config.items() %}
{% do col_config.update({k: v}) %}
{% endfor %}
Expand Down
12 changes: 11 additions & 1 deletion macros/test_generation/get_string_length_test_suggestions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,18 @@
{% set columns = testgen.exclude_column_types(columns, exclude_types) %}
{% set columns = testgen.exclude_column_names(columns, exclude_cols) %}

{# {{ print(columns) }} #}

{% set string_cols = [] %}
{% for column in columns %}
{% if column.is_string() %}
{# {{ print(column.data_type) }} #}
{% if column.is_string() or column.data_type|lower in ["string"] %}
{% do string_cols.append(column) %}
{% endif %}
{% endfor %}

{# {{ print(string_cols) }} #}

{% if string_cols|length == 0 %}
{{ return(dbt_config) }}
{% endif %}
Expand Down Expand Up @@ -81,8 +86,13 @@

{% set results = testgen.query_as_list(min_max_sql) %}

{# {{ print(results) }} #}

{% set column_tests = [] %}
{% for result in results %}

{# {{ print(result) }} #}

{% set min_val = testgen.cast_number(result[1]) %}
{% set max_val = testgen.cast_number(result[2]) %}
{% set stddev = testgen.cast_number(result[3]) %}
Expand Down

0 comments on commit 2fcd6cc

Please sign in to comment.