Skip to content

Commit

Permalink
Attempt to get Spark OHDSI tests running
Browse files Browse the repository at this point in the history
  • Loading branch information
aguynamedryan committed Nov 30, 2023
1 parent 16cc9c5 commit bf2e331
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 5 deletions.
48 changes: 47 additions & 1 deletion .github/workflows/run_tests.yml
Expand Up @@ -32,4 +32,50 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Run Tests
run: docker-compose run conceptql
run: docker-compose run conceptql
Run-Spark-Tests:
  # Runs test/all.rb against a local Spark Thrift server, once per
  # vocabulary listed in the matrix. Each run points the tests at the
  # matching directory under /tmp/synpuf_test_data.
  strategy:
    matrix:
      include:
        - vocab: gdm
        - vocab: ohdsi
  runs-on: ubuntu-22.04
  env:
    SPARK_VERSION: "3.5.0"
    # NOTE(review): this is fixed to gdm for every matrix entry, including
    # the ohdsi run -- confirm that is intended.
    CONCEPTQL_DATA_MODEL: gdm
  steps:
    - name: Check out repository
      uses: actions/checkout@v3

    - name: Set up Ruby
      uses: ruby/setup-ruby@v1
      with:
        # Quoted so the version is always read as a string, never a float.
        ruby-version: "3.2"
        bundler-cache: true

    # Reuse a previously downloaded Spark distribution, keyed on its version.
    - name: Cache Spark
      id: cache-spark
      uses: actions/cache@v3
      with:
        path: ~/spark
        key: spark-${{ env.SPARK_VERSION }}

    - name: Download Spark
      if: steps.cache-spark.outputs.cache-hit != 'true'
      run: |
        wget -q https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz
        tar xzf spark-$SPARK_VERSION-bin-hadoop3.tgz
        mv spark-$SPARK_VERSION-bin-hadoop3 ~/spark

    # The key is a constant, so the cached data is reused until the key
    # itself is changed.
    - name: Cache test data
      id: cache-synpuf-test-data
      uses: actions/cache@v3
      with:
        path: /tmp/synpuf_test_data
        key: synpuf-test-data

    - name: Download Data
      if: steps.cache-synpuf-test-data.outputs.cache-hit != 'true'
      run: |
        cd /tmp
        # -f makes curl fail on an HTTP error instead of saving the error
        # page as the archive and leaving tar to fail confusingly.
        curl -fsSL "https://www.dropbox.com/scl/fi/hha5zjm9d5ezkk8bfvtnc/synpuf_test_data.tgz?rlkey=lythw2s6342609ave66cam2ms&dl=1" > synpuf_test_data.tgz
        tar xzf synpuf_test_data.tgz

    # The fixed pause gives the Thrift server time to come up before the
    # tests try to connect.
    - name: Start Spark Thrift server
      run: ~/spark/sbin/start-thriftserver.sh --driver-memory 5G && sleep 20

    - name: Run tests
      run: bundle exec ruby test/all.rb
      env:
        CONCEPTQL_PARQUET_TEST_DIR: /tmp/synpuf_test_data/${{ matrix.vocab }}
        SEQUELIZER_URL: hexspace://localhost:10000/default
2 changes: 1 addition & 1 deletion Gemfile
Expand Up @@ -3,7 +3,7 @@ source 'https://rubygems.org'
# Specify your gem's dependencies in conceptql.gemspec
gemspec
gem "pg"
gem "sequel-hexspace", path: "../sequel-hexspace"
gem "sequel-hexspace", github: "outcomesinsights/sequel-hexspace"

group :test, :development do
gem "nokogiri"
Expand Down
5 changes: 3 additions & 2 deletions Gemfile.lock
@@ -1,5 +1,6 @@
PATH
remote: ../sequel-hexspace
GIT
remote: https://github.com/outcomesinsights/sequel-hexspace.git
revision: a6cddba0a45283581a8bf3e1a4580a146ea4eb1c
specs:
sequel-hexspace (1.0.0)
hexspace
Expand Down
2 changes: 1 addition & 1 deletion dockers/standard/Dockerfile
@@ -1,4 +1,4 @@
FROM ruby:2.7-slim
FROM ruby:3.2-slim-bullseye

ENV PATH="/root/.local/bin:${PATH}"

Expand Down
4 changes: 4 additions & 0 deletions lib/conceptql/spark_prepper.rb
Expand Up @@ -20,5 +20,9 @@ def prep
db.create_view(table_name, temp: true, if_not_exists: true, using: 'org.apache.spark.sql.parquet', options: { path: parquet_file.expand_path })
end
end
if ENV["CI"].present?
# Broadcast joins are running out of memory in GitHub Actions
#db.run("SET spark.sql.autoBroadcastJoinThreshold=6134169")
end
end
end

0 comments on commit bf2e331

Please sign in to comment.