Data Pipeline #843

Workflow file for this run

# inspired by https://www.tjwaterman.com/web-scraping-with-github-actions/
name: Data Pipeline
on:
  schedule:
    - cron: '0 12 * * *' # daily at 8am (UTC-4)
  workflow_dispatch:
jobs:
  rhs-delta:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Use Node.js
        uses: actions/setup-node@v2
        with:
          node-version: 18.x
      - name: Install dependencies
        run: yarn --frozen-lockfile
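      # Resume scraping where the last run left off: the first tab-separated field of
      # the final row in data.tsv is taken as the newest record id already stored, so
      # update.js is started at that id + 1 and writes the new rows to delta.tsv.
      # (Argument meanings are inferred from the command itself; the trailing "true"
      # is whatever flag update.js expects as its third parameter.)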
      - name: Scrape delta data
        env:
          SB_PG_POOL_URL: ${{ secrets.SB_PG_POOL_URL }}
        run: node scripts/update.js $(($(tail -n1 data/rhs/data.tsv | tr '\t' '\n' | head -n1) + 1)) data/rhs/delta.tsv true
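      # Append the freshly scraped delta to the full data set; tail -n+2 skips
      # delta.tsv's header row so it is not duplicated in the combined file.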
      - name: Concatenate whole and delta sets
        run: (cat data.tsv && tail -n+2 delta.tsv) > new.tsv
        working-directory: data/rhs
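      # Keep the header line in place and sort the remaining rows in reverse order
      # on the 9th tab-separated column (the date field, per the step name).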
      - name: Sort by date
        run: (head -n1 new.tsv && tail -n+2 new.tsv | sort -t$'\t' -r -k9) > sorted.tsv
        working-directory: data/rhs
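      # Drop the intermediate files and promote the sorted output to be the new data.tsv.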
      - name: Clean up files
        run: |
          rm new.tsv delta.tsv
          mv sorted.tsv data.tsv
        working-directory: data/rhs
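      # Commit only when something under data/ actually changed: if both diff checks
      # report no differences, the || never fires and no empty commit is created.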
      - name: Commit files
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git add data
          git diff --quiet && git diff --staged --quiet || git commit -m "[action] RHS data daily delta"
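      # Push the new commit back to the branch that triggered the run, authenticating
      # with the workflow's built-in GITHUB_TOKEN.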
      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}