# Wrangle data from JSON to CSV #126
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
---
# Scheduled pipeline: download the latest data as JSON, wrangle it to CSV,
# summarise and plot it, then commit the results back to this repository.
name: Wrangle data from JSON to CSV

on:
  # Manual "Run workflow" button in GitHub.
  workflow_dispatch:
  # Run on a schedule.
  schedule:
    # Daily at 01:33 (GitHub evaluates cron schedules in UTC).
    - cron: '33 1 * * *'

# The final step pushes a commit using GITHUB_TOKEN, which needs write access
# to repository contents. Declaring it here keeps the workflow working in
# repositories whose default token permission is read-only.
permissions:
  contents: write

jobs:
  # Could call the scrape workflow here, but installing packages is the
  # longest-running step and would be repeated. So, scrape as part of this
  # workflow.
  scheduled:
    name: Wrangle data
    runs-on: ubuntu-latest
    steps:
      - name: Check out this repo
        # https://github.com/actions/checkout
        uses: actions/checkout@v4

      - name: Install Python
        # https://github.com/actions/setup-python
        # The version is pinned explicitly below (no .python-version file is
        # consulted); it is quoted so YAML keeps it a string.
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.6'
          # Cache pip downloads between runs.
          cache: pip

      - name: Install Python packages
        run: |-
          make install

      - name: Fetch latest data and save as JSON
        run: |-
          # Download data
          # Regional forward 48h
          python run.py download --output_directory data --now
          # Regional past 24h
          python run.py download --output_directory data --now --endpoint regional_pt24h
          # National
          python run.py download --output_directory data --now --endpoint national_fw48h
          python run.py download --output_directory data --now --endpoint national_pt24h
          python run.py download --output_directory data --now --endpoint national_generation_pt24h

      - name: Wrangle all JSON files to CSV
        run: |-
          # Wrangle data to CSV
          python run.py wrangle --input_directory data/regional_fw48h --delete_json
          python run.py wrangle --input_directory data/regional_pt24h --delete_json
          python run.py wrangle --input_directory "data/national_generation_pt24h" --endpoint "national_generation_pt24h" --delete_json
          python run.py wrangle --input_directory "data/national_fw48h" --endpoint "national_fw48h" --delete_json
          python run.py wrangle --input_directory "data/national_pt24h" --endpoint "national_pt24h" --delete_json
          # Summarise
          python run.py summary --input_directory "data/national_fw48h" --output_directory "data" --endpoint "national_fw48h"
          python run.py summary --input_directory "data/national_pt24h" --output_directory "data" --endpoint "national_pt24h"
          # Plot graphs
          python run.py graph --input_directory "data" --output_directory "charts"

      - name: Commit and push new data
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          git add -A
          # Nothing staged means nothing new to publish: finish successfully.
          # Checking explicitly (rather than `git commit || exit 0`) lets a
          # genuine commit failure fail the step instead of being hidden.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -m "Summarise data and plot"
          # Replay the new commit on top of anything pushed meanwhile.
          git pull --rebase
          git push