# Workflow file for run #367 of "Wrangle data from JSON to CSV" (copied from the GitHub Actions web UI).

---
# Scheduled pipeline: download the latest data as JSON, wrangle it to CSV,
# summarise it, plot charts, and commit the results back to this repository.
name: Wrangle data from JSON to CSV

on:
  # Manual button in GitHub.
  workflow_dispatch:
  # Run on a schedule.
  schedule:
    # Daily at 01:33 (GitHub evaluates cron schedules in UTC).
    - cron: '33 1 * * *'

# The last step pushes a commit, so the job token needs write access to
# repository contents. Nothing else in this workflow needs a wider scope.
permissions:
  contents: write

jobs:
  # Could call the scrape workflow here, but installing packages is the
  # longest-running step and would be repeated. So, scrape as part of this
  # workflow.
  scheduled:
    name: Wrangle data
    runs-on: ubuntu-latest
    steps:
      - name: Check out this repo
        # https://github.com/actions/checkout
        uses: actions/checkout@v3

      - name: Install Python
        # https://github.com/actions/setup-python
        # The version is pinned explicitly below (it is NOT read from
        # .python-version); keep the two in sync if the repo has that file.
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.6'
          cache: pip

      - name: Install Python packages
        # NOTE(review): relies on an `install` target in the repo's Makefile,
        # which is not visible from this file.
        run: |-
          make install

      - name: Fetch latest data and save as JSON
        run: |-
          # Download data
          # Regional forward 48h
          python run.py download --output_directory data --now
          # Regional past 24h
          python run.py download --output_directory data --now --endpoint regional_pt24h
          # National
          python run.py download --output_directory data --now --endpoint national_fw48h
          python run.py download --output_directory data --now --endpoint national_pt24h
          python run.py download --output_directory data --now --endpoint national_generation_pt24h

      - name: Wrangle all JSON files to CSV
        # Uses `python` throughout, matching the download step, so every
        # command runs under the interpreter installed by setup-python.
        run: |-
          # Wrangle data to CSV
          python run.py wrangle --input_directory data/regional_fw48h --delete_json
          python run.py wrangle --input_directory data/regional_pt24h --delete_json
          python run.py wrangle --input_directory "data/national_generation_pt24h" --endpoint "national_generation_pt24h" --delete_json
          python run.py wrangle --input_directory "data/national_fw48h" --endpoint "national_fw48h" --delete_json
          python run.py wrangle --input_directory "data/national_pt24h" --endpoint "national_pt24h" --delete_json
          # Summarise
          python run.py summary --input_directory "data/national_fw48h" --output_directory "data" --endpoint "national_fw48h" --delete_old_files
          python run.py summary --input_directory "data/national_pt24h" --output_directory "data" --endpoint "national_pt24h" --delete_old_files
          # Plot graphs
          python run.py graph --input_directory "data" --output_directory "charts"

      - name: Commit and push new data
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          git add -A
          # Stop early (successfully) when nothing is staged. Checking the
          # index explicitly means a genuine `git commit` failure still fails
          # the step instead of being mistaken for "nothing to commit".
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -m "chore: Summarise data and plot"
          # Rebase onto anything pushed while this job was running.
          git pull --rebase
          git push