# Scrape Hacker News #21116
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow: runs scrape.js via shot-scraper against the Hacker News "from?site="
# listings for simonwillison.net and datasette.io, writes the results to JSON
# files, hands them to submit-to-datasette-cloud.sh, and commits any changes
# back to the repository.
name: Scrape Hacker News

on:
  push:
  workflow_dispatch:
  schedule:
    # Minute 17 of every hour.
    - cron: '17 * * * *'

# The final step pushes a commit, so the job token needs write access to the
# repository contents; nothing else in the workflow uses the job token.
permissions:
  contents: write

jobs:
  shot-scraper:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      - name: Configure pip caching
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # Exact hit only when requirements.txt is unchanged; otherwise fall
          # back to the most recent pip cache for this OS.
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Cache Playwright browsers
        uses: actions/cache@v4
        with:
          path: ~/.cache/ms-playwright/
          # NOTE(review): this key never changes, so a saved cache is reused
          # as-is on every run - confirm that is intended when the browser
          # version changes.
          key: ${{ runner.os }}-browsers

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Install Playwright dependencies
        run: |
          shot-scraper install

      - name: Scrape
        run: |
          shot-scraper javascript \
            "https://news.ycombinator.com/from?site=simonwillison.net" \
            -i scrape.js -o simonwillison-net.json
          # Pause between the two requests (presumably to be polite to the
          # site - confirm).
          sleep 5
          shot-scraper javascript \
            "https://news.ycombinator.com/from?site=datasette.io" \
            -i scrape.js -o datasette-io.json

      - name: Submit fresh data to my Datasette Cloud space
        env:
          DS_TOKEN: ${{ secrets.SIMON_DS_WRITE_TOKEN }}
        run: |-
          ./submit-to-datasette-cloud.sh

      - name: Commit and push
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          git add -A
          timestamp=$(date -u)
          # If the commit fails (e.g. nothing changed), end the step
          # successfully without pulling or pushing.
          git commit -m "${timestamp}" || exit 0
          # Rebase onto any commits that landed while this run was in progress.
          git pull --rebase
          git push