From 63e7e31916054baee65a2d3a28d96c9e2960c381 Mon Sep 17 00:00:00 2001
From: ChrisCarini <6374067+ChrisCarini@users.noreply.github.com>
Date: Sat, 17 May 2025 07:44:32 -0700
Subject: [PATCH] Add pagination to `api_process.py`

---
 api_process.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/api_process.py b/api_process.py
index 492a4c6..0330078 100644
--- a/api_process.py
+++ b/api_process.py
@@ -5,11 +5,11 @@
 # and produces a new file with only the keys (and associated values) that are
 # used for building the website.
 #
-# usage: python api_process.py
+# usage: python3 api_process.py
 
 import json
 import urllib.request
-from datetime import datetime
+from datetime import datetime, UTC
 
 REQUIRED_KEYS = {
     'description',
@@ -22,13 +22,22 @@
     'watchers_count',
 }
 
-GITHUB_LINKEDIN_REPO_URL = 'https://api.github.com/orgs/LinkedIn/repos?page=1&per_page=100'
-with urllib.request.urlopen(GITHUB_LINKEDIN_REPO_URL) as response:
-    gh_data = json.loads(response.read().decode('utf-8'))
+GITHUB_LINKEDIN_REPO_URL = 'https://api.github.com/orgs/LinkedIn/repos'
+GITHUB_LINKEDIN_REPO_URL_QUERY = f'{GITHUB_LINKEDIN_REPO_URL}?page={{page}}&per_page=100'
+all_repos = []
+page = 1
+while True:
+    with urllib.request.urlopen(GITHUB_LINKEDIN_REPO_URL_QUERY.format(page=page)) as response:
+        print(f'Fetching {GITHUB_LINKEDIN_REPO_URL} page {page}...')
+        gh_data = json.loads(response.read().decode('utf-8'))
+    if not gh_data:
+        break
+    all_repos.extend(gh_data)
+    page += 1
 
 filtered_repos = list()
-for repo in gh_data:
+for repo in all_repos:
     filtered_repo = dict()
     # Skip forked repos, as they are not directly owned by LinkedIn
     if repo.get('fork', False):
         continue
@@ -40,6 +49,6 @@
 
 # Write the data out in the desired format.
 with open('js/cached-github-api-response.js', 'w+') as f:
-    f.write(f'// Generated from {GITHUB_LINKEDIN_REPO_URL} on {datetime.utcnow().replace(microsecond=0).isoformat()}\n')
+    f.write(f'// Generated from {GITHUB_LINKEDIN_REPO_URL} on {datetime.now(UTC).date().isoformat()}\n')
     f.write('var cachedGithubApiResponse = ')
     json.dump(filtered_repos, f)
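
---
A minimal sketch (not part of the commit above) of the same pagination
pattern, factored into a reusable generator. It relies, as the patch does,
on the GitHub list endpoint returning an empty JSON array once the
requested page is past the last repository. fetch_all_pages is a
hypothetical helper name for illustration, not something from the
repository.

    import json
    import urllib.request

    def fetch_all_pages(base_url: str, per_page: int = 100):
        """Yield every item from a paginated GitHub list endpoint."""
        page = 1
        while True:
            url = f'{base_url}?page={page}&per_page={per_page}'
            with urllib.request.urlopen(url) as response:
                items = json.loads(response.read().decode('utf-8'))
            # An empty page means every item has already been fetched.
            if not items:
                return
            yield from items
            page += 1

    # Usage mirroring the patched script:
    all_repos = list(fetch_all_pages('https://api.github.com/orgs/LinkedIn/repos'))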