From 63e7e31916054baee65a2d3a28d96c9e2960c381 Mon Sep 17 00:00:00 2001
From: ChrisCarini <6374067+ChrisCarini@users.noreply.github.com>
Date: Sat, 17 May 2025 07:44:32 -0700
Subject: [PATCH] Add pagination to `api_process.py`

---
 api_process.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/api_process.py b/api_process.py
index 492a4c6..0330078 100644
--- a/api_process.py
+++ b/api_process.py
@@ -5,11 +5,11 @@
 # and produces a new file with only the keys (and associated values) that are
 # used for building the website.
 #
-# usage: python api_process.py
+# usage: python3 api_process.py
 
 import json
 import urllib.request
-from datetime import datetime
+from datetime import datetime, UTC
 
 REQUIRED_KEYS = {
     'description',
@@ -22,13 +22,22 @@
     'watchers_count',
 }
 
-GITHUB_LINKEDIN_REPO_URL = 'https://api.github.com/orgs/LinkedIn/repos?page=1&per_page=100'
-with urllib.request.urlopen(GITHUB_LINKEDIN_REPO_URL) as response:
-    gh_data = json.loads(response.read().decode('utf-8'))
+GITHUB_LINKEDIN_REPO_URL = 'https://api.github.com/orgs/LinkedIn/repos'
+GITHUB_LINKEDIN_REPO_URL_QUERY = f'{GITHUB_LINKEDIN_REPO_URL}?page={{page}}&per_page=100'
+all_repos = []
+page = 1
+while True:
+    with urllib.request.urlopen(GITHUB_LINKEDIN_REPO_URL_QUERY.format(page=page)) as response:
+        print(f'Fetching {GITHUB_LINKEDIN_REPO_URL} page {page}...')
+        gh_data = json.loads(response.read().decode('utf-8'))
+    if not gh_data:
+        break
+    all_repos.extend(gh_data)
+    page += 1
 
 filtered_repos = list()
-for repo in gh_data:
+for repo in all_repos:
     filtered_repo = dict()
     # Skip forked repos, as they are not directly owned by LinkedIn
     if repo.get('fork', False):
         continue
@@ -40,6 +49,6 @@
 
 # Write the data out in the desired format.
 with open('js/cached-github-api-response.js', 'w+') as f:
-    f.write(f'// Generated from {GITHUB_LINKEDIN_REPO_URL} on {datetime.utcnow().replace(microsecond=0).isoformat()}\n')
+    f.write(f'// Generated from {GITHUB_LINKEDIN_REPO_URL} on {datetime.now(UTC).date().isoformat()}\n')
     f.write('var cachedGithubApiResponse = ')
     json.dump(filtered_repos, f)
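
---
A minimal sketch (not part of the commit above) of the same pagination
pattern, factored into a reusable generator. It relies, as the patch does,
on the GitHub list endpoint returning an empty JSON array once the
requested page is past the last repository. fetch_all_pages is a
hypothetical helper name for illustration, not something from the
repository.

    import json
    import urllib.request

    def fetch_all_pages(base_url: str, per_page: int = 100):
        """Yield every item from a paginated GitHub list endpoint."""
        page = 1
        while True:
            url = f'{base_url}?page={page}&per_page={per_page}'
            with urllib.request.urlopen(url) as response:
                items = json.loads(response.read().decode('utf-8'))
            # An empty page means every item has already been fetched.
            if not items:
                return
            yield from items
            page += 1

    # Usage mirroring the patched script:
    all_repos = list(fetch_all_pages('https://api.github.com/orgs/LinkedIn/repos'))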