
Run over multiple threads
Most of the time is spent waiting on network requests, so we might as well run lots of them at once.

Before: 43.59s
After: 4.65s
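
The speedup is consistent with the work being I/O-bound: a thread blocked on a network read releases the GIL, so many downloads can be in flight at once. A minimal sketch of the effect, using a hypothetical fetch() stand-in rather than anything from this repo:

    import concurrent.futures
    import time

    def fetch(url):
        # Stand-in for a blocking network call (hypothetical helper).
        time.sleep(1)
        return url

    urls = [f"https://example.com/{i}" for i in range(20)]

    start = time.monotonic()
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        results = list(executor.map(fetch, urls))
    print(f"{time.monotonic() - start:.2f}s")  # ~1s; a sequential loop takes ~20s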
RealOrangeOne committed Jul 18, 2021 · 1 parent b06e82c · commit 2b5196e
Showing 1 changed file with 25 additions and 16 deletions.

scrape.py: 41 changes (25 additions & 16 deletions)
@@ -1,3 +1,4 @@
+import concurrent.futures
 import os
 import shutil
 
@@ -27,25 +28,10 @@ def get_duration(seconds):
     return f"{minutes} mins {seconds} secs"
 
 
-api_data = requests.get(BASE_URL + "/json").json()
-
-episodes = []
-
-# Remove any outputs from a previous run
-try:
-    shutil.rmtree(OUTPUT_DIR)
-except:
-    pass
-
-os.mkdir(OUTPUT_DIR)
-
-# Do the stuff
-for api_episode in api_data["items"]:
+def create_episode(api_episode):
     # RANT: What kind of API doesn't give the episode number?!
     episode_number = int(api_episode["url"].split("/")[-1])
 
-    print(episode_number, end="\r")
-
     api_soup = BeautifulSoup(api_episode["content_html"], "html.parser")
 
     blurb = api_episode["summary"]
@@ -96,3 +82,26 @@ def get_duration(seconds):
 
     with open(f"{OUTPUT_DIR}/episode-{episode_number}.md", "w") as f:
         f.write(output)
+
+
+def main():
+    api_data = requests.get(BASE_URL + "/json").json()
+
+    episodes = []
+
+    # Remove any outputs from a previous run
+    try:
+        shutil.rmtree(OUTPUT_DIR)
+    except:
+        pass
+
+    os.mkdir(OUTPUT_DIR)
+
+    # Run over multiple threads
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        for api_episode in api_data["items"]:
+            executor.submit(create_episode, api_episode)
+
+
+if __name__ == "__main__":
+    main()
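
One property of the submit loop worth noting: executor.submit() returns a Future, and because the futures are discarded here, an exception raised inside create_episode() passes silently. A variant that surfaces worker errors (a sketch, not part of this commit):

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(create_episode, ep) for ep in api_data["items"]]
        for future in concurrent.futures.as_completed(futures):
            future.result()  # re-raises any exception from the worker thread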
