From b06cdf4fba2100901951e978b9bedb239dd0d4f5 Mon Sep 17 00:00:00 2001
From: Nathan Lambert <nathanl@allenai.org>
Date: Wed, 29 May 2024 10:00:34 -0700
Subject: [PATCH 1/3] create-config: collapse " , " to ", " after TTS rewrite

---
 scripts/create-config.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/create-config.py b/scripts/create-config.py
index 66be0f1..a073609 100644
--- a/scripts/create-config.py
+++ b/scripts/create-config.py
@@ -253,6 +253,9 @@ def parse_markdown_to_dict(md_content, filename):
                 ):
                     text = prep_for_tts(text)
                     print(f"Rewrote index {total_index} with AI for TTS formatting.")
+                    # prep_for_tts output sometimes has a stray space before commas (" , "); collapse it
+                    if " , " in text:
+                        text = text.replace(" , ", ", ")
 
                 # remove :, -, and leading space from text
                 text = text.replace(":", ",")

From 354d5d74b66557ca4898a55212fd52e892d15ec5 Mon Sep 17 00:00:00 2001
From: Nathan Lambert <nathanl@allenai.org>
Date: Wed, 29 May 2024 11:20:59 -0700
Subject: [PATCH 2/3] ttv-generate: add --start_idx option to image generation

---
 scripts/ttv-generate.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/ttv-generate.py b/scripts/ttv-generate.py
index 53b1bd6..84f4558 100644
--- a/scripts/ttv-generate.py
+++ b/scripts/ttv-generate.py
@@ -115,6 +115,7 @@ def get_image(idx, inputs, vivid=True, hd=True, rewrite=True, no_sleep=False):
     parser = argparse.ArgumentParser()
     parser.add_argument("--input", type=str, required=True, help="input text file dir")
     parser.add_argument("--do_not_gen", action="store_true", default=False, help="only download images")
+    parser.add_argument("--start_idx", type=int, default=0, help="start index for generation")
     args = parser.parse_args()
 
     # load yml file at args.input + config.yml
@@ -167,7 +168,8 @@ def get_image(idx, inputs, vivid=True, hd=True, rewrite=True, no_sleep=False):
     # if --do_not_gen, do not do this
     if not args.do_not_gen:
         with Pool(processes=3) as pool:
-            pool.starmap(get_image, enumerate(zip(prompts, title)))
+            # enumerate based on start index
+            pool.starmap(get_image, enumerate(zip(prompts, title), start=args.start_idx))
 
         # move all images from temp-images to args.input/images
         os.system(f"mv temp-images/* {args.input}images")

From 78b9c4bb64fee7c5a974000c2df7fca1df66eb89 Mon Sep 17 00:00:00 2001
From: Nathan Lambert <nathanl@allenai.org>
Date: Wed, 29 May 2024 11:47:48 -0700
Subject: [PATCH 3/3] format scripts; slice prompts and titles by start_idx in ttv-generate

---
 scripts/create-config.py  |  4 ++--
 scripts/list-artifacts.py | 37 +++++++++++++++++++++++++------------
 scripts/ttv-generate.py   |  4 +++-
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/scripts/create-config.py b/scripts/create-config.py
index a073609..d5600a3 100644
--- a/scripts/create-config.py
+++ b/scripts/create-config.py
@@ -23,7 +23,7 @@
     ">": "",
     "**": "",
     "*": "",
-    "~": "approximately ", # for numbers
+    "~": "approximately ",  # for numbers
     " | ": " ",
     "\\ ": " ",
     "e.g.": "e g",
@@ -259,7 +259,7 @@ def parse_markdown_to_dict(md_content, filename):
 
                 # remove :, -, and leading space from text
                 text = text.replace(":", ",")
-                text = text.replace("--", ",") # simpler pause
+                text = text.replace("--", ",")  # simpler pause
                 if text.startswith(" "):
                     text = text[1:]
 
diff --git a/scripts/list-artifacts.py b/scripts/list-artifacts.py
index c3f81a6..5217132 100644
--- a/scripts/list-artifacts.py
+++ b/scripts/list-artifacts.py
@@ -1,26 +1,28 @@
 import argparse
+
 from huggingface_hub import get_collection
 
+
 def process_collection(collection_name, index, print_idx=False):
     collection = get_collection(collection_name)
 
     if index < 0 or index >= len(collection.items):
         return f"Error: invalid index: {index} for length of collection: {len(collection.items)}"
-    
+
     markdown_list = f"\n# Artifacts Log N\n\n"
-    categories = {'model': [], 'dataset': [], 'Space': []}
+    categories = {"model": [], "dataset": [], "Space": []}
 
     for idx, item in enumerate(collection.items[index:]):
-        author, model_name = item.item_id.split('/')
-        if item.item_type == 'model':
+        author, model_name = item.item_id.split("/")
+        if item.item_type == "model":
             model_link = f"https://huggingface.co/{item.item_id}"
         else:
             model_link = f"https://huggingface.co/{item.item_type}s/{item.item_id}"
         entry = f"- **[{model_name}]({model_link})** by [{author}](https://huggingface.co/{author}): TODO\n"
-        
+
         if print_idx:
             entry = f"- {idx + index} [{model_name}]({model_link}) by {author}\n"
-        
+
         if item.item_type in categories:
             categories[item.item_type].append(entry)
 
@@ -33,16 +35,27 @@ def process_collection(collection_name, index, print_idx=False):
     markdown_list += "\n References: ([2024 artifacts](https://huggingface.co/collections/natolambert/2024-interconnects-artifacts-6619a19e944c1e47024e9988), [2023 artifacts](https://huggingface.co/collections/natolambert/2023-interconnects-artifacts-661b19d27082ad0b43d67b17), [MMLU vs training compute model](https://docs.google.com/spreadsheets/d/13LMlSGQQ3_qxbjIcEkgqofr2Ay1JT0XEH4S-AWQi8so/edit?usp=sharing)) \n"
     return markdown_list
 
+
 def main():
-    parser = argparse.ArgumentParser(description='Process a Hugging Face collection into a Markdown list.')
-    parser.add_argument('collection_name', nargs='?', default='natolambert/2024-interconnects-artifacts-6619a19e944c1e47024e9988',
-                        help='The name of the Hugging Face collection (default: natolambert/2024-interconnects-artifacts-6619a19e944c1e47024e9988)')
-    parser.add_argument('--index', type=int, default=0, help='The start index of the collection list (to take the most recent elements)')
-    parser.add_argument('--print_idx', action='store_true', help='Print the index of the collection list')
+    parser = argparse.ArgumentParser(description="Process a Hugging Face collection into a Markdown list.")
+    parser.add_argument(
+        "collection_name",
+        nargs="?",
+        default="natolambert/2024-interconnects-artifacts-6619a19e944c1e47024e9988",
+        help="The name of the Hugging Face collection (default: natolambert/2024-interconnects-artifacts-6619a19e944c1e47024e9988)",
+    )
+    parser.add_argument(
+        "--index",
+        type=int,
+        default=0,
+        help="The start index of the collection list (to take the most recent elements)",
+    )
+    parser.add_argument("--print_idx", action="store_true", help="Print the index of the collection list")
     args = parser.parse_args()
 
     markdown_list = process_collection(args.collection_name, args.index, args.print_idx)
     print(markdown_list)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/scripts/ttv-generate.py b/scripts/ttv-generate.py
index 84f4558..0504dd9 100644
--- a/scripts/ttv-generate.py
+++ b/scripts/ttv-generate.py
@@ -169,7 +169,9 @@ def get_image(idx, inputs, vivid=True, hd=True, rewrite=True, no_sleep=False):
     if not args.do_not_gen:
         with Pool(processes=3) as pool:
             # enumerate based on start index
-            pool.starmap(get_image, enumerate(zip(prompts, title), start=args.start_idx))
+            pool.starmap(
+                get_image, enumerate(zip(prompts[args.start_idx :], title[args.start_idx :]), start=args.start_idx)
+            )
 
         # move all images from temp-images to args.input/images
         os.system(f"mv temp-images/* {args.input}images")