@@ -163,6 +163,10 @@ def main():
163163 parser .add_argument ('-g' , '--gaps' , action = 'store_true' , help = "Run and display the sort_order contiguity gap report" )
164164 parser .add_argument ('-r' , '--reverse' , action = 'store_true' , help = "Reverse the sorting order" )
165165 parser .add_argument ('-a' , '--article' , type = str , help = "Generate a prompt_foo.py command for a slice of articles (e.g., '[-5:]')" )
166+ parser .add_argument ('--top' , type = int , default = None , metavar = 'N' , help = "Limit output to the first N results (after sorting)" )
167+ parser .add_argument ('--match' , type = str , default = None , metavar = 'TERMS' , help = "Filter articles whose filename contains all whitespace-separated terms (case-insensitive)" )
168+ parser .add_argument ('--tokens-under' , type = int , default = None , metavar = 'N' , dest = 'tokens_under' , help = "Exclude articles with token count >= N (requires reading each file)" )
169+ parser .add_argument ('--fmt' , type = str , default = 'full' , choices = ['full' , 'paths' ], help = "Output format: 'full' (default, with comments) or 'paths' (bare absolute paths only)" )
166170 args = parser .parse_args ()
167171
168172 targets = load_targets ()
@@ -211,6 +215,29 @@ def main():
211215 # Sort first by date, then by the YAML sort_order
212216 metadata .sort (key = lambda p : (p ['date' ], p ['sort_order' ]), reverse = args .reverse )
213217
218+ # --- PASS 1.5: FILTERING ---
219+ # --match: substring filter on filename (free, no I/O)
220+ if args .match :
221+ terms = args .match .lower ().split ()
222+ metadata = [item for item in metadata if all (t in item ['path' ].lower () for t in terms )]
223+
224+ # --top: limit after sort and --match (applied before --tokens-under, so fewer than N results may remain)
225+ if args .top is not None :
226+ metadata = metadata [:args .top ]
227+
228+ # --tokens-under: expensive filter, read each file
229+ if args .tokens_under is not None :
230+ filtered = []
231+ for item in metadata :
232+ try :
233+ with open (item ['path' ], 'r' , encoding = 'utf-8' ) as f :
234+ content = f .read ()
235+ if count_tokens (content ) < args .tokens_under :
236+ filtered .append (item )
237+ except Exception :
238+ filtered .append (item ) # keep on error
239+ metadata = filtered
240+
214241 # --- PASS 2: OUTPUT GENERATION (REPORT OR COMMAND) ---
215242 if args .article :
216243 # Executable Telemetry Mode: Generate the prompt_foo.py command
@@ -235,17 +262,21 @@ def main():
235262 print (f"❌ Invalid slice format: { args .article } . Use format like '[-5:]'" , file = sys .stderr )
236263 else :
237264 # Standard Mode: Heavy Lifting & Streaming Output
238- for idx , item in enumerate (metadata , start = 1 ):
239- filepath = item ['path' ]
240- try :
241- with open (filepath , 'r' , encoding = 'utf-8' ) as f :
242- content = f .read ()
243- tokens = count_tokens (content )
244- bytes_count = len (content .encode ('utf-8' ))
245- order = item ['sort_order' ]
246- print (f"{ filepath } # [Idx: { idx } | Order: { order } | Tokens: { tokens :,} | Bytes: { bytes_count :,} ]" , flush = True )
247- except Exception as e :
248- print (f"# Error processing { filepath } : { e } " , file = sys .stderr )
265+ if args .fmt == 'paths' :
266+ for item in metadata :
267+ print (item ['path' ], flush = True )
268+ else :
269+ for idx , item in enumerate (metadata , start = 1 ):
270+ filepath = item ['path' ]
271+ try :
272+ with open (filepath , 'r' , encoding = 'utf-8' ) as f :
273+ content = f .read ()
274+ tokens = count_tokens (content )
275+ bytes_count = len (content .encode ('utf-8' ))
276+ order = item ['sort_order' ]
277+ print (f"{ filepath } # [Idx: { idx } | Order: { order } | Tokens: { tokens :,} | Bytes: { bytes_count :,} ]" , flush = True )
278+ except Exception as e :
279+ print (f"# Error processing { filepath } : { e } " , file = sys .stderr )
249280
250281
251282def get_holographic_article_data (target_dir : str ) -> list [dict ]:
0 commit comments