Skip to content

Commit 5baa663

Browse files
committed
Applying Claude's patch to make new modes on lsa for briefer product descriptions
1 parent 8c068c6 commit 5baa663

1 file changed

Lines changed: 42 additions & 11 deletions

File tree

scripts/articles/lsa.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ def main():
163163
parser.add_argument('-g', '--gaps', action='store_true', help="Run and display the sort_order contiguity gap report")
164164
parser.add_argument('-r', '--reverse', action='store_true', help="Reverse the sorting order")
165165
parser.add_argument('-a', '--article', type=str, help="Generate a prompt_foo.py command for a slice of articles (e.g., '[-5:]')")
166+
parser.add_argument('--top', type=int, default=None, metavar='N', help="Limit output to the first N results (after sorting)")
167+
parser.add_argument('--match', type=str, default=None, metavar='TERMS', help="Filter articles whose filename contains all whitespace-separated terms (case-insensitive)")
168+
parser.add_argument('--tokens-under', type=int, default=None, metavar='N', dest='tokens_under', help="Exclude articles with token count >= N (requires reading each file)")
169+
parser.add_argument('--fmt', type=str, default='full', choices=['full', 'paths'], help="Output format: 'full' (default, with comments) or 'paths' (bare absolute paths only)")
166170
args = parser.parse_args()
167171

168172
targets = load_targets()
@@ -211,6 +215,29 @@ def main():
211215
# Sort first by date, then by the YAML sort_order
212216
metadata.sort(key=lambda p: (p['date'], p['sort_order']), reverse=args.reverse)
213217

218+
# --- PASS 1.5: FILTERING ---
219+
# --match: substring filter on the article's path string, i.e. item['path'], not just the basename (free, no I/O)
220+
if args.match:
221+
terms = args.match.lower().split()
222+
metadata = [item for item in metadata if all(t in item['path'].lower() for t in terms)]
223+
224+
# --top: limit after sort and --match; applied BEFORE --tokens-under, so fewer than N results may remain
225+
if args.top is not None:
226+
metadata = metadata[:args.top]
227+
228+
# --tokens-under: expensive filter, read each file
229+
if args.tokens_under is not None:
230+
filtered = []
231+
for item in metadata:
232+
try:
233+
with open(item['path'], 'r', encoding='utf-8') as f:
234+
content = f.read()
235+
if count_tokens(content) < args.tokens_under:
236+
filtered.append(item)
237+
except Exception:
238+
filtered.append(item) # keep on error
239+
metadata = filtered
240+
214241
# --- PASS 2: OUTPUT GENERATION (REPORT OR COMMAND) ---
215242
if args.article:
216243
# Executable Telemetry Mode: Generate the prompt_foo.py command
@@ -235,17 +262,21 @@ def main():
235262
print(f"❌ Invalid slice format: {args.article}. Use format like '[-5:]'", file=sys.stderr)
236263
else:
237264
# Standard Mode: Heavy Lifting & Streaming Output
238-
for idx, item in enumerate(metadata, start=1):
239-
filepath = item['path']
240-
try:
241-
with open(filepath, 'r', encoding='utf-8') as f:
242-
content = f.read()
243-
tokens = count_tokens(content)
244-
bytes_count = len(content.encode('utf-8'))
245-
order = item['sort_order']
246-
print(f"{filepath} # [Idx: {idx} | Order: {order} | Tokens: {tokens:,} | Bytes: {bytes_count:,}]", flush=True)
247-
except Exception as e:
248-
print(f"# Error processing {filepath}: {e}", file=sys.stderr)
265+
if args.fmt == 'paths':
266+
for item in metadata:
267+
print(item['path'], flush=True)
268+
else:
269+
for idx, item in enumerate(metadata, start=1):
270+
filepath = item['path']
271+
try:
272+
with open(filepath, 'r', encoding='utf-8') as f:
273+
content = f.read()
274+
tokens = count_tokens(content)
275+
bytes_count = len(content.encode('utf-8'))
276+
order = item['sort_order']
277+
print(f"{filepath} # [Idx: {idx} | Order: {order} | Tokens: {tokens:,} | Bytes: {bytes_count:,}]", flush=True)
278+
except Exception as e:
279+
print(f"# Error processing {filepath}: {e}", file=sys.stderr)
249280

250281

251282
def get_holographic_article_data(target_dir: str) -> list[dict]:

0 commit comments

Comments
 (0)