diff --git a/README.md b/README.md index 5d2359a..076cf63 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ For background on this project see [Building files-to-prompt entirely using Clau ## Installation Install this tool using `pip`: + ```bash pip install files-to-prompt ``` @@ -29,11 +30,13 @@ This will output the contents of every file, with each file preceded by its rela ### Options - `--include-hidden`: Include files and folders starting with `.` (hidden files and directories). + ```bash files-to-prompt path/to/directory --include-hidden ``` - `--ignore-gitignore`: Ignore `.gitignore` files and include all files. + ```bash files-to-prompt path/to/directory --ignore-gitignore ``` @@ -101,6 +104,26 @@ Contents of file3.txt --- ``` +### XML Output + +Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. + +To structure the output in this way, use the optional `--xml` flag, which will produce output like this: + +```xml +Here are some documents for you to reference for your task: + + +Contents of file1.txt + + + +Contents of file2.txt + +... + +``` + ## Development To contribute to this tool, first checkout the code. Then create a new virtual environment: @@ -118,6 +141,7 @@ pip install -e '.[test]' ``` To run the tests: + ```bash pytest ``` diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py index afba856..ce93f7b 100644 --- a/files_to_prompt/cli.py +++ b/files_to_prompt/cli.py @@ -1,7 +1,8 @@ import os -import click from fnmatch import fnmatch +import click + def should_ignore(path, gitignore_rules): for rule in gitignore_rules: @@ -23,7 +24,12 @@ def read_gitignore(path): def process_path( - path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns + path, + include_hidden, + ignore_gitignore, + gitignore_rules, + ignore_patterns, + xml, ): if os.path.isfile(path): try: @@ -69,11 +75,16 @@ def process_path( with open(file_path, "r") as f: file_contents = f.read() - click.echo(file_path) - click.echo("---") - click.echo(file_contents) - click.echo() - click.echo("---") + if xml: + click.echo(f'') + click.echo(file_contents) + click.echo("") + else: + click.echo(file_path) + click.echo("---") + click.echo(file_contents) + click.echo() + click.echo("---") except UnicodeDecodeError: warning_message = ( f"Warning: Skipping file {file_path} due to UnicodeDecodeError" @@ -100,8 +111,13 @@ def process_path( default=[], help="List of patterns to ignore", ) +@click.option( + "--xml", + is_flag=True, + help="Output in XML format suitable for Claude's long context window.", +) @click.version_option() -def cli(paths, include_hidden, ignore_gitignore, ignore_patterns): +def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, xml): """ Takes one or more paths to files or directories and outputs every file, recursively, each one preceded with its filename like this: @@ -114,6 +130,21 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns): path/to/file2.py --- ... + + If the `--xml` flag is provided, the output will be structured as follows: + + Here are some documents for you to reference for your task: + + + + Contents of file1.txt + + + + Contents of file2.txt + + ... + """ gitignore_rules = [] for path in paths: @@ -121,6 +152,21 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns): raise click.BadArgumentUsage(f"Path does not exist: {path}") if not ignore_gitignore: gitignore_rules.extend(read_gitignore(os.path.dirname(path))) + if xml and path == paths[0]: + click.echo(""" +Here are some documents for you to reference for your task: + + +""") + process_path( - path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns + path, + include_hidden, + ignore_gitignore, + gitignore_rules, + ignore_patterns, + xml, ) + + if xml: + click.echo("") diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py index 5e20af1..8f6410d 100644 --- a/tests/test_files_to_prompt.py +++ b/tests/test_files_to_prompt.py @@ -1,5 +1,7 @@ import os + from click.testing import CliRunner + from files_to_prompt.cli import cli @@ -186,3 +188,24 @@ def test_binary_file_warning(tmpdir): "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError" in stderr ) + + +def test_xml_format(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + os.makedirs("test_dir") + with open("test_dir/file1.txt", "w") as f: + f.write("Contents of file1") + with open("test_dir/file2.txt", "w") as f: + f.write("Contents of file2") + + result = runner.invoke(cli, ["test_dir", "--xml"]) + assert result.exit_code == 0 + assert "" in result.output + assert '' in result.output + assert "Contents of file1" in result.output + assert "" in result.output + assert '' in result.output + assert "Contents of file2" in result.output + assert "" in result.output + assert "" in result.output