Skip to content

Commit

Permalink
Add --xml flag
Browse files Browse the repository at this point in the history
Closes #15
  • Loading branch information
lexh committed Apr 24, 2024
1 parent 4810ef7 commit ceb5da0
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 9 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ For background on this project see [Building files-to-prompt entirely using Clau
## Installation

Install this tool using `pip`:

```bash
pip install files-to-prompt
```
Expand All @@ -29,11 +30,13 @@ This will output the contents of every file, with each file preceded by its rela
### Options

- `--include-hidden`: Include files and folders starting with `.` (hidden files and directories).

```bash
files-to-prompt path/to/directory --include-hidden
```

- `--ignore-gitignore`: Ignore `.gitignore` files and include all files.

```bash
files-to-prompt path/to/directory --ignore-gitignore
```
Expand Down Expand Up @@ -101,6 +104,26 @@ Contents of file3.txt
---
```

### XML Output

Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.

To structure the output in this way, use the optional `--xml` flag, which will produce output like this:

```xml
Here are some documents for you to reference for your task:

<documents>
<document path="my_directory/file1.txt">
Contents of file1.txt
</document>

<document path="my_directory/file2.txt">
Contents of file2.txt
</document>
...
</documents>
```

## Development

To contribute to this tool, first check out the code. Then create a new virtual environment:
Expand All @@ -118,6 +141,7 @@ pip install -e '.[test]'
```

To run the tests:

```bash
pytest
```
64 changes: 55 additions & 9 deletions files_to_prompt/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import click
from fnmatch import fnmatch

import click


def should_ignore(path, gitignore_rules):
for rule in gitignore_rules:
Expand All @@ -23,7 +24,12 @@ def read_gitignore(path):


def process_path(
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
path,
include_hidden,
ignore_gitignore,
gitignore_rules,
ignore_patterns,
xml,
):
if os.path.isfile(path):
try:
Expand Down Expand Up @@ -69,11 +75,16 @@ def process_path(
with open(file_path, "r") as f:
file_contents = f.read()

click.echo(file_path)
click.echo("---")
click.echo(file_contents)
click.echo()
click.echo("---")
if xml:
click.echo(f'<document path="{file_path}">')
click.echo(file_contents)
click.echo("</document>")
else:
click.echo(file_path)
click.echo("---")
click.echo(file_contents)
click.echo()
click.echo("---")
except UnicodeDecodeError:
warning_message = (
f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
Expand All @@ -100,8 +111,13 @@ def process_path(
default=[],
help="List of patterns to ignore",
)
@click.option(
"--xml",
is_flag=True,
help="Output in XML format suitable for Claude's long context window.",
)
@click.version_option()
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, xml):
"""
Takes one or more paths to files or directories and outputs every file,
recursively, each one preceded with its filename like this:
Expand All @@ -114,13 +130,43 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
path/to/file2.py
---
...
If the `--xml` flag is provided, the output will be structured as follows:
Here are some documents for you to reference for your task:
<documents>
<document path="path/to/file1.txt">
Contents of file1.txt
</document>
<document path="path/to/file2.txt">
Contents of file2.txt
</document>
...
</documents>
"""
gitignore_rules = []
for path in paths:
if not os.path.exists(path):
raise click.BadArgumentUsage(f"Path does not exist: {path}")
if not ignore_gitignore:
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
if xml and path == paths[0]:
click.echo("""
Here are some documents for you to reference for your task:
<documents>
""")

process_path(
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
path,
include_hidden,
ignore_gitignore,
gitignore_rules,
ignore_patterns,
xml,
)

if xml:
click.echo("</documents>")
23 changes: 23 additions & 0 deletions tests/test_files_to_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os

from click.testing import CliRunner

from files_to_prompt.cli import cli


Expand Down Expand Up @@ -186,3 +188,24 @@ def test_binary_file_warning(tmpdir):
"Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
in stderr
)


def test_xml_format(tmpdir):
    """Check that ``--xml`` wraps each file in its own ``<document>`` element.

    Creates a directory holding two small text files, runs the CLI on it with
    ``--xml``, and verifies that the output contains a single ``<documents>``
    wrapper enclosing one opened-and-closed ``<document>`` per file.
    """
    runner = CliRunner()
    with tmpdir.as_cwd():
        os.makedirs("test_dir")
        with open("test_dir/file1.txt", "w") as f:
            f.write("Contents of file1")
        with open("test_dir/file2.txt", "w") as f:
            f.write("Contents of file2")

        result = runner.invoke(cli, ["test_dir", "--xml"])
        assert result.exit_code == 0
        output = result.output

        # The outer wrapper must be opened and closed exactly once.
        assert output.count("<documents>") == 1
        assert output.count("</documents>") == 1

        for name in ("file1", "file2"):
            assert f'<document path="test_dir/{name}.txt">' in output
            assert f"Contents of {name}" in output

        # "</documents>" does not contain "</document>" as a substring, so
        # this counts only the per-file closing tags: one for each file.
        assert output.count("</document>") == 2

        # Every <document> element must sit inside the <documents> wrapper.
        assert output.index("<documents>") < output.index("<document path=")
        assert output.rindex("</document>") < output.index("</documents>")

0 comments on commit ceb5da0

Please sign in to comment.