Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --xml flag #16

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ venv
.pytest_cache
*.egg-info
.DS_Store
build/
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ For background on this project see [Building files-to-prompt entirely using Clau
## Installation

Install this tool using `pip`:

```bash
pip install files-to-prompt
```
Expand All @@ -29,11 +30,13 @@ This will output the contents of every file, with each file preceded by its rela
### Options

- `--include-hidden`: Include files and folders starting with `.` (hidden files and directories).

```bash
files-to-prompt path/to/directory --include-hidden
```

- `--ignore-gitignore`: Ignore `.gitignore` files and include all files.

```bash
files-to-prompt path/to/directory --ignore-gitignore
```
Expand Down Expand Up @@ -101,6 +104,27 @@ Contents of file3.txt
---
```

### XML Output

Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.

To structure the output in this way, use the optional `--xml` flag, which will produce output like this:

```xml
Here are some documents for you to reference for your task:

<documents>
<document path="my_directory/file1.txt">
Contents of file1.txt
</document>

<document path="my_directory/file2.txt">
Contents of file2.txt
</document>
...
</documents>
```

## Development

To contribute to this tool, first check out the code. Then create a new virtual environment:
Expand All @@ -118,6 +142,7 @@ pip install -e '.[test]'
```

To run the tests:

```bash
pytest
```
85 changes: 67 additions & 18 deletions files_to_prompt/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import click
from fnmatch import fnmatch

import click


def should_ignore(path, gitignore_rules):
for rule in gitignore_rules:
Expand All @@ -22,18 +23,39 @@ def read_gitignore(path):
return []


def print_path(path, content, xml):
    """Emit one file's path and contents in the selected output format.

    Args:
        path: Path of the file, as it should appear in the output.
        content: Text content of the file.
        xml: When truthy, delegate to ``print_as_xml``; otherwise delegate
            to ``print_default``.
    """
    emit = print_as_xml if xml else print_default
    emit(path, content)


def print_default(path, content):
    """Write a file in the plain-text layout using ``click.echo``.

    The output is, in order: the path, a ``---`` separator line, the
    content, an empty line, and a closing ``---`` separator line.

    Args:
        path: Path of the file, printed as the heading line.
        content: Text content of the file.
    """
    separator = "---"
    for line in (path, separator, content):
        click.echo(line)
    # Blank line between the content and the closing separator.
    click.echo()
    click.echo(separator)


def print_as_xml(path, content):
    """Write a file as a ``<document path="...">`` element using ``click.echo``.

    The path is emitted as a properly quoted XML attribute value, so a path
    containing ``"``, ``&`` or ``<`` cannot break out of the attribute or
    produce a malformed opening tag. Ordinary paths are emitted exactly as
    ``path="<path>"``.

    The content is written verbatim between the opening and closing tags;
    it is not XML-escaped.

    Args:
        path: Path of the file, stored in the ``path`` attribute.
        content: Text content of the file.
    """
    # Local import keeps this block self-contained; quoteattr returns the
    # value already wrapped in the appropriate quote characters.
    from xml.sax.saxutils import quoteattr

    click.echo(f"<document path={quoteattr(str(path))}>")
    click.echo(content)
    click.echo("</document>")


def process_path(
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
path,
include_hidden,
ignore_gitignore,
gitignore_rules,
ignore_patterns,
xml,
):
if os.path.isfile(path):
try:
with open(path, "r") as f:
file_contents = f.read()
click.echo(path)
click.echo("---")
click.echo(file_contents)
click.echo()
click.echo("---")
print_path(path, f.read(), xml)
except UnicodeDecodeError:
warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError"
click.echo(click.style(warning_message, fg="red"), err=True)
Expand Down Expand Up @@ -63,17 +85,11 @@ def process_path(
if not any(fnmatch(f, pattern) for pattern in ignore_patterns)
]

for file in files:
for file in sorted(files):
file_path = os.path.join(root, file)
try:
with open(file_path, "r") as f:
file_contents = f.read()

click.echo(file_path)
click.echo("---")
click.echo(file_contents)
click.echo()
click.echo("---")
print_path(file_path, f.read(), xml)
except UnicodeDecodeError:
warning_message = (
f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
Expand All @@ -100,8 +116,13 @@ def process_path(
default=[],
help="List of patterns to ignore",
)
@click.option(
"--xml",
is_flag=True,
help="Output in XML format suitable for Claude's long context window.",
)
@click.version_option()
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, xml):
"""
Takes one or more paths to files or directories and outputs every file,
recursively, each one preceded with its filename like this:
Expand All @@ -114,13 +135,41 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns):
path/to/file2.py
---
...

If the `--xml` flag is provided, the output will be structured as follows:

Here are some documents for you to reference for your task:

<documents>
<document path="path/to/file1.txt">
Contents of file1.txt
</document>

<document path="path/to/file2.txt">
Contents of file2.txt
</document>
...
</documents>
"""
gitignore_rules = []
for path in paths:
if not os.path.exists(path):
raise click.BadArgumentUsage(f"Path does not exist: {path}")
if not ignore_gitignore:
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
if xml and path == paths[0]:
click.echo("Here are some documents for you to reference for your task:")
click.echo()
click.echo("<documents>")

process_path(
path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
path,
include_hidden,
ignore_gitignore,
gitignore_rules,
ignore_patterns,
xml,
)

if xml:
click.echo("</documents>")
57 changes: 57 additions & 0 deletions tests/test_files_to_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os

from click.testing import CliRunner

from files_to_prompt.cli import cli


Expand Down Expand Up @@ -186,3 +188,58 @@ def test_binary_file_warning(tmpdir):
"Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
in stderr
)


def test_xml_format_dir(tmpdir):
    """``--xml`` on a directory wraps every file in a ``<document>`` element."""
    want = """Here are some documents for you to reference for your task:

<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document>
</documents>
"""
    fixtures = {
        "test_dir/file1.txt": "Contents of file1",
        "test_dir/file2.txt": "Contents of file2",
    }
    runner = CliRunner()
    with tmpdir.as_cwd():
        os.makedirs("test_dir")
        for file_path, text in fixtures.items():
            with open(file_path, "w") as handle:
                handle.write(text)

        result = runner.invoke(cli, ["test_dir", "--xml"])
        assert result.exit_code == 0
        have = result.output
        assert want == have


def test_xml_format_multiple_paths(tmpdir):
    """``--xml`` with several file paths emits one header and one wrapper."""
    want = """Here are some documents for you to reference for your task:

<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document>
</documents>
"""
    fixtures = {
        "test_dir/file1.txt": "Contents of file1",
        "test_dir/file2.txt": "Contents of file2",
    }
    runner = CliRunner()
    with tmpdir.as_cwd():
        os.makedirs("test_dir")
        for file_path, text in fixtures.items():
            with open(file_path, "w") as handle:
                handle.write(text)

        # Pass each file explicitly, in creation order, followed by the flag.
        result = runner.invoke(cli, [*fixtures, "--xml"])

        assert result.exit_code == 0
        have = result.output
        assert want == have