Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ __marimo__/
*/DS_Store
.DS_Store

/tests/test_code/*

*.vscode

*.sln
*.sln

tests/testcode/*
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ dependencies = [
"typer",
"watchfiles",
"loguru",
"fastmcp"
"fastmcp",
"pathspec"
]

[project.scripts]
tostr = "tostr.cli:app"

[tool.pytest.ini_options]
pythonpath = ["src"]
56 changes: 3 additions & 53 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,95 +1,45 @@
aiofile==3.9.0
altgraph==0.17.5
annotated-doc==0.0.4
annotated-types==0.7.0
anyio==4.12.1
attrs==25.4.0
Authlib==1.6.9
beartype==0.22.9
cachetools==7.0.5
caio==0.9.25
certifi==2026.2.25
cffi==2.0.0
charset-normalizer==3.4.5
click==8.3.1
cryptography==46.0.5
cyclopts==4.10.0
distro==1.9.0
dnspython==2.8.0
docstring_parser==0.17.0
docutils==0.22.4
email-validator==2.3.0
exceptiongroup==1.3.1
fastmcp==3.1.1
google-auth==2.49.0
google-genai==1.66.0
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
httpx-sse==0.4.3
idna==3.11
importlib_metadata==8.7.1
iniconfig==2.3.0
jaraco.classes==3.4.0
jaraco.context==6.1.1
jaraco.functools==4.4.0
jsonref==1.1.0
jsonschema==4.26.0
jsonschema-path==0.4.5
jsonschema-specifications==2025.9.1
keyring==25.7.0
loguru==0.7.3
macholib==1.16.4
markdown-it-py==4.0.0
mcp==1.26.0
mdurl==0.1.2
more-itertools==10.8.0
openapi-pydantic==0.5.1
opentelemetry-api==1.40.0
packaging==26.0
pathable==0.5.0
platformdirs==4.9.4
pluggy==1.6.0
py-key-value-aio==0.4.4
pyasn1==0.6.2
pyasn1_modules==0.4.2
pycparser==3.0
pydantic==2.12.5
pydantic-settings==2.13.1
pydantic_core==2.41.5
Pygments==2.19.2
pyinstaller==6.20.0
pyinstaller-hooks-contrib==2026.4
PyJWT==2.12.1
pyperclip==1.11.0
pytest==9.0.2
pytest-asyncio==1.3.0
python-dotenv==1.2.2
python-multipart==0.0.22
PyYAML==6.0.3
referencing==0.37.0
requests==2.32.5
rich==14.3.3
rich-rst==1.3.2
rpds-py==0.30.0
rsa==4.9.1
setuptools==82.0.1
shellingham==1.5.4
sniffio==1.3.1
sse-starlette==3.3.3
starlette==0.52.1
tenacity==9.1.4
# Editable install with no version control (toaster==0.1.0)
-e /Users/averybrown/Projects/python/Toaster
tree-sitter==0.25.2
tree-sitter-c-sharp==0.23.1
tree-sitter-java==0.23.5
tree-sitter-python==0.25.0
typer==0.24.1
typing-inspection==0.4.2
typing_extensions==4.15.0
uncalled-for==0.2.0
urllib3==2.6.3
uvicorn==0.42.0
watchdog==6.0.0
watchfiles==1.1.1
websockets==16.0
zipp==3.23.0
wheel==0.46.3
10 changes: 9 additions & 1 deletion src/tostr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,14 @@ def init(
help="Load cache if it exists"
)
] = True,
ignore: Annotated[
str,
typer.Option(
"--ignore",
"-i",
help="Add a default ignore template to the project folder (e.g., 'java', 'default')"
)
] = None,
debug: Annotated[
bool,
typer.Option(
Expand All @@ -133,7 +141,7 @@ def init(
configure_cli_logging(debug)
start_time = time.perf_counter()
try:
asyncio.run(init_async(path, use_cache))
asyncio.run(init_async(path, use_cache, ignore))
except ToasterError as e:
typer.secho(f"❌ Error: {e}", fg="red", err=True)
raise typer.Exit(code=1)
Expand Down
31 changes: 30 additions & 1 deletion src/tostr/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ def clean_db(target_path: Path):
logger.info("Database cleaned.")
else:
logger.warning("No database found to clean.")

ignore_file = target_path / ".tostrignore"
if ignore_file.exists():
ignore_file.unlink()
logger.info(f"Deleted {ignore_file}")

async def _build_ast_async(target_path: Path, use_cache: bool = True) -> BaseParser:
llm = get_llm_client()
Expand All @@ -43,9 +48,33 @@ async def _build_ast_async(target_path: Path, use_cache: bool = True) -> BasePar
logger.success("✅ Parsed files")
return parser

async def init_async(target_path: Path, use_cache: bool = True):
def _write_default_ignore(target_path: Path, ignore_type: str):
    """Copy a bundled ignore template into the project's ``.tostrignore``.

    The template is looked up under the ``languages`` directory that sits next
    to this module: ``languages/default.tostrignore`` when ``ignore_type`` is
    ``"default"``, otherwise ``languages/<ignore_type>/default.tostrignore``.

    If ``.tostrignore`` already exists in ``target_path`` the template is
    appended after a newline separator; otherwise the file is created. When no
    template exists for ``ignore_type`` a warning is logged and nothing is
    written.

    Args:
        target_path: Project folder that receives the ``.tostrignore`` file.
        ignore_type: Template name, e.g. ``"default"`` or ``"java"``.
    """
    base_path = Path(__file__).parent / "languages"
    if ignore_type == "default":
        template_path = base_path / "default.tostrignore"
    else:
        template_path = base_path / ignore_type / "default.tostrignore"

    if not template_path.exists():
        logger.warning(f"No default ignore template found for {ignore_type} at {template_path}")
        return

    ignore_file = target_path / ".tostrignore"

    # Read and write with an explicit encoding: the locale default differs
    # between platforms and would make the copied rules platform-dependent.
    content = template_path.read_text(encoding="utf-8")

    mode = 'a' if ignore_file.exists() else 'w'
    with open(ignore_file, mode, encoding="utf-8") as f:
        if mode == 'a':
            # Keep the appended template from fusing with an existing last line.
            f.write("\n")
        f.write(content)
    logger.info(f"Written default ignore for {ignore_type} to {ignore_file}")

async def init_async(target_path: Path, use_cache: bool = True, ignore: str = None):
"""Core asynchronous logic for scraping and parsing."""

if ignore:
_write_default_ignore(target_path, ignore)

# Parse and resolve AST
parser = await _build_ast_async(target_path, use_cache=use_cache)

Expand Down
2 changes: 1 addition & 1 deletion src/tostr/core/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,6 @@ class DirectoryBuilder(BaseStructBuilder):
def from_dict(self, d: dict) -> Directory:
path = self.registry.relative_to_project(Path(d.get("path", ".")))
# logger.debug(f"Building Directory from dict with path: {path}")
return Directory(path=path, registry=self.registry)
return Directory(path=path, registry=self.registry, uid=d.get("uid"))


1 change: 1 addition & 0 deletions src/tostr/core/context/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .config import ProjectConfig
66 changes: 66 additions & 0 deletions src/tostr/core/context/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import tomllib
from pathlib import Path
from typing import Dict
import pathspec

from loguru import logger

class ProjectConfig:
    """Project-level configuration and path-ignore rules.

    On construction this loads ``<project>/.tostr/config.toml`` (if present)
    and compiles two gitignore-style pattern sets: the built-in
    ``HARDCODED_IGNORES`` and the patterns from ``<project>/.tostrignore``
    (if present). ``is_ignored`` checks a path against both.
    """

    # Patterns that are always ignored, independent of the project's
    # .tostrignore file.
    HARDCODED_IGNORES = [
        '.DS_Store',
        '*.exe',
        '*.bin',
        '*.dll',
        '*.so',
        '*.dylib',
        '*.pyc',
        '*.pyo',
        '*.pyd',
        '__pycache__/',
        '.git/',
        '.svn/',
        '.hg/',
        '.tostr/',
        '.tostrignore'
    ]

    def __init__(self, project_path: Path):
        """Load configuration and ignore rules rooted at ``project_path``."""
        self.project_path = project_path
        self.toml_config = self._init_toml_config(project_path)
        self.ignore_rules = self._init_path_ignore(project_path)
        self.hardcoded_rules = pathspec.PathSpec.from_lines('gitignore', self.HARDCODED_IGNORES)

    def _init_toml_config(self, project_path: Path) -> Dict:
        """Return the parsed ``.tostr/config.toml``, or ``{}`` if it is absent."""
        toml_path = project_path / ".tostr" / "config.toml"
        if toml_path.exists():
            # tomllib requires a binary file handle.
            with open(toml_path, 'rb') as f:
                config = tomllib.load(f)
            logger.debug(f"Loaded configuration from {toml_path}")
            return config
        logger.debug("No config.toml found, using defaults.")
        return {}

    def _init_path_ignore(self, project_path: Path) -> pathspec.PathSpec:
        """Compile the patterns in ``.tostrignore``; empty spec if it is absent."""
        ignore_path = project_path / ".tostrignore"
        if ignore_path.exists():
            # Explicit UTF-8 so patterns are read the same way on every
            # platform instead of depending on the locale's default encoding.
            with open(ignore_path, 'r', encoding='utf-8') as f:
                return pathspec.PathSpec.from_lines('gitignore', f)
        return pathspec.PathSpec.from_lines('gitignore', [])

    def is_ignored(self, file_path: Path) -> bool:
        """Return True if ``file_path`` should be skipped.

        A path is ignored when it lies outside the project root, matches one
        of the hardcoded patterns, or matches a ``.tostrignore`` pattern.
        """
        # Convert to a POSIX string relative to the project root, which is the
        # form the compiled pattern sets are matched against.
        try:
            relative_path = file_path.resolve().relative_to(self.project_path.resolve()).as_posix()
        except ValueError:
            # The path is outside the project root: treat it as ignored.
            return True

        # If it's a directory, append a slash so directory-only rules (like `dist/`) can match it
        if file_path.is_dir() and not relative_path.endswith('/'):
            relative_path += '/'

        # Check hardcoded rules first
        if self.hardcoded_rules.match_file(relative_path):
            return True

        return self.ignore_rules.match_file(relative_path)
22 changes: 17 additions & 5 deletions src/tostr/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,9 @@ def __str__(self):
class Directory(BaseStruct):
_IDPREFIX: ClassVar[str] = "D"

def __init__(self, path, registry=None, parent=None):
super().__init__(name=path.name, path=path, uid=str(path), registry=registry, parent=parent)
def __init__(self, path, registry=None, parent=None, uid=None):
uid = uid or str(path)
super().__init__(name=path.name, path=path, uid=uid, registry=registry, parent=parent)

async def resolve_description_async(self, llm: "LLMClient", visited: set[str] = None):
pass
Expand All @@ -204,9 +205,17 @@ def parse_children(self):
if self.path is None:
logger.error(f"{self} has no path")
return
for path in self.path.glob("*"):
if any(part in path.parts for part in ["venv", ".venv", "env", ".env", "build", "dist", "__pycache__", ".tostr", ".DS_Store", ".git"]):
continue

# Ensure we use an absolute path for globbing if it's relative
full_path = self.path
if not full_path.is_absolute() and self.registry:
full_path = self.registry.project_path / self.path

for path in full_path.glob("*"):
if self.registry.config.is_ignored(path):
logger.debug(f"Skipping '{path}' due to path ignore rules")
continue
else:
if path.is_dir():
logger.debug(f"🔍 Parsing directory '{path}'")
relative_path = self.registry.relative_to_project(path)
Expand All @@ -217,6 +226,9 @@ def parse_children(self):
else:
logger.debug(f"Attempting to resolve builder for suffix {path.parts[-1]}")
try:
if self.registry.config.is_ignored(path):
logger.debug(f"Skipping '{path}' due to path ignore rules")
continue
builder = StructBuilderProvider.get_builder(path.suffix, self.registry)
except LanguageNotSupportedError as e:
continue
Expand Down
21 changes: 17 additions & 4 deletions src/tostr/core/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@

class BaseParser(ABC):
def __init__(self, project_dir: str, llm=None, registry: Registry=None):
self.project_dir = project_dir
self.llm = llm
self.registry = registry
self.path_ignore = ["venv", ".venv", "env", ".env", "build", "dist", "__pycache__", ".tostr", ".git"]
# self.path_ignore = ["venv", ".venv", "env", ".env", "build", "dist", "__pycache__", ".tostr", ".git"]

@property
def files(self):
Expand All @@ -27,7 +28,7 @@ def files(self):

async def parse(self, subpath: Path = None):
if not subpath:
subpath = Path(".")
subpath = Path(self.project_dir)
if not isinstance(subpath, Path):
subpath = Path(subpath)

Expand All @@ -40,12 +41,19 @@ async def parse(self, subpath: Path = None):
def parse_path(self, subpath: Path = None):
if subpath.is_dir():
logger.debug(f"🔍 Parsing files in '{subpath}'")
root = Directory(path=subpath, registry=self.registry)

# Use relative path for root UID if possible
root_path = subpath
if self.registry:
root_path = self.registry.relative_to_project(subpath)

root = Directory(path=root_path, registry=self.registry)
self.registry.root = root
logger.debug(f"Created registry root: {root}")
self.registry.add_struct(root)
for path in subpath.glob("*"):
if any(part in path.parts for part in self.path_ignore):
if self.registry.config.is_ignored(path):
logger.debug(f"Skipping '{path}' due to path ignore rules")
continue
if path.is_dir():
logger.debug(f"🔍 Parsing directory '{path}'")
Expand All @@ -70,9 +78,14 @@ def parse_path(self, subpath: Path = None):
# @abstractmethod
def parse_file(self, subpath: Path, parent: BaseStruct=None) -> BaseFile:
logger.debug(f"Attempting to resolve builder for suffix {subpath.parts[-1]}")
if self.registry.config.is_ignored(subpath):
logger.debug(f"Skipping '{subpath}' due to path ignore rules")
return None

try:
builder = StructBuilderProvider.get_builder(subpath.suffix, self.registry)
except LanguageNotSupportedError as e:
logger.warning(str(e))
return None
file_obj = builder.build_file().from_path(subpath, parent=parent)
# logger.debug(json.dumps(file_obj.to_dict(), indent=2))
Expand Down
Loading