diff --git a/.gitignore b/.gitignore index 9215452..0024d3d 100644 --- a/.gitignore +++ b/.gitignore @@ -209,8 +209,8 @@ __marimo__/ */DS_Store .DS_Store -/tests/test_code/* - *.vscode -*.sln \ No newline at end of file +*.sln + +tests/testcode/* diff --git a/pyproject.toml b/pyproject.toml index 9f8278c..22cf13a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,12 @@ dependencies = [ "typer", "watchfiles", "loguru", - "fastmcp" + "fastmcp", + "pathspec" ] [project.scripts] tostr = "tostr.cli:app" + +[tool.pytest.ini_options] +pythonpath = ["src"] diff --git a/requirements.txt b/requirements.txt index 4f8c5f5..195a076 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,95 +1,45 @@ -aiofile==3.9.0 -altgraph==0.17.5 annotated-doc==0.0.4 annotated-types==0.7.0 anyio==4.12.1 -attrs==25.4.0 -Authlib==1.6.9 -beartype==0.22.9 -cachetools==7.0.5 -caio==0.9.25 certifi==2026.2.25 cffi==2.0.0 charset-normalizer==3.4.5 click==8.3.1 cryptography==46.0.5 -cyclopts==4.10.0 distro==1.9.0 -dnspython==2.8.0 -docstring_parser==0.17.0 -docutils==0.22.4 -email-validator==2.3.0 -exceptiongroup==1.3.1 -fastmcp==3.1.1 google-auth==2.49.0 google-genai==1.66.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 -httpx-sse==0.4.3 idna==3.11 -importlib_metadata==8.7.1 iniconfig==2.3.0 -jaraco.classes==3.4.0 -jaraco.context==6.1.1 -jaraco.functools==4.4.0 -jsonref==1.1.0 -jsonschema==4.26.0 -jsonschema-path==0.4.5 -jsonschema-specifications==2025.9.1 -keyring==25.7.0 -loguru==0.7.3 -macholib==1.16.4 markdown-it-py==4.0.0 -mcp==1.26.0 mdurl==0.1.2 -more-itertools==10.8.0 -openapi-pydantic==0.5.1 -opentelemetry-api==1.40.0 packaging==26.0 -pathable==0.5.0 -platformdirs==4.9.4 pluggy==1.6.0 -py-key-value-aio==0.4.4 pyasn1==0.6.2 pyasn1_modules==0.4.2 pycparser==3.0 pydantic==2.12.5 -pydantic-settings==2.13.1 pydantic_core==2.41.5 Pygments==2.19.2 -pyinstaller==6.20.0 -pyinstaller-hooks-contrib==2026.4 -PyJWT==2.12.1 -pyperclip==1.11.0 pytest==9.0.2 pytest-asyncio==1.3.0 -python-dotenv==1.2.2 -python-multipart==0.0.22 -PyYAML==6.0.3 -referencing==0.37.0 requests==2.32.5 rich==14.3.3 -rich-rst==1.3.2 -rpds-py==0.30.0 rsa==4.9.1 -setuptools==82.0.1 shellingham==1.5.4 sniffio==1.3.1 -sse-starlette==3.3.3 -starlette==0.52.1 tenacity==9.1.4 +# Editable install with no version control (toaster==0.1.0) +-e /Users/averybrown/Projects/python/Toaster tree-sitter==0.25.2 tree-sitter-c-sharp==0.23.1 tree-sitter-java==0.23.5 -tree-sitter-python==0.25.0 typer==0.24.1 typing-inspection==0.4.2 typing_extensions==4.15.0 -uncalled-for==0.2.0 urllib3==2.6.3 -uvicorn==0.42.0 -watchdog==6.0.0 -watchfiles==1.1.1 websockets==16.0 -zipp==3.23.0 +wheel==0.46.3 diff --git a/src/tostr/cli.py b/src/tostr/cli.py index 1caa728..32abeaf 100644 --- a/src/tostr/cli.py +++ b/src/tostr/cli.py @@ -120,6 +120,14 @@ def init( help="Load cache if it exists" ) ] = True, + ignore: Annotated[ + str, + typer.Option( + "--ignore", + "-i", + help="Add a default ignore template to the project folder (e.g., 'java', 'default')" + ) + ] = None, debug: Annotated[ bool, typer.Option( @@ -133,7 +141,7 @@ def init( configure_cli_logging(debug) start_time = time.perf_counter() try: - asyncio.run(init_async(path, use_cache)) + asyncio.run(init_async(path, use_cache, ignore)) except ToasterError as e: typer.secho(f"❌ Error: {e}", fg="red", err=True) raise typer.Exit(code=1) diff --git a/src/tostr/commands.py b/src/tostr/commands.py index fe2dbac..e1efb07 100644 --- a/src/tostr/commands.py +++ b/src/tostr/commands.py @@ -30,6 +30,11 @@ def clean_db(target_path: Path): logger.info("Database cleaned.") else: logger.warning("No database found to clean.") + + ignore_file = target_path / ".tostrignore" + if ignore_file.exists(): + ignore_file.unlink() + logger.info(f"Deleted {ignore_file}") async def _build_ast_async(target_path: Path, use_cache: bool = True) -> BaseParser: llm = get_llm_client() @@ -43,9 +48,33 @@ async def _build_ast_async(target_path: Path, use_cache: bool = True) -> BasePar logger.success("✅ Parsed files") return parser -async def init_async(target_path: Path, use_cache: bool = True): +def _write_default_ignore(target_path: Path, ignore_type: str): + base_path = Path(__file__).parent / "languages" + if ignore_type == "default": + template_path = base_path / "default.tostrignore" + else: + template_path = base_path / ignore_type / "default.tostrignore" + + if template_path.exists(): + ignore_file = target_path / ".tostrignore" + with open(template_path, 'r') as f: + content = f.read() + + mode = 'a' if ignore_file.exists() else 'w' + with open(ignore_file, mode) as f: + if mode == 'a': + f.write("\n") + f.write(content) + logger.info(f"Written default ignore for {ignore_type} to {ignore_file}") + else: + logger.warning(f"No default ignore template found for {ignore_type} at {template_path}") + +async def init_async(target_path: Path, use_cache: bool = True, ignore: str = None): """Core asynchronous logic for scraping and parsing.""" + if ignore: + _write_default_ignore(target_path, ignore) + # Parse and resolve AST parser = await _build_ast_async(target_path, use_cache=use_cache) diff --git a/src/tostr/core/builders.py b/src/tostr/core/builders.py index 365c150..af149fc 100644 --- a/src/tostr/core/builders.py +++ b/src/tostr/core/builders.py @@ -134,6 +134,6 @@ class DirectoryBuilder(BaseStructBuilder): def from_dict(self, d: dict) -> Directory: path = self.registry.relative_to_project(Path(d.get("path", "."))) # logger.debug(f"Building Directory from dict with path: {path}") - return Directory(path=path, registry=self.registry) + return Directory(path=path, registry=self.registry, uid=d.get("uid")) \ No newline at end of file diff --git a/src/tostr/core/context/__init__.py b/src/tostr/core/context/__init__.py new file mode 100644 index 0000000..3b5b2a1 --- /dev/null +++ b/src/tostr/core/context/__init__.py @@ -0,0 +1 @@ +from .config import ProjectConfig \ No newline at end of file diff --git a/src/tostr/core/context/config.py b/src/tostr/core/context/config.py new file mode 100644 index 0000000..7f8635b --- /dev/null +++ b/src/tostr/core/context/config.py @@ -0,0 +1,66 @@ +import tomllib +from pathlib import Path +from typing import Dict +import pathspec + +from loguru import logger + +class ProjectConfig: + HARDCODED_IGNORES = [ + '.DS_Store', + '*.exe', + '*.bin', + '*.dll', + '*.so', + '*.dylib', + '*.pyc', + '*.pyo', + '*.pyd', + '__pycache__/', + '.git/', + '.svn/', + '.hg/', + '.tostr/', + '.tostrignore' + ] + + def __init__(self, project_path: Path): + self.project_path = project_path + self.toml_config = self._init_toml_config(project_path) + self.ignore_rules = self._init_path_ignore(project_path) + self.hardcoded_rules = pathspec.PathSpec.from_lines('gitignore', self.HARDCODED_IGNORES) + + def _init_toml_config(self, project_path: Path) -> Dict: + toml_path = project_path / ".tostr" / "config.toml" + if toml_path.exists(): + with open(toml_path, 'rb') as f: + config = tomllib.load(f) + logger.debug(f"Loaded configuration from {toml_path}") + return config + logger.debug("No config.toml found, using defaults.") + return {} + + def _init_path_ignore(self, project_path: Path) -> pathspec.PathSpec: + ignore_path = project_path / ".tostrignore" + if ignore_path.exists(): + with open(ignore_path, 'r') as f: + return pathspec.PathSpec.from_lines('gitignore', f) + return pathspec.PathSpec.from_lines('gitignore', []) + + def is_ignored(self, file_path: Path) -> bool: + # 1. Convert to a POSIX string relative to the project root + try: + relative_path = file_path.resolve().relative_to(self.project_path.resolve()).as_posix() + except ValueError: + # If the file is outside the project root, we should probably ignore it + return True + + # If it's a directory, append a slash so directory-only rules (like `dist/`) can match it + if file_path.is_dir() and not relative_path.endswith('/'): + relative_path += '/' + + # Check hardcoded rules first + if self.hardcoded_rules.match_file(relative_path): + return True + + return self.ignore_rules.match_file(relative_path) diff --git a/src/tostr/core/models.py b/src/tostr/core/models.py index 3d9a500..19c2bd4 100644 --- a/src/tostr/core/models.py +++ b/src/tostr/core/models.py @@ -194,8 +194,9 @@ def __str__(self): class Directory(BaseStruct): _IDPREFIX: ClassVar[str] = "D" - def __init__(self, path, registry=None, parent=None): - super().__init__(name=path.name, path=path, uid=str(path), registry=registry, parent=parent) + def __init__(self, path, registry=None, parent=None, uid=None): + uid = uid or str(path) + super().__init__(name=path.name, path=path, uid=uid, registry=registry, parent=parent) async def resolve_description_async(self, llm: "LLMClient", visited: set[str] = None): pass @@ -204,9 +205,17 @@ def parse_children(self): if self.path is None: logger.error(f"{self} has no path") return - for path in self.path.glob("*"): - if any(part in path.parts for part in ["venv", ".venv", "env", ".env", "build", "dist", "__pycache__", ".tostr", ".DS_Store", ".git"]): - continue + + # Ensure we use an absolute path for globbing if it's relative + full_path = self.path + if not full_path.is_absolute() and self.registry: + full_path = self.registry.project_path / self.path + + for path in full_path.glob("*"): + if self.registry.config.is_ignored(path): + logger.debug(f"Skipping '{path}' due to path ignore rules") + continue + else: if path.is_dir(): logger.debug(f"🔍 Parsing directory '{path}'") relative_path = self.registry.relative_to_project(path) @@ -217,6 +226,9 @@ def parse_children(self): else: logger.debug(f"Attempting to resolve builder for suffix {path.parts[-1]}") try: + if self.registry.config.is_ignored(path): + logger.debug(f"Skipping '{path}' due to path ignore rules") + continue builder = StructBuilderProvider.get_builder(path.suffix, self.registry) except LanguageNotSupportedError as e: continue diff --git a/src/tostr/core/parser.py b/src/tostr/core/parser.py index 54c24ea..46148b4 100644 --- a/src/tostr/core/parser.py +++ b/src/tostr/core/parser.py @@ -15,9 +15,10 @@ class BaseParser(ABC): def __init__(self, project_dir: str, llm=None, registry: Registry=None): + self.project_dir = project_dir self.llm = llm self.registry = registry - self.path_ignore = ["venv", ".venv", "env", ".env", "build", "dist", "__pycache__", ".tostr", ".git"] + # self.path_ignore = ["venv", ".venv", "env", ".env", "build", "dist", "__pycache__", ".tostr", ".git"] @property def files(self): @@ -27,7 +28,7 @@ def files(self): async def parse(self, subpath: Path = None): if not subpath: - subpath = Path(".") + subpath = Path(self.project_dir) if not isinstance(subpath, Path): subpath = Path(subpath) @@ -40,12 +41,19 @@ async def parse(self, subpath: Path = None): def parse_path(self, subpath: Path = None): if subpath.is_dir(): logger.debug(f"🔍 Parsing files in '{subpath}'") - root = Directory(path=subpath, registry=self.registry) + + # Use relative path for root UID if possible + root_path = subpath + if self.registry: + root_path = self.registry.relative_to_project(subpath) + + root = Directory(path=root_path, registry=self.registry) self.registry.root = root logger.debug(f"Created registry root: {root}") self.registry.add_struct(root) for path in subpath.glob("*"): - if any(part in path.parts for part in self.path_ignore): + if self.registry.config.is_ignored(path): + logger.debug(f"Skipping '{path}' due to path ignore rules") continue if path.is_dir(): logger.debug(f"🔍 Parsing directory '{path}'") @@ -70,9 +78,14 @@ def parse_path(self, subpath: Path = None): # @abstractmethod def parse_file(self, subpath: Path, parent: BaseStruct=None) -> BaseFile: logger.debug(f"Attempting to resolve builder for suffix {subpath.parts[-1]}") + if self.registry.config.is_ignored(subpath): + logger.debug(f"Skipping '{subpath}' due to path ignore rules") + return None + try: builder = StructBuilderProvider.get_builder(subpath.suffix, self.registry) except LanguageNotSupportedError as e: + logger.warning(str(e)) return None file_obj = builder.build_file().from_path(subpath, parent=parent) # logger.debug(json.dumps(file_obj.to_dict(), indent=2)) diff --git a/src/tostr/core/registry.py b/src/tostr/core/registry.py index 9f9ad1d..6745742 100644 --- a/src/tostr/core/registry.py +++ b/src/tostr/core/registry.py @@ -4,6 +4,7 @@ from tostr.core.models import BaseFile, BaseClass, BaseMethod, BaseField from tostr.core.db import SQLiteCache from tostr.core.builders import BaseBuilder +from tostr.core.context.config import ProjectConfig import json from loguru import logger @@ -19,6 +20,7 @@ def __init__(self, use_cache: bool = True, db: SQLiteCache = None, project_path: self.id_map: Dict[str, BaseStruct] = {} self.root: Optional[BaseStruct] = None self.db = db + self.config = ProjectConfig(project_path) if project_path else None @property def files(self) -> List[BaseFile]: @@ -87,6 +89,7 @@ def load_filepath(self, path: Path): instance = builder.with_type(struct_type=struct_type).from_dict(struct_data) if instance: + instance.id = str(struct_data['id']) self.add_struct(instance) logger.debug(f"Found {len(node_rows)} structs in subtree {path_str}") @@ -188,6 +191,7 @@ def get_struct_by_uid(self, uid: str) -> Optional["BaseStruct"]: instance = builder.with_type(struct_type=struct_type).from_dict(struct_data) if instance: + instance.id = current_id self.add_struct(instance) # logger.debug(f"Created instance for struct with DB UID {struct_data['uid']} and type {struct_type}") else: diff --git a/src/tostr/languages/default.tostrignore b/src/tostr/languages/default.tostrignore new file mode 100644 index 0000000..11f3fe0 --- /dev/null +++ b/src/tostr/languages/default.tostrignore @@ -0,0 +1,25 @@ +# Generic Tostr Ignores +/build/ +/dist/ +/out/ +/bin/ + +# Binary / System Ignores +.DS_Store +*.exe +*.bin +*.dll +*.so +*.dylib +*.pyc +*.pyo +*.pyd +__pycache__/ +.git/ +.svn/ +.hg/ +.tostr/ +.tostrignore + +*.log +*.tmp diff --git a/src/tostr/languages/java/default.tostrignore b/src/tostr/languages/java/default.tostrignore new file mode 100644 index 0000000..c126264 --- /dev/null +++ b/src/tostr/languages/java/default.tostrignore @@ -0,0 +1,74 @@ +# Binary / System Ignores +.DS_Store +*.exe +*.bin +*.dll +*.so +*.dylib +*.pyc +*.pyo +*.pyd +__pycache__/ +.git/ +.svn/ +.hg/ +.tostr/ +.tostrignore + +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* +replay_pid* + +# Maven +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +# https://github.com/takari/maven-wrapper +.mvn/wrapper/maven-wrapper.jar + +# Gradle +.gradle/ +build/ +!src/**/build/ + +# IntelliJ IDEA +.idea/ +*.iml +*.iws +out/ + +# Eclipse +.settings/ +.classpath +.project +.factorypath + +# VS Code +.vscode/ diff --git a/src/tostr/mcp.py b/src/tostr/mcp.py index 8501c5a..0823537 100644 --- a/src/tostr/mcp.py +++ b/src/tostr/mcp.py @@ -40,7 +40,7 @@ def _run_watcher_thread(target_path: Path): logger.info("Background watcher shut down cleanly.") @mcp.tool() -async def init(workspace_path: str, use_cache: bool = True) -> str: +async def init(workspace_path: str, use_cache: bool = True, ignore: str = None) -> str: """ -- MUST BE RUN BEFORE ANY OTHER TOOL -- Initializes the Toaster MCP server for a specific project workspace. @@ -48,6 +48,7 @@ async def init(workspace_path: str, use_cache: bool = True) -> str: Args: workspace_path: The ABSOLUTE path to the project workspace. DO NOT use '.' or relative paths. If you only have a relative path, you must determine the absolute path of the current workspace first. use_cache: Whether to use the existing AST cache. + ignore: Add a default ignore template to the project folder (e.g., 'java', 'default'). """ target_path = Path(workspace_path) @@ -72,7 +73,7 @@ async def init(workspace_path: str, use_cache: bool = True) -> str: try: configure_mcp_logging(project_dir) - await init_async(project_dir, use_cache) + await init_async(project_dir, use_cache, ignore) watcher_thread = threading.Thread( target=_run_watcher_thread, diff --git a/tests/core/context/test_config.py b/tests/core/context/test_config.py new file mode 100644 index 0000000..627e689 --- /dev/null +++ b/tests/core/context/test_config.py @@ -0,0 +1,108 @@ +import pytest +from pathlib import Path +from tostr.core.context.config import ProjectConfig # Change this to your actual filename + +@pytest.fixture +def project_root(tmp_path): + """ + Creates a temporary project structure with a .tostrignore file. + """ + # Define ignore rules + ignore_content = [ + "*.log", # Unanchored extension + "*.db", # Unanchored extension + "/top_level.txt", # Anchored to root + "dist/", # Directory only + "temp/*.tmp", # Anchored with wildcard + "!important.log", # Negation + ] + + ignore_file = tmp_path / ".tostrignore" + ignore_file.write_text("\n".join(ignore_content)) + + # Create the internal .tostr folder to ensure it exists + (tmp_path / ".tostr").mkdir() + + return tmp_path + +def test_basic_extension_ignore(project_root): + config = ProjectConfig(project_root) + + # Should ignore .log files anywhere + assert config.is_ignored(project_root / "debug.log") is True + assert config.is_ignored(project_root / "src" / "app.log") is True + + # Should ignore .db files + assert config.is_ignored(project_root / "data.db") is True + +def test_negation_logic(project_root): + config = ProjectConfig(project_root) + + # *.log is ignored, but !important.log should be kept + assert config.is_ignored(project_root / "normal.log") is True + assert config.is_ignored(project_root / "important.log") is False + +def test_anchored_vs_unanchored(project_root): + config = ProjectConfig(project_root) + + # /top_level.txt is anchored to root + assert config.is_ignored(project_root / "top_level.txt") is True + + # A file with the same name in a subdirectory should NOT be ignored + subdir_file = project_root / "src" / "top_level.txt" + assert config.is_ignored(subdir_file) is False + +def test_directory_only_ignore(project_root): + config = ProjectConfig(project_root) + + # Create a directory and a file with the same name + dist_dir = project_root / "dist" + dist_dir.mkdir() + dist_file = dist_dir / "bundle.js" + + # Should ignore the directory and its contents + assert config.is_ignored(dist_dir) is True + assert config.is_ignored(dist_file) is True + + # A file named 'dist' (not a directory) should technically not be ignored + # by the 'dist/' rule, but usually, tools treat these safely. + standalone_file = project_root / "not_a_dir_dist" + assert config.is_ignored(standalone_file) is False + +def test_internal_tostr_ignores(project_root): + config = ProjectConfig(project_root) + + # Hardcoded internal ignores + assert config.is_ignored(project_root / ".tostr" / "config.toml") is True + assert config.is_ignored(project_root / ".tostrignore") is True + +def test_out_of_bounds_path(project_root, tmp_path_factory): + config = ProjectConfig(project_root) + + # A path completely outside the project root + external_dir = tmp_path_factory.mktemp("external") + external_file = external_dir / "external.txt" + + # Our logic defaults to True (ignored/skipped) for files outside the root + assert config.is_ignored(external_file) is True + +def test_hardcoded_ignores(project_root): + config = ProjectConfig(project_root) + + # Create directory to test directory-only rules + pycache = project_root / "__pycache__" + pycache.mkdir() + + git_dir = project_root / ".git" + git_dir.mkdir() + + # Check some hardcoded binaries and system files + assert config.is_ignored(project_root / ".DS_Store") is True + assert config.is_ignored(project_root / "my_app.exe") is True + assert config.is_ignored(project_root / "lib.so") is True + assert config.is_ignored(pycache) is True + assert config.is_ignored(git_dir) is True + + # Check that it also works for subdirectories + assert config.is_ignored(project_root / "src" / ".DS_Store") is True + assert config.is_ignored(project_root / "bin" / "output.bin") is True diff --git a/tests/testcode/MRILib/.tostr/cache.db b/tests/testcode/MRILib/.tostr/cache.db index 247a547..0ff84f3 100644 Binary files a/tests/testcode/MRILib/.tostr/cache.db and b/tests/testcode/MRILib/.tostr/cache.db differ diff --git a/tests/testcode/MRILib/managers/.toaster/cache.db b/tests/testcode/MRILib/managers/.toaster/cache.db deleted file mode 100644 index 55c700a..0000000 Binary files a/tests/testcode/MRILib/managers/.toaster/cache.db and /dev/null differ