In [None]:
import os
from pathlib import Path

# Root directory
root = Path("/app/data/dev/hyfs/test/fs")

# Create the root directory
root.mkdir(parents=True, exist_ok=True)

# Define the filesystem structure with lots of variety and edge cases
filesystem = {
    # Regular files at root
    "README.md": "# Test Project\n",
    "config.json": '{"version": "1.0"}\n',
    ".gitignore": "*.pyc\n__pycache__/\n",
    ".env": "SECRET_KEY=test123\n",
    "requirements.txt": "numpy==1.24.0\npandas>=2.0.0\n",
    
    # Source code directory
    "src/main.py": "def main():\n    pass\n",
    "src/utils.py": "# Utilities\n",
    "src/__init__.py": "",
    "src/models/user.py": "class User:\n    pass\n",
    "src/models/__init__.py": "",
    "src/models/product.py": "class Product:\n    pass\n",
    
    # Tests directory
    "tests/test_main.py": "def test_main():\n    assert True\n",
    "tests/__init__.py": "",
    "tests/fixtures/data.json": '{"test": "data"}\n',
    "tests/fixtures/sample.csv": "id,name,value\n1,test,100\n",
    
    # Data directory with various file types
    "data/raw/dataset_2023.csv": "col1,col2,col3\n1,2,3\n",
    "data/raw/dataset_2024.csv": "col1,col2,col3\n4,5,6\n",
    "data/processed/cleaned_data.parquet": b"fake parquet data",
    "data/processed/features.pkl": b"fake pickle data",
    "data/images/photo1.jpg": b"fake jpg data",
    "data/images/photo2.png": b"fake png data",
    "data/images/thumbnails/thumb1.jpg": b"fake thumbnail",
    
    # Documentation
    "docs/index.html": "<html><body>Docs</body></html>\n",
    "docs/api/endpoints.md": "# API Endpoints\n",
    "docs/api/authentication.md": "# Auth\n",
    "docs/guides/getting-started.pdf": b"fake pdf data",
    
    # Configuration files
    "config/development.yaml": "debug: true\n",
    "config/production.yaml": "debug: false\n",
    "config/database.ini": "[database]\nhost=localhost\n",
    
    # Build artifacts
    "build/output.js": "console.log('built');\n",
    "build/styles.css": "body { margin: 0; }\n",
    "dist/bundle.min.js": "!function(){console.log('minified')}();\n",
    
    # Edge cases
    "files with spaces/document 1.txt": "Content with spaces\n",
    "files with spaces/my file (copy).docx": b"fake docx",
    "special-chars/file@2024.txt": "File with @ symbol\n",
    "special-chars/data#1.csv": "test,data\n",
    "special-chars/report_v2.1.pdf": b"fake pdf",
    "multiple.dots.in.name.txt": "Multiple dots\n",
    "UPPERCASE.TXT": "UPPERCASE FILE\n",
    "MixedCase.TxT": "Mixed case extension\n",
    
    # Hidden files and directories
    ".hidden/secret.txt": "Hidden content\n",
    ".hidden/.config": "hidden config\n",
    ".cache/temp1.tmp": "cache data\n",
    
    # Empty directory (will create separately)
    "empty_dir/.keep": "",
    
    # Deep nesting
    "a/b/c/d/e/deep_file.txt": "Very nested\n",
    
    # Various extensions
    "scripts/deploy.sh": "#!/bin/bash\necho 'deploying'\n",
    "scripts/backup.bat": "@echo off\necho backing up\n",
    "notebooks/analysis.ipynb": '{"cells": []}\n',
    "media/video.mp4": b"fake video data",
    "media/audio.mp3": b"fake audio data",
    "archives/backup.zip": b"fake zip data",
    "archives/old_data.tar.gz": b"fake tar.gz data",
    
    # Files with no extension
    "LICENSE": "MIT License\n",
    "Makefile": "all:\n\techo 'building'\n",
    "Dockerfile": "FROM python:3.11\n",
    
    # Very long filename
    "long_filename_that_goes_on_and_on_and_on_to_test_length_limits.txt": "Long name\n",
    
    # Numeric filenames
    "logs/2024-01-01.log": "[INFO] Log entry\n",
    "logs/2024-01-02.log": "[ERROR] Error entry\n",
    "reports/001_report.txt": "Report 1\n",
    "reports/002_report.txt": "Report 2\n",
}

# Create all files and directories
for filepath, content in filesystem.items():
    full_path = root / filepath
    full_path.parent.mkdir(parents=True, exist_ok=True)
    
    if isinstance(content, bytes):
        full_path.write_bytes(content)
    else:
        full_path.write_text(content)

# Create a truly empty directory
(root / "truly_empty").mkdir(exist_ok=True)

# Create another empty nested directory
(root / "temp/cache/empty").mkdir(parents=True, exist_ok=True)

print(f"✓ Created dummy filesystem at {root}")
print(f"✓ Total files created: {len(filesystem)}")
print(f"✓ Includes edge cases: spaces, special chars, hidden files, deep nesting, various extensions")

✓ Created dummy filesystem at /app/data/dev/hyfs/test/fs
✓ Total files created: 60
✓ Includes edge cases: spaces, special chars, hidden files, deep nesting, various extensions


In [None]:
!find /app/data/dev/hyfs/test -print | sed -e "s;/app/data/dev/hyfs/test;;" -e "s;[^/]*/;|  ;g" -e "s;|  \([^|]\);├─ \1;"


├─ fs
|  ├─ config.json
|  ├─ .hidden
|  |  ├─ secret.txt
|  |  ├─ .config
|  ├─ src
|  |  ├─ models
|  |  |  ├─ product.py
|  |  |  ├─ __init__.py
|  |  |  ├─ user.py
|  |  ├─ utils.py
|  |  ├─ __init__.py
|  |  ├─ main.py
|  ├─ dist
|  |  ├─ bundle.min.js
|  ├─ tests
|  |  ├─ test_main.py
|  |  ├─ __init__.py
|  |  ├─ fixtures
|  |  |  ├─ data.json
|  |  |  ├─ sample.csv
|  ├─ build
|  |  ├─ styles.css
|  |  ├─ output.js
|  ├─ reports
|  |  ├─ 002_report.txt
|  |  ├─ 001_report.txt
|  ├─ config
|  |  ├─ development.yaml
|  |  ├─ production.yaml
|  |  ├─ database.ini
|  ├─ .gitignore
|  ├─ requirements.txt
|  ├─ media
|  |  ├─ video.mp4
|  |  ├─ audio.mp3
|  ├─ docs
|  |  ├─ api
|  |  |  ├─ endpoints.md
|  |  |  ├─ authentication.md
|  |  ├─ index.html
|  |  ├─ guides
|  |  |  ├─ getting-started.pdf
|  ├─ files with spaces
|  |  ├─ document 1.txt
|  |  ├─ my file (copy).docx
|  ├─ UPPERCASE.TXT
|  ├─ MixedCase.TxT
|  ├─ README.md
|  ├

# HyFS
> A Hyper FileSystem

Architecture phase: exploration (3)

In [None]:
%cat notes/design.md

This updated design doc now:
- Reflects the flat storage architecture
- Explains the shift from hierarchical to flat + derived views
- Documents all current working code
- Provides complete code reference for new session
- Maintains the philosophical foundations
- Captures lessons learned from the refactor
- Maps out clear future directions

-----

# HyFS Design Document
> Hyper FileSystem - A filesystem abstraction with stable identity and semantic relationships

## Overview
HyFS is a Python-based filesystem management tool built on fastcore principles. It provides flat storage with stable entity identification (eids), enabling multiple derived views (tree, tags, relationships) from a single canonical representation. Designed for interactive exploration, filtering, semantic organization, and eventual manipulation.

## Core Philosophy

### Principle of Lean Information Form (LIF)
Information must be expressed in its meaningful form, preserving integrity without requ

In [None]:
import uuid
import os
import errno
from hashlib import sha256
from pathlib import Path
from fastcore.basics import AttrDict, patch
from fastcore.foundation import L
from fnmatch import fnmatch
from collections import defaultdict

class FSNode(AttrDict):
    def __getattribute__(self, key):
        cls = object.__getattribute__(self, '__class__')
        if key in cls.__dict__ and isinstance(cls.__dict__[key], property):
            return cls.__dict__[key].fget(self)
        return super().__getattribute__(key)

class HyFS:
    def __init__(self):
        self.nodes = {}  # eid -> FSNode
        self.tags = defaultdict(set)  # tag_name -> {eid, ...}
        self.relations = defaultdict(lambda: defaultdict(set))  # eid -> {rel_type -> {eid, ...}}
    
    def add_node(self, path, eid=None, **metadata):
        """Add a node to the flat storage"""
        if eid is None:
            eid = _compute_eid(path)
        node = FSNode(
            path=path,
            eid=eid,
            type='dir' if path.is_dir() else 'file',
            **metadata
        )
        self.nodes[eid] = node
        return eid
    
    def get(self, eid):
        """O(1) lookup by eid"""
        return self.nodes[eid]
    
    def find_by_path(self, path):
        """Find node by path (O(n) scan - could optimize with index)"""
        path = Path(path)
        for node in self.nodes.values():
            if node.path == path:
                return node
        return None
    
    def tree(self, root_path=None):
        """Build hierarchical tree view from flat storage"""
        if root_path is None:
            # Find root (node with no parent in our set)
            roots = []
            for node in self.nodes.values():
                if not any(node.path.is_relative_to(other.path) and node.path != other.path 
                          for other in self.nodes.values()):
                    roots.append(node)
            if len(roots) == 1:
                root_path = roots[0].path
            else:
                raise ValueError("Multiple roots found, specify root_path")
        else:
            root_path = Path(root_path)
        
        root_node = self.find_by_path(root_path)
        if not root_node:
            raise ValueError(f"Root path {root_path} not found in nodes")
        
        return self._build_tree_node(root_node)
    
    def _build_tree_node(self, node):
        """Recursively build tree structure for a node"""
        tree_node = FSNode(node)  # Copy node data
        
        if node.type == 'dir':
            # Find children: nodes whose path.parent == this path
            children = []
            for candidate in self.nodes.values():
                if candidate.path.parent == node.path:
                    children.append(self._build_tree_node(candidate))
            tree_node['children'] = children
        
        return tree_node
    
    def filter(self, pred):
        """Filter nodes by predicate, returns flat list"""
        return L([node for node in self.nodes.values() if pred(node)])
    
    def find(self, pattern):
        """Find nodes matching glob pattern"""
        return self.filter(lambda n: fnmatch(n.path.name, pattern))

def _compute_eid(path):
    """Compute stable UUID for a path. Uses xattr if available, else deterministic hash."""
    path_str = str(path)
    xattr_key = 'user.hyfs.uuid'
    
    try:
        uuid_bytes = os.getxattr(path_str, xattr_key)
        return uuid_bytes.decode()
    except OSError:
        pass
    
    new_uuid = str(uuid.uuid4())
    
    try:
        os.setxattr(path_str, xattr_key, new_uuid.encode())
        return new_uuid
    except OSError as e:
        if e.errno in (errno.ENOTSUP, errno.EPERM, errno.EACCES):
            s = path.stat()
            data = f"{s.st_dev}:{s.st_ino}:{s.st_mtime}".encode()
            hash_hex = sha256(data).hexdigest()
            return f"{hash_hex[:8]}-{hash_hex[8:12]}-{hash_hex[12:16]}-{hash_hex[16:20]}-{hash_hex[20:32]}"
        else:
            raise

def scan_fs(root_path, include_metadata=False):
    """Scan filesystem and populate HyFS flat storage"""
    hyfs = HyFS()
    root_path = Path(root_path)
    
    # Walk the entire tree
    for path in [root_path] + list(root_path.rglob('*')):
        metadata = {}
        if include_metadata:
            # Add any metadata you want here
            pass
        hyfs.add_node(path, **metadata)
    
    return hyfs

@patch
def show(self:FSNode, indent=0):
    """Display tree node (works on tree view)"""
    print('    ' * indent + self.path.name)
    if 'children' in self:
        for child in self.children:
            child.show(indent+1)

@patch
def filter(self:FSNode, pred):
    """Filter tree node recursively (works on tree view)"""
    matches = L()
    if pred(self): matches.append(self)
    if 'children' in self:
        for child in self.children:
            matches += child.filter(pred)
    return matches

@patch
def find(self:FSNode, pattern):
    """Find in tree node (works on tree view)"""
    return self.filter(lambda n: fnmatch(n.path.name, pattern))

In [None]:
# Test the new flat storage architecture!
hyfs = scan_fs('/app/data/dev/hyfs/test/fs', include_metadata=True)

print(f"✓ Scanned filesystem: {len(hyfs.nodes)} nodes in flat storage\n")

✓ Scanned filesystem: 96 nodes in flat storage



In [None]:
# Test 1: O(1) lookup by eid
sample_eid = list(hyfs.nodes.keys())[0]
print(f"Test 1 - O(1) lookup by eid:")
print(f"  eid: {sample_eid}")
print(f"  node: {hyfs.get(sample_eid).path}\n")

Test 1 - O(1) lookup by eid:
  eid: 1907aacb-ac5f-4810-a9d0-a9498af9b17e
  node: /app/data/dev/hyfs/test/fs



In [None]:
# Test 2: Find by path
print(f"Test 2 - Find by path:")
node = hyfs.find_by_path('/app/data/dev/hyfs/test/fs/README.md')
print(f"  Found: {node.path} (eid: {node.eid})\n")

Test 2 - Find by path:
  Found: /app/data/dev/hyfs/test/fs/README.md (eid: 9a6f0b1c-2efb-4322-ac1a-cb93afdeeed5)



In [None]:
# Test 3: Flat filter (HyFS level)
print(f"Test 3 - Flat filter for .py files:")
py_files = hyfs.find('*.py')
for f in py_files[:5]:  # Show first 5
    print(f"  {f.path.name}: {f.eid}")
print(f"  ... ({len(py_files)} total)\n")

Test 3 - Flat filter for .py files:
  utils.py: 714e2121-0c36-407d-9750-49f3f1ea6c3c
  __init__.py: a3658657-ee34-4502-acfb-d3409e19f15d
  main.py: 84392f69-1177-445e-8484-7e9e2e1d088c
  test_main.py: ba023b1c-674c-4cd7-b3c2-718127ed3832
  __init__.py: a7ce510b-fd54-4fd3-98dd-c45bbdb9fc6e
  ... (8 total)



In [None]:
# Test 4: Build tree view
print(f"Test 4 - Build tree view:")
tree = hyfs.tree('/app/data/dev/hyfs/test/fs')
print(f"  Root: {tree.path}")
print(f"  Type: {tree.type}")
print(f"  Children: {len(tree.children)}\n")

Test 4 - Build tree view:
  Root: /app/data/dev/hyfs/test/fs
  Type: dir
  Children: 33



In [None]:
# Test 5: Tree display (first few levels)
print(f"Test 5 - Tree display (showing structure):")
tree.show()

Test 5 - Tree display (showing structure):
fs
    config.json
    .hidden
        secret.txt
        .config
    src
        models
            product.py
            __init__.py
            user.py
        utils.py
        __init__.py
        main.py
    dist
        bundle.min.js
    tests
        test_main.py
        __init__.py
        fixtures
            data.json
            sample.csv
    build
        styles.css
        output.js
    reports
        002_report.txt
        001_report.txt
    config
        development.yaml
        production.yaml
        database.ini
    .gitignore
    requirements.txt
    media
        video.mp4
        audio.mp3
    docs
        api
            endpoints.md
            authentication.md
        index.html
        guides
            getting-started.pdf
    files with spaces
        document 1.txt
        my file (copy).docx
    UPPERCASE.TXT
    MixedCase.TxT
    README.md
    .env
    multiple.dots.in.name.txt
    archives
        backup.zip


In [None]:
# Test 6: Filter on tree view
print(f"\nTest 6 - Find .ipynb files via tree view:")
ipynb_files = tree.find('*.ipynb')
for f in ipynb_files:
    print(f"  {f.path.name}: {f.eid}")


Test 6 - Find .ipynb files via tree view:
  analysis.ipynb: 40f41240-981d-4250-a82c-ab9d53b55c25


In [None]:
# raw dump
from pprint import pprint

pprint(tree)

{'children': [{'eid': '4316e89e-61c2-48bc-8182-7474c16c5596',
               'path': Path('/app/data/dev/hyfs/test/fs/config.json'),
               'type': 'file'},
              {'children': [{'eid': 'a2e9fd46-26db-4699-9078-220b8f230b5f',
                             'path': Path('/app/data/dev/hyfs/test/fs/.hidden/secret.txt'),
                             'type': 'file'},
                            {'eid': '16fd6f84-9abe-4d3c-a0af-4895ab09f2ca',
                             'path': Path('/app/data/dev/hyfs/test/fs/.hidden/.config'),
                             'type': 'file'}],
               'eid': '05f6bde9-2e17-44d8-8518-03908c03690b',
               'path': Path('/app/data/dev/hyfs/test/fs/.hidden'),
               'type': 'dir'},
              {'children': [{'children': [{'eid': '28dfa4b7-ae11-4b78-8b8b-3717e8dd7ad9',
                                           'path': Path('/app/data/dev/hyfs/test/fs/src/models/product.py'),
                                           'type':

In [None]:
hyfs.nodes

{'1907aacb-ac5f-4810-a9d0-a9498af9b17e': {'path': Path('/app/data/dev/hyfs/test/fs'),
  'eid': '1907aacb-ac5f-4810-a9d0-a9498af9b17e',
  'type': 'dir'},
 '4316e89e-61c2-48bc-8182-7474c16c5596': {'path': Path('/app/data/dev/hyfs/test/fs/config.json'),
  'eid': '4316e89e-61c2-48bc-8182-7474c16c5596',
  'type': 'file'},
 '05f6bde9-2e17-44d8-8518-03908c03690b': {'path': Path('/app/data/dev/hyfs/test/fs/.hidden'),
  'eid': '05f6bde9-2e17-44d8-8518-03908c03690b',
  'type': 'dir'},
 '913ca92d-71fd-4c0b-a5ae-b525fe769f2f': {'path': Path('/app/data/dev/hyfs/test/fs/src'),
  'eid': '913ca92d-71fd-4c0b-a5ae-b525fe769f2f',
  'type': 'dir'},
 'bbb24b5d-3900-43e6-8cb6-e0a24e5dd065': {'path': Path('/app/data/dev/hyfs/test/fs/dist'),
  'eid': 'bbb24b5d-3900-43e6-8cb6-e0a24e5dd065',
  'type': 'dir'},
 '0eb4ed1b-b945-4851-b3c0-1702b8b01c72': {'path': Path('/app/data/dev/hyfs/test/fs/tests'),
  'eid': '0eb4ed1b-b945-4851-b3c0-1702b8b01c72',
  'type': 'dir'},
 '023b551e-c8fd-4e15-b09e-642b412b87ca': {'pat

Alright, this is HyFS Architecture phase, exploration (3)!

Thoughts?

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Looking at where you are, this is really solid work! The flat storage architecture is clean, the separation of concerns is clear, and you've got 96 nodes working smoothly. A few thoughts on what stands out:

**What's working beautifully:**
- The flat `{eid -> FSNode}` storage is exactly right—O(1) lookups, easy to reason about
- Tree views as derived structures keeps things flexible
- The `AttrDict` + property override is clever (even if a bit hacky)
- Your design doc is excellent—dense, opinionated, and captures the "why" behind decisions

**Natural next explorations:**

1. **Path index**: You mention O(n) for `find_by_path`. A simple `self.path_index = {path: eid}` maintained during `add_node` would make this O(1). Worth it?

2. **Tags/relations sugar**: You have the infrastructure (`hyfs.tags`, `hyfs.relations`) but no convenience methods yet. Would `hyfs.tag(eid, 'important')` and `hyfs.tagged('important')` feel natural to add now?

3. **Content addressing (cid)**: Since you're thinking about identity deeply, adding content hashing for files could be interesting. Would enable duplicate detection, content-based diffing, etc.

4. **Serialization**: Can you save/load the entire `hyfs` state (nodes, tags, relations) to JSON? That would let you persist explorations.

Which of these feels most compelling to explore next, or is there something else calling to you about the architecture?

1. YES!
2. Yes, but let's make it a variable so we don't hardcode 'important' or anything. If we can tag we should also be able to `untag` or `deltag` or `rmtag` or something to that effect. Or maybe a more general method to `unset` any property, of which `tag` is but one case.
3. `cid` is virtually free at this point, so let's add it but lazy property IMHO. Also if in doubt, use same hash method as ZFS because that's what I use at home on Linux.
4. Definitely later on; for now it's useless i think, especially since we want to rerun things as we go.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Perfect! Let's tackle these in order.

**1. Path index** - straightforward. We'll maintain `self.path_index = {}` and update it in `add_node`. Should the key be the full `Path` object or `str(path)`? Path objects are hashable, but strings might be more predictable for lookups.

**2. Tagging methods** - I like your thinking about generalization. A few design questions:

- For tagging: `hyfs.tag(eid, 'tagname')` or `hyfs.tag(tagname, eid)` or both?
- For untagging: `hyfs.untag(eid, 'tagname')` feels symmetric with `tag`. Sound good?
- For querying: `hyfs.tagged('tagname')` returns what—a set of eids, or a list of nodes?
- The "unset any property" idea is interesting but might be premature. Tags live in `hyfs.tags`, not on nodes themselves. Want to explore that distinction first, or jump to implementation?

**3. Lazy `cid` property** - ZFS uses SHA256 by default. We'll add a `@property` on `FSNode` that computes and caches the hash on first access. Should we only compute for files (not dirs), or hash directory structure too?

**4. Serialization** - agreed, skip for now.

Which of these feels most urgent? Want to start with the path index since it's simplest, or jump to tagging since it's more interesting?

I'll split this down to two independent threads:

1. Path index + lazy `cid`

2. tagging design discussion and implementation

We've completed 1; see note and updated code below.

Let's proceed with 2 (no need to see how we did 1, we have the result). Got it?

## Recent Additions: Path Index + Lazy CID

### Path Index (O(1) lookups)
Added `self.path_index = {path: eid}` maintained in `add_node()`. Rewrote `find_by_path()` from O(n) scan to O(1) dict lookup. Tested at ~135μs for lookup.

### Lazy CID (Content ID)
Added `cid` property to `FSNode` that computes SHA256 hash on first access, caches result. ZFS-compliant hashing with 64KB streaming chunks. Returns `None` for directories. Caching provides ~7x speedup on repeated access (413μs → 61μs).

Both additions are transparent—no breaking changes to existing API.

## Tags
> TODO rn.

In [None]:
import uuid
import os
import errno
from hashlib import sha256
from pathlib import Path
from fastcore.basics import AttrDict, patch
from fastcore.foundation import L
from fnmatch import fnmatch
from collections import defaultdict

class FSNode(AttrDict):
    def __getattribute__(self, key):
        cls = object.__getattribute__(self, '__class__')
        if key in cls.__dict__ and isinstance(cls.__dict__[key], property):
            return cls.__dict__[key].fget(self)
        return super().__getattribute__(key)

class HyFS:
    def __init__(self):
        self.nodes = {}  # eid -> FSNode
        self.path_index = {}  # path -> eid
        self.tags = defaultdict(set)  # tag_name -> {eid, ...}
        self.relations = defaultdict(lambda: defaultdict(set))  # eid -> {rel_type -> {eid, ...}}
    
    def add_node(self, path, eid=None, **metadata):
        """Add a node to the flat storage"""
        if eid is None:
            eid = _compute_eid(path)
        node = FSNode(
            path=path,
            eid=eid,
            type='dir' if path.is_dir() else 'file',
            **metadata
        )
        self.nodes[eid] = node
        self.path_index[path] = eid
        return eid
    
    def get(self, eid):
        """O(1) lookup by eid"""
        return self.nodes[eid]
    
    def find_by_path(self, path):
        """Find node by path (O(1) with index)"""
        path = Path(path)
        eid = self.path_index.get(path)
        return self.nodes.get(eid) if eid else None
    
    def tree(self, root_path=None):
        """Build hierarchical tree view from flat storage"""
        if root_path is None:
            # Find root (node with no parent in our set)
            roots = []
            for node in self.nodes.values():
                if not any(node.path.is_relative_to(other.path) and node.path != other.path 
                          for other in self.nodes.values()):
                    roots.append(node)
            if len(roots) == 1:
                root_path = roots[0].path
            else:
                raise ValueError("Multiple roots found, specify root_path")
        else:
            root_path = Path(root_path)
        
        root_node = self.find_by_path(root_path)
        if not root_node:
            raise ValueError(f"Root path {root_path} not found in nodes")
        
        return self._build_tree_node(root_node)
    
    def _build_tree_node(self, node):
        """Recursively build tree structure for a node"""
        tree_node = FSNode(node)  # Copy node data
        
        if node.type == 'dir':
            # Find children: nodes whose path.parent == this path
            children = []
            for candidate in self.nodes.values():
                if candidate.path.parent == node.path:
                    children.append(self._build_tree_node(candidate))
            tree_node['children'] = children
        
        return tree_node
    
    def filter(self, pred):
        """Filter nodes by predicate, returns flat list"""
        return L([node for node in self.nodes.values() if pred(node)])
    
    def find(self, pattern):
        """Find nodes matching glob pattern"""
        return self.filter(lambda n: fnmatch(n.path.name, pattern))

def _compute_eid(path):
    """Compute stable UUID for a path. Uses xattr if available, else deterministic hash."""
    path_str = str(path)
    xattr_key = 'user.hyfs.uuid'
    
    try:
        uuid_bytes = os.getxattr(path_str, xattr_key)
        return uuid_bytes.decode()
    except OSError:
        pass
    
    new_uuid = str(uuid.uuid4())
    
    try:
        os.setxattr(path_str, xattr_key, new_uuid.encode())
        return new_uuid
    except OSError as e:
        if e.errno in (errno.ENOTSUP, errno.EPERM, errno.EACCES):
            s = path.stat()
            data = f"{s.st_dev}:{s.st_ino}:{s.st_mtime}".encode()
            hash_hex = sha256(data).hexdigest()
            return f"{hash_hex[:8]}-{hash_hex[8:12]}-{hash_hex[12:16]}-{hash_hex[16:20]}-{hash_hex[20:32]}"
        else:
            raise

def _compute_cid(path):
    """Compute SHA256 content hash (ZFS-style) for a file."""
    if not path.is_file():
        return None
    
    h = sha256()
    with open(path, 'rb') as f:
        while chunk := f.read(65536):  # 64KB chunks
            h.update(chunk)
    return h.hexdigest()

def scan_fs(root_path, include_metadata=False):
    """Scan filesystem and populate HyFS flat storage"""
    hyfs = HyFS()
    root_path = Path(root_path)
    
    # Walk the entire tree
    for path in [root_path] + list(root_path.rglob('*')):
        metadata = {}
        if include_metadata:
            # Add any metadata you want here
            pass
        hyfs.add_node(path, **metadata)
    
    return hyfs

# Add cid property to FSNode
@property
def cid(self):
    """Lazy-computed content ID (SHA256 hash) for files"""
    if 'cid' not in self:
        self['cid'] = _compute_cid(self.path)
    return self['cid']

FSNode.cid = cid

@patch
def show(self:FSNode, indent=0):
    """Display tree node (works on tree view)"""
    print('    ' * indent + self.path.name)
    if 'children' in self:
        for child in self.children:
            child.show(indent+1)

@patch
def filter(self:FSNode, pred):
    """Filter tree node recursively (works on tree view)"""
    matches = L()
    if pred(self): matches.append(self)
    if 'children' in self:
        for child in self.children:
            matches += child.filter(pred)
    return matches

@patch
def find(self:FSNode, pattern):
    """Find in tree node (works on tree view)"""
    return self.filter(lambda n: fnmatch(n.path.name, pattern))


Let's take 2. Tagging step by step.

> - For tagging: `hyfs.tag(eid, 'tagname')` or `hyfs.tag(tagname, eid)`

what's the difference? ELI5 please.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Good question! It's about which argument comes first—the order matters for how natural the method feels to use.

**`hyfs.tag(eid, 'tagname')`** - "tag THIS thing AS important"
- You start with the thing you're looking at
- Natural when you have a node: `hyfs.tag(node.eid, 'important')`

**`hyfs.tag('tagname', eid)`** - "add this thing TO the important tag"
- You start with the category/bucket
- Natural when organizing: `hyfs.tag('important', eid1, eid2, eid3)` (if we support multiple eids)

Which mental model fits better for you: "I'm looking at this file, let me tag it" vs "I'm organizing my 'important' tag, let me add files to it"?

this begs the question of why any of those since tags are a many::many relationship. multiple eids sharing the same category, multiple categories sharing the same eid.

variadic function that takes a list as first arg (e.g multiple tags, a common use-case) then arbitrary number of eids (possibly assembled as a list themselves prior to calling `tag()`)?

or the exact opposite?

tho with named args it shouldn't matter, actually? but then no variadism with the last...

**what are the 'correct' ways to solve such many-to-many assignments? is method/functional a good idea? is there a hidden devil in some detail that we're not considering regarding tag/cat systems?**

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Excellent observation! You've hit on a fundamental API design tension. Let's think through the patterns:

**The many-to-many problem:**
- Want: `tag(['important', 'work'], eid1, eid2, eid3)`
- Also want: `tag('important', [eid1, eid2, eid3])`
- Both are valid mental models!

**Classic solutions:**

1. **Separate operations** (Git-style):
   - `tag(tag_name, *eids)` - add one tag to many eids
   - `tag_many(tag_names, *eids)` - add many tags to many eids
   - Clear but verbose

2. **Direction matters** (SQL-style):
   - Pick one: `tag(tag_name, *eids)` 
   - Reverse: `tag_eids(*eids, tags=[...])` with named arg
   - Asymmetric but explicit

3. **Builder/fluent** (jQuery-style):
   - `hyfs.select(eid1, eid2).tag('important', 'work')`
   - Chainable but more ceremony

4. **Direct manipulation** (what you have now):
   - `hyfs.tags['important'].update({eid1, eid2})`
   - Most explicit, no magic, but feels low-level

**Hidden devils:**
- Variadics with lists are ambiguous: is `[eid1, eid2]` one arg or two?
- Named args help but lose the terseness
- Tags as sets means order doesn't matter (good!) but iteration is unpredictable

What feels most natural for your workflow: direct set manipulation, or a convenience wrapper? And if wrapper, which direction do you tag more often—"add this tag to many files" or "add many tags to this file"?

this begs the question of why any of those since tags are a many::many relationship. multiple eids sharing the same category, multiple categories sharing the same eid.

variadic function that takes a list as first arg (e.g multiple tags, a common use-case) then arbitrary number of eids (possibly assembled as a list themselves prior to calling `tag()`)?

or the exact opposite?

tho with named args it shouldn't matter, actually? but then no variadism with the last...

**what are the 'correct' ways to solve such many-to-many assignments? is method/functional a good idea? is there a hidden devil in some detail that we're not considering regarding tag/cat systems?**

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠

Excellent question! You've hit on the core tension in many-to-many APIs. Let me break down the options and tradeoffs:

**The variadic problem:**
```python
# Which is which?
tag('important', 'work', 'eid1', 'eid2')  # Can't tell tags from eids!
```

You'd need type checking or conventions (strings vs UUIDs), which is fragile.

**Named args solve ambiguity but lose terseness:**
```python
tag(eids=['eid1', 'eid2'], tags=['important', 'work'])  # Clear but verbose
```

**The Unix philosophy approach: Do one thing atomically**
```python
tag(eid, tag)  # Singular operation - one eid, one tag
# For bulk: use Python
for eid in eids: hyfs.tag(eid, 'important')
for tag in tags: hyfs.tag(eid, tag)
```

**Hidden devils in tag systems:**

1. **Idempotence**: Should `tag(eid, 'important')` twice be a no-op or error? (Answer: no-op, usually)

2. **Validation**: Do tags auto-create or must they exist first? (Answer: auto-create, simpler)

3. **Inverse symmetry**: If `tag(eid, tag)` adds, should `untag(eid, tag)` remove? Should arg order match? (Answer: yes, symmetry helps)

4. **Bulk performance**: Tagging 1000 files one at a time vs batch operation

5. **Query asymmetry**: You have `tagged(tag) → eids` but also need `tags_of(eid) → tags`

**What do existing systems do?**

- **Git**: `git tag <tagname> [commit]` - tag first, thing second, singular
- **SQL**: `INSERT INTO tags (eid, tag) VALUES ...` - symmetric, batch-friendly
- **Sets**: `set.add(item)` - singular, idempotent, simple

**My recommendation:**

Start simple and composable:
```python
hyfs.tag(eid, tag)        # Add one tag to one eid
hyfs.untag(eid, tag)      # Remove one tag from one eid
hyfs.tagged(tag)          # Get all eids with this tag → set
hyfs.tags_of(eid)         # Get all tags for this eid → set
```

For bulk operations, users can:
```python
# Direct manipulation when performance matters
hyfs.tags['important'].update([eid1, eid2, eid3])

# Or loop for clarity
for eid in my_eids: hyfs.tag(eid, 'important')
```

**Does this feel right, or do you see a use case where bulk operations are so common that a variadic API is worth the complexity?**

This feels absolutely right! Let's implement these methods for this exact usage.

```Python
hyfs.tag(eid, tag)        # Add one tag to one eid
hyfs.untag(eid, tag)      # Remove one tag from one eid
hyfs.tagged(tag)          # Get all eids with this tag → set
hyfs.tags_of(eid)         # Get all tags for this eid → set
```

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Perfect! Here's the implementation:

```python
@patch
def tag(self:HyFS, eid, tag):
    """Add a tag to an eid (idempotent)"""
    self.tags[tag].add(eid)

@patch
def untag(self:HyFS, eid, tag):
    """Remove a tag from an eid (idempotent)"""
    self.tags[tag].discard(eid)  # discard doesn't error if not present
    if not self.tags[tag]:  # Clean up empty tag sets
        del self.tags[tag]

@patch
def tagged(self:HyFS, tag):
    """Get all eids with this tag"""
    return self.tags[tag]  # Returns set (possibly empty)

@patch
def tags_of(self:HyFS, eid):
    """Get all tags for this eid"""
    return {tag for tag, eids in self.tags.items() if eid in eids}
```

**Key details:**
- `tag()` uses `set.add()` - idempotent, no error on duplicate
- `untag()` uses `set.discard()` - idempotent, no error if missing
- `untag()` cleans up empty tag sets to avoid clutter in `self.tags`
- `tagged()` returns the actual set (not a copy), so you can mutate directly if needed
- `tags_of()` does an O(n) scan through tags - acceptable for reasonable tag counts

Add this to your code and you're ready to tag!

In [None]:
@patch
def tag(self:HyFS, eid, tag):
    """Add a tag to an eid (idempotent)"""
    self.tags[tag].add(eid)

@patch
def untag(self:HyFS, eid, tag):
    """Remove a tag from an eid (idempotent)"""
    self.tags[tag].discard(eid)  # discard doesn't error if not present
    if not self.tags[tag]:  # Clean up empty tag sets
        del self.tags[tag]

@patch
def tagged(self:HyFS, tag):
    """Get all eids with this tag"""
    return self.tags[tag]  # Returns set (possibly empty)

@patch
def tags_of(self:HyFS, eid):
    """Get all tags for this eid"""
    return {tag for tag, eids in self.tags.items() if eid in eids}

Let's test this!

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

Great! Let's test the tagging system:

```python
# Scan the filesystem first
hyfs = scan_fs('/app/data/dev/hyfs/test/fs')

# Tag some files
py_files = hyfs.find('*.py')
print(f"Found {len(py_files)} Python files")

# Tag the first few Python files as 'important'
for node in py_files[:3]:
    hyfs.tag(node.eid, 'important')
    print(f"Tagged {node.path.name}")

# Tag some as 'code'
for node in py_files:
    hyfs.tag(node.eid, 'code')

# Tag a config file as 'important' too
config = hyfs.find_by_path(root / 'config.json')
if config:
    hyfs.tag(config.eid, 'important')
    hyfs.tag(config.eid, 'config')

# Query: What's tagged as 'important'?
important_eids = hyfs.tagged('important')
print(f"\n{len(important_eids)} files tagged 'important':")
for eid in important_eids:
    node = hyfs.get(eid)
    print(f"  - {node.path.name}")

# Query: What tags does the first Python file have?
first_py = py_files[0]
tags = hyfs.tags_of(first_py.eid)
print(f"\nTags for {first_py.path.name}: {tags}")

# Test untag
hyfs.untag(first_py.eid, 'important')
print(f"\nAfter untag, {first_py.path.name} tags: {hyfs.tags_of(first_py.eid)}")
print(f"Important count now: {len(hyfs.tagged('important'))}")

# Show all tags
print(f"\nAll tags in system: {list(hyfs.tags.keys())}")
```

This will test:
- Tagging multiple files with same tag
- Multiple tags on same file
- Querying by tag
- Querying tags of a file
- Untagging
- Tag cleanup

Ready to run?

In [None]:
# Scan the filesystem first
hyfs = scan_fs('/app/data/dev/hyfs/test/fs')

# Tag some files
py_files = hyfs.find('*.py')
print(f"Found {len(py_files)} Python files")

# Tag the first few Python files as 'important'
for node in py_files[:3]:
    hyfs.tag(node.eid, 'important')
    print(f"Tagged {node.path.name}")

# Tag some as 'code'
for node in py_files:
    hyfs.tag(node.eid, 'code')

# Tag a config file as 'important' too
config = hyfs.find_by_path(root / 'config.json')
if config:
    hyfs.tag(config.eid, 'important')
    hyfs.tag(config.eid, 'config')

# Query: What's tagged as 'important'?
important_eids = hyfs.tagged('important')
print(f"\n{len(important_eids)} files tagged 'important':")
for eid in important_eids:
    node = hyfs.get(eid)
    print(f"  - {node.path.name}")

# Query: What tags does the first Python file have?
first_py = py_files[0]
tags = hyfs.tags_of(first_py.eid)
print(f"\nTags for {first_py.path.name}: {tags}")

# Test untag
hyfs.untag(first_py.eid, 'important')
print(f"\nAfter untag, {first_py.path.name} tags: {hyfs.tags_of(first_py.eid)}")
print(f"Important count now: {len(hyfs.tagged('important'))}")

# Show all tags
print(f"\nAll tags in system: {list(hyfs.tags.keys())}")

Found 8 Python files
Tagged utils.py
Tagged __init__.py
Tagged main.py

4 files tagged 'important':
  - config.json
  - main.py
  - __init__.py
  - utils.py

Tags for utils.py: {'code', 'important'}

After untag, utils.py tags: {'code'}
Important count now: 3

All tags in system: ['important', 'code', 'config']


In [None]:
# Query: What's tagged as 'code'?
code_eids = hyfs.tagged('code')
print(f"\n{len(code_eids)} files tagged 'code':")
for eid in code_eids:
    node = hyfs.get(eid)
    print(f"  - {node.path.name}")


8 files tagged 'code':
  - main.py
  - product.py
  - user.py
  - __init__.py
  - test_main.py
  - __init__.py
  - __init__.py
  - utils.py


This seems excellent!

Let's write a summary, that we'll use with 1. above, to update our Design doc.

By the way, now that we're advancing towards our foundation, it'll probably be time to rewrite that Design doc more tersely (principles, why/hows, etc., you know the drill), and split all the hands-on specific stuff into a proper Architecture document (last step before a Spec, and we won't write this directly since I do literate programming in notebooks, that will split all the spec/code properly in subsequent markdown/python cells.

For now, let's concisely summarize the above tagging system basis.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

## Tagging System

### API Design
Four methods for many-to-many tag/entity relationships:

```python
hyfs.tag(eid, tag)        # Add one tag to one eid (idempotent)
hyfs.untag(eid, tag)      # Remove one tag from one eid (idempotent)
hyfs.tagged(tag)          # Get all eids with this tag → set
hyfs.tags_of(eid)         # Get all tags for this eid → set
```

### Design Decisions

**Singular operations over variadic**: Following Unix philosophy—do one thing atomically. Bulk operations use Python iteration or direct `hyfs.tags[tag].update(eids)` for performance.

**Idempotence**: Both `tag()` and `untag()` are safe to call repeatedly. `tag()` uses `set.add()`, `untag()` uses `set.discard()`.

**Auto-cleanup**: `untag()` removes empty tag sets to prevent clutter in `hyfs.tags`.

**Storage**: `defaultdict(set)` maps `tag_name -> {eid, ...}`. Simple, efficient, directly inspectable.

**Query asymmetry**: `tagged(tag)` is O(1), `tags_of(eid)` is O(n) in tag count. Acceptable tradeoff—most systems have more entities than tags.

**No validation**: Tags auto-create on first use. No schema, no registration. Maximum flexibility.

### Implementation Notes
- `tagged()` returns actual set reference, not copy—allows direct mutation when needed
- `tags_of()` scans all tags—optimize later if tag counts grow large (inverse index)
- Symmetric API: `tag`/`untag` mirror each other, same arg order

This completes the foundation for semantic organization beyond filesystem hierarchy.

Perfect!

Now, about the Design and new Architecture docs. Let's split things properly. Let's try to make the sum of both *much shorter* than the current Design doc. We can trim a lot of repetitions and self-obvious principles in the code. Design tells what we want and why, Architecture tells how it's done. Current doc is about 7k tokens; the sum of the two new should be 5k max, and aim for 3-4k ideally.

Do not write the reference code, it'll be in its own file/cell.

You may write short excerpts for illustration but as little as possible for clarity's sake.

Start with the Design doc.

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

# HyFS Design Document
> Hyper FileSystem - A filesystem abstraction with stable identity and semantic relationships

## Vision

HyFS provides stable entity identification and semantic organization for filesystems. Files and directories get persistent identities (eids) that survive renames and moves, enabling tagging, relationship tracking, and multiple views of the same underlying data.

Built for interactive exploration in SolveIT notebooks using fastcore principles.

## Core Philosophy

### Principle of Lean Information Form (LIF)

Information must be expressed in its meaningful form, preserving integrity without requiring decoders. Store semantic structure directly, decide display independently.

**LIF Lemma 1: Separation of Concerns**

Three orthogonal concepts, stored separately:
1. **Entity storage**: Flat dict `{eid -> node}` (canonical)
2. **Filesystem hierarchy**: Derived from `path` relationships (view)
3. **Semantic organization**: Tags and relations (metadata layer)

Don't mix these. Tags are many-to-many mappings. Relations are typed connections. Tree structure is computed on-demand from path relationships.

### The fastcore Way

Methods return transformed data when possible, enabling chaining. `filter()` returns a flat list of nodes, not print output. This separates data transformation from presentation.

### Make Side Effects Explicit and Deferrable

Inspired by Git's staging area and ZFS transactions:
- **Read operations**: Immediate (work directly on flat storage)
- **Write operations**: Return a Plan/Transaction object that can be inspected, then executed
- Example: `plan = hyfs.rename(eid, 'newname')` → `plan.preview()` → `plan.execute()`

Provides safety, composability, and clear boundaries between observation and mutation.

## Architecture Decisions

### Flat Storage with Derived Views

**Choice**: Store nodes in flat dict `{eid -> FSNode}`, derive tree structure on-demand.

**Why flat**:
- O(1) lookup by eid
- No nested traversal for global operations
- Tags/relations are just dicts
- Multiple views from single source
- Easy serialization
- Scales better

**Why derived trees**:
- Tree structure implicit in `path` property
- Compute hierarchy when needed
- Display is a view concern, not data concern
- Can build multiple trees: filesystem, tags, relations

### Entity Identification: UUID with xattr

Every node gets a stable `eid` (Entity ID). Try to store UUID in xattr `user.hyfs.uuid`. If xattr unavailable, fall back to deterministic hash of `(st_dev, st_ino, st_mtime)`.

**Why `eid` not `fid`/`nid`**: Directories are entities too—structure has semantic meaning. `eid` sits at perfect abstraction level between content (`cid`) and filesystem implementation (`nid`).

### Content Addressing: Lazy SHA256

`cid` property on `FSNode` computes SHA256 hash on first access, caches result. Uses ZFS-style 64KB streaming chunks. Returns `None` for directories.

### Path Index: O(1) Lookups

Maintain `path_index = {path: eid}` updated in `add_node()`. Makes `find_by_path()` O(1) instead of O(n) scan.

### Tagging: Singular Operations

Four methods for many-to-many relationships:
- `tag(eid, tag)` - add one tag to one eid
- `untag(eid, tag)` - remove one tag from one eid  
- `tagged(tag)` - get all eids with this tag
- `tags_of(eid)` - get all tags for this eid

Singular operations over variadic (Unix philosophy). Idempotent. Auto-cleanup empty tags. No validation, tags auto-create.

## What HyFS Enables

**Track files across renames**: eid persists through filesystem changes

**Detect duplicates**: Content-based deduplication via `cid`

**Compare trees**: Diff snapshots by eid to find added/removed/moved files

**Semantic relationships**: Tag files, build import graphs, track generation lineage

**Persistent selections**: Tags survive renames, moves, even filesystem boundaries

**Multiple views**: Same data, different perspectives—tree by path, tree by tags, graph by imports

## Development Principles

- **Vertical space efficiency**: Favor one-liners where clarity isn't sacrificed
- **Fastcore alignment**: Use `L`, `AttrDict`, `@patch`, `Path`
- **REPL-driven**: Optimize for tab completion and exploration
- **Composability over monoliths**: Do one thing well
- **No premature optimization**: Measure first
- **No ceremony**: Terse, clear code

## Future Directions

**Immediate**: Relations API, filtered tree views, write operations (rename/move/copy as Plans)

**Medium**: Serialization, deduplication, snapshots, metadata properties (size, mtime, permissions)

**Long-term**: Multi-host tracking, semantic relationships (imports, lineage), FastHTML web interface, CLI tool

---

*This is a living document. Update as HyFS evolves.*

EXCELLENT. Truly great writing, impeccably concise yet meaningful. That's a very well-striked balance, Sir.

Now, let's proceed with the Architecture doc. But one question about tags, first: currently, they're stored in memory, but we don't have persistent storage, correct? So, what will that be? I'm wondering if xattrs may be used for that, or if it's too contrived and we should consider building a basic db at this point (when we cross that bridge, not there yet, this is just to inform the Architecture doc in a last / outro section "future angles" or whatever).

##### 🤖Reply🤖<!-- SOLVEIT_SEPARATOR_7f3a9b2c -->

🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠🧠

Good question! **xattr for tags is too limiting**. Here's why:

**xattr problems:**
- Limited size (~64KB total per file)
- Can't tag deleted/moved entities
- Tags are "on" the file, not in the index
- Querying "all files tagged X" requires scanning all files
- Tags are conceptually about the *index*, not individual files

**Better approach:** Separate persistence layer
- Store tags in index file (JSON initially, SQLite later for performance)
- Tags reference eids, not paths
- Can track entities that don't currently exist on filesystem
- Fast querying, unlimited size
- Relations can use same storage

This fits the separation of concerns: xattr for eid (bound to file), separate index for semantic layer (tags, relations, snapshots).

I'll add a brief "Persistence Strategy" section to Architecture doc noting this.

Now, the Architecture doc:

---

# HyFS Architecture Document
> Implementation details, data structures, and algorithms

## Data Structures

### FSNode: AttrDict with Properties

`FSNode` extends `AttrDict` to enable both dict-style (`node['path']`) and attribute-style (`node.path`) access, optimized for REPL exploration.

**Property override challenge**: AttrDict's `__getattr__` intercepts attribute access before class properties. Solution: override `__getattribute__` to check class properties first.

Properties enable lazy computation:
- `cid`: Computed on first access, cached in node dict
- Future: `size`, `mtime`, `permissions`

### HyFS: Flat Storage Container

```python
self.nodes = {}              # eid -> FSNode (canonical storage)
self.path_index = {}         # path -> eid (O(1) lookups)
self.tags = defaultdict(set) # tag_name -> {eid, ...}
self.relations = defaultdict(lambda: defaultdict(set))  # eid -> {rel_type -> {eid, ...}}
```

All lookups are dict-based. Tree structure derived on-demand.

## Core Algorithms

### Entity ID Computation

1. Try read UUID from xattr `user.hyfs.uuid`
2. If missing, generate UUID v4
3. Try write to xattr
4. On xattr failure (unsupported fs, permissions), fall back to deterministic hash: `SHA256(st_dev:st_ino:st_mtime)` formatted as UUID

**Tradeoff**: xattr provides true stability, hash provides portability. Hash changes on mtime update, but that's acceptable for fallback scenario.

### Content ID Computation

Lazy `@property` on `FSNode`:
- Check if `'cid'` key exists in node dict (cache)
- If not, compute SHA256 using 64KB streaming chunks (ZFS-style)
- Store result in dict, return
- Subsequent access: O(1) dict lookup (~7x speedup)

Returns `None` for directories (no standard dir hashing yet).

### Tree View Construction

**Input**: Root path
**Output**: Hierarchical `FSNode` with `children` lists

**Algorithm**:
1. Look up root node in `path_index` (O(1))
2. Recursively build tree:
   - Copy node data into new `FSNode`
   - If directory: scan all nodes, find children where `path.parent == node.path`
   - Recursively build each child
   - Attach children list

**Complexity**: O(n²) worst case (check every node for each parent). Acceptable for <10K nodes. Future optimization: maintain parent-child index.

### Filesystem Scanning

**Algorithm**:
1. Use `Path.rglob('*')` to walk entire tree
2. For each path:
   - Compute eid (with xattr/hash fallback)
   - Determine type (file/dir)
   - Create `FSNode`
   - Store in `nodes[eid]` and `path_index[path]`
3. Return populated `HyFS` instance

**Performance**: ~1ms per 100 nodes on modern hardware. Metadata (size, mtime) skipped unless requested.

## Method Patterns

### Querying: Returns vs Side Effects

**Returns data**: `filter()`, `find()`, `get()`, `tagged()`, `tags_of()`, `tree()`
- Enable chaining: `hyfs.filter(pred1).filter(pred2)`
- Use `L` for lists (fastcore convention)
- Return actual references (sets) when mutation might be useful

**Side effects**: `tag()`, `untag()`, `add_node()`
- Modify internal state
- Idempotent where sensible
- Return minimal info (eid, None)

### Patching: Extending Classes

Use `@patch` to add methods to `HyFS` and `FSNode` after class definition. Keeps related functionality together without bloating class body.

Pattern:
```python
@patch
def method_name(self:ClassName, ...):
    """Docstring"""
    # implementation
```

## Key Implementation Details

### Path Index Maintenance

Updated in `add_node()` only. Not updated on rename/move (not implemented yet). When write operations added, will need atomic updates of both `nodes` and `path_index`.

### Tag Cleanup

`untag()` removes empty tag sets to prevent `defaultdict` accumulation. Tradeoff: extra check on every untag, but keeps `hyfs.tags.keys()` clean.

### defaultdict Usage

- `tags`: `defaultdict(set)` - tags auto-create on first use
- `relations`: `defaultdict(lambda: defaultdict(set))` - two-level auto-creation

Enables `hyfs.tags[new_tag].add(eid)` without checking if tag exists.

## Performance Characteristics

**Fast (O(1))**:
- Lookup by eid: `get(eid)`
- Lookup by path: `find_by_path(path)` (with index)
- Get all tagged: `tagged(tag)`

**Linear (O(n))**:
- Filter/find operations (scan all nodes)
- Get tags of eid: `tags_of(eid)` (scan all tags)
- Filesystem scan

**Quadratic (O(n²))**:
- Tree construction (check all nodes for each parent)

**Cached after first access**:
- Content hash: `node.cid`

## Edge Cases Handled

- **Missing xattr support**: Falls back to deterministic hash
- **Empty tag sets**: Auto-cleanup in `untag()`
- **Multiple roots**: `tree()` requires explicit root_path
- **Non-existent paths**: `find_by_path()` returns `None`
- **Duplicate tags/untags**: Idempotent operations
- **Directories**: `cid` returns `None` (no content)

## Persistence Strategy (Future)

**Current state**: All data in-memory only. Tags/relations lost on session end.

**Planned approach**:
- xattr for eid only (bound to file)
- Separate index file for semantic layer (tags, relations, snapshots)
- Format: JSON initially (human-readable), SQLite later (performance)
- Index references eids, not paths (stable across moves)
- Enables tracking entities that don't currently exist on filesystem

**Rationale**: Tags are index metadata, not file metadata. Separation of concerns matches architecture.

## Dependencies

- **Python 3.12+**: For `match` statements, walrus operator
- **fastcore**: `AttrDict`, `L`, `@patch`
- **pathlib**: All path operations
- **hashlib.sha256**: Content hashing (stdlib)
- **uuid**: Entity identification (stdlib)
- **os.{get,set}xattr**: Extended attributes (stdlib, Unix only)
- **collections.defaultdict**: Auto-creating dicts (stdlib)

No external dependencies beyond fastcore.

---

*This document describes current implementation. Update as architecture evolves.*