In [None]:
import os
from pathlib import Path

# Base location under which the whole dummy tree is materialised
root = Path("/app/data/dev/hyfs/test/fs")

# Ensure the base location exists: missing ancestors are created as needed,
# and an already-existing directory is accepted so the cell can be re-run
root.mkdir(exist_ok=True, parents=True)

# Declarative description of the dummy tree: each key is a path relative to
# `root` (forward-slash separated), each value is the file's content.
# `str` values hold text content; `bytes` values are opaque binary placeholders.
# Entries are processed in insertion order, so the order below is the order in
# which the files get created.
filesystem = {
    # Regular files at root (including dotfiles)
    "README.md": "# Test Project\n",
    "config.json": '{"version": "1.0"}\n',
    ".gitignore": "*.pyc\n__pycache__/\n",
    ".env": "SECRET_KEY=test123\n",
    "requirements.txt": "numpy==1.24.0\npandas>=2.0.0\n",
    
    # Source code directory (empty strings produce zero-byte files)
    "src/main.py": "def main():\n    pass\n",
    "src/utils.py": "# Utilities\n",
    "src/__init__.py": "",
    "src/models/user.py": "class User:\n    pass\n",
    "src/models/__init__.py": "",
    "src/models/product.py": "class Product:\n    pass\n",
    
    # Tests directory
    "tests/test_main.py": "def test_main():\n    assert True\n",
    "tests/__init__.py": "",
    "tests/fixtures/data.json": '{"test": "data"}\n',
    "tests/fixtures/sample.csv": "id,name,value\n1,test,100\n",
    
    # Data directory with various file types (binary entries are fake payloads,
    # not valid parquet/pickle/image data)
    "data/raw/dataset_2023.csv": "col1,col2,col3\n1,2,3\n",
    "data/raw/dataset_2024.csv": "col1,col2,col3\n4,5,6\n",
    "data/processed/cleaned_data.parquet": b"fake parquet data",
    "data/processed/features.pkl": b"fake pickle data",
    "data/images/photo1.jpg": b"fake jpg data",
    "data/images/photo2.png": b"fake png data",
    "data/images/thumbnails/thumb1.jpg": b"fake thumbnail",
    
    # Documentation
    "docs/index.html": "<html><body>Docs</body></html>\n",
    "docs/api/endpoints.md": "# API Endpoints\n",
    "docs/api/authentication.md": "# Auth\n",
    "docs/guides/getting-started.pdf": b"fake pdf data",
    
    # Configuration files (note: a `config/` directory alongside the root-level `config.json`)
    "config/development.yaml": "debug: true\n",
    "config/production.yaml": "debug: false\n",
    "config/database.ini": "[database]\nhost=localhost\n",
    
    # Build artifacts
    "build/output.js": "console.log('built');\n",
    "build/styles.css": "body { margin: 0; }\n",
    "dist/bundle.min.js": "!function(){console.log('minified')}();\n",
    
    # Edge cases: spaces and parentheses in names, '@' and '#' characters,
    # multiple dots, and upper/mixed-case names and extensions
    "files with spaces/document 1.txt": "Content with spaces\n",
    "files with spaces/my file (copy).docx": b"fake docx",
    "special-chars/file@2024.txt": "File with @ symbol\n",
    "special-chars/data#1.csv": "test,data\n",
    "special-chars/report_v2.1.pdf": b"fake pdf",
    "multiple.dots.in.name.txt": "Multiple dots\n",
    "UPPERCASE.TXT": "UPPERCASE FILE\n",
    "MixedCase.TxT": "Mixed case extension\n",
    
    # Hidden files and directories (dot-prefixed at both directory and file level)
    ".hidden/secret.txt": "Hidden content\n",
    ".hidden/.config": "hidden config\n",
    ".cache/temp1.tmp": "cache data\n",
    
    # Directory containing only a zero-byte placeholder file; directories with
    # no entries at all are created separately after the files are written
    "empty_dir/.keep": "",
    
    # Deep nesting (five directory levels)
    "a/b/c/d/e/deep_file.txt": "Very nested\n",
    
    # Various extensions, including a double extension (.tar.gz)
    "scripts/deploy.sh": "#!/bin/bash\necho 'deploying'\n",
    "scripts/backup.bat": "@echo off\necho backing up\n",
    "notebooks/analysis.ipynb": '{"cells": []}\n',
    "media/video.mp4": b"fake video data",
    "media/audio.mp3": b"fake audio data",
    "archives/backup.zip": b"fake zip data",
    "archives/old_data.tar.gz": b"fake tar.gz data",
    
    # Files with no extension
    "LICENSE": "MIT License\n",
    "Makefile": "all:\n\techo 'building'\n",
    "Dockerfile": "FROM python:3.11\n",
    
    # Very long filename
    "long_filename_that_goes_on_and_on_and_on_to_test_length_limits.txt": "Long name\n",
    
    # Date-stamped and zero-padded numeric filenames
    "logs/2024-01-01.log": "[INFO] Log entry\n",
    "logs/2024-01-02.log": "[ERROR] Error entry\n",
    "reports/001_report.txt": "Report 1\n",
    "reports/002_report.txt": "Report 2\n",
}

# Create all files and directories described by `filesystem` beneath `root`
for filepath, content in filesystem.items():
    full_path = root / filepath
    # Parent directories are implied by the relative path; create them on demand
    full_path.parent.mkdir(parents=True, exist_ok=True)

    # Always write raw bytes. Text content is encoded as UTF-8 explicitly so the
    # bytes on disk depend neither on the locale's default encoding nor on
    # platform newline translation (both of which `write_text` would apply).
    payload = content if isinstance(content, bytes) else content.encode("utf-8")
    full_path.write_bytes(payload)

# Create a truly empty directory (no entries at all, unlike `empty_dir`)
(root / "truly_empty").mkdir(exist_ok=True)

# Create another empty directory, this time nested under new parents
(root / "temp/cache/empty").mkdir(parents=True, exist_ok=True)

# Summary; the count is the number of file entries, directories are not counted
print(f"✓ Created dummy filesystem at {root}")
print(f"✓ Total files created: {len(filesystem)}")
# No placeholders in this message, so a plain string literal is sufficient
print("✓ Includes edge cases: spaces, special chars, hidden files, deep nesting, various extensions")

✓ Created dummy filesystem at /app/data/dev/hyfs/test/fs
✓ Total files created: 60
✓ Includes edge cases: spaces, special chars, hidden files, deep nesting, various extensions


In [None]:
# Render the tree under /app/data/dev/hyfs/test as indented text.
# `find -print` emits one path per line; the sed script then
#   1. strips the common /app/data/dev/hyfs/test prefix,
#   2. replaces every "<component>/" with "|  " (one indent unit per level),
#   3. replaces the first "|  " that is followed by a non-"|" character
#      (i.e. the one right before the entry name) with "├─ ".
!find /app/data/dev/hyfs/test -print | sed -e "s;/app/data/dev/hyfs/test;;" -e "s;[^/]*/;|  ;g" -e "s;|  \([^|]\);├─ \1;"


├─ fs
|  ├─ config.json
|  ├─ .hidden
|  |  ├─ secret.txt
|  |  ├─ .config
|  ├─ src
|  |  ├─ models
|  |  |  ├─ product.py
|  |  |  ├─ __init__.py
|  |  |  ├─ user.py
|  |  ├─ utils.py
|  |  ├─ __init__.py
|  |  ├─ main.py
|  ├─ dist
|  |  ├─ bundle.min.js
|  ├─ tests
|  |  ├─ test_main.py
|  |  ├─ __init__.py
|  |  ├─ fixtures
|  |  |  ├─ data.json
|  |  |  ├─ sample.csv
|  ├─ build
|  |  ├─ styles.css
|  |  ├─ output.js
|  ├─ reports
|  |  ├─ 002_report.txt
|  |  ├─ 001_report.txt
|  ├─ config
|  |  ├─ development.yaml
|  |  ├─ production.yaml
|  |  ├─ database.ini
|  ├─ .gitignore
|  ├─ requirements.txt
|  ├─ media
|  |  ├─ video.mp4
|  |  ├─ audio.mp3
|  ├─ docs
|  |  ├─ api
|  |  |  ├─ endpoints.md
|  |  |  ├─ authentication.md
|  |  ├─ index.html
|  |  ├─ guides
|  |  |  ├─ getting-started.pdf
|  ├─ files with spaces
|  |  ├─ document 1.txt
|  |  ├─ my file (copy).docx
|  ├─ UPPERCASE.TXT
|  ├─ MixedCase.TxT
|  ├─ README.md
|  ├

# HyFS
> A Hyper FileSystem

Architecture phase: exploration (4)

In [None]:
# Print the design notes from notes/design.md (relative path) into the cell output
%cat notes/design.md

# HyFS Design Document
> Hyper FileSystem - A filesystem abstraction with stable identity and semantic relationships

## Vision

HyFS provides stable entity identification and semantic organization for filesystems. Files and directories get persistent identities (eids) that survive renames and moves, enabling tagging, relationship tracking, and multiple views of the same underlying data.

Built for interactive exploration in SolveIT notebooks using fastcore principles.

## Core Philosophy

### Principle of Lean Information Form (LIF)

Information must be expressed in its meaningful form, preserving integrity without requiring decoders. Store semantic structure directly, decide display independently.

**LIF Lemma 1: Separation of Concerns**

Three orthogonal concepts, stored separately:
1. **Entity storage**: Flat dict `{eid -> node}` (canonical)
2. **Filesystem hierarchy**: Derived from `path` relationships (view)
3. **Semantic organization**: Tags and relations (me

In [None]:
# Print the architecture notes from notes/arch.md (relative path) into the cell output
%cat notes/arch.md

# HyFS Architecture Document
> Implementation details, data structures, and algorithms

## Data Structures

### FSNode: AttrDict with Properties

`FSNode` extends `AttrDict` to enable both dict-style (`node['path']`) and attribute-style (`node.path`) access, optimized for REPL exploration.

**Property override challenge**: AttrDict's `__getattr__` intercepts attribute access before class properties. Solution: override `__getattribute__` to check class properties first.

Properties enable lazy computation:
- `cid`: Computed on first access, cached in node dict
- Future: `size`, `mtime`, `permissions`

**Custom repr**: Shows file/dir icon (📄/📁), name, and truncated eid (8 chars) for clean REPL display.

### HyFS: Flat Storage Container

```python
self.nodes = {}              # eid -> FSNode (canonical storage)
self.path_index = {}         # path -> eid (O(1) lookups)
self.children_index = {}     # parent_eid -> {child_eids} (O(n) tree construction)
self.tags = defa

In [None]:
import uuid
import os
import errno
from hashlib import sha256
from pathlib import Path
from fastcore.basics import AttrDict, patch
from fastcore.foundation import L
from fnmatch import fnmatch
from collections import defaultdict
