In [None]:
import io
import os
import time
import random
import string
from pathlib import Path
from trie_memfs import TrieMemoryFileSystem
import fsspec
import pandas as pd

# --------------------------
# Configurable Parameters
# --------------------------
# Number of directories created directly under the benchmark base path.
N_DIRS = 10
# Number of child directories created inside each directory below the top level.
N_SUBDIRS = 2
# Number of files written into every directory that make_tree creates.
N_FILES = 20
# Number of directory levels created below the base path.
DEPTH = 2
# Size, in bytes, of the payload written to each file.
FILE_SIZE = 1024
# Seed for the pseudo-random payload so runs are reproducible.
SEED = 42

# Filesystem implementations under test, keyed by the label shown in the results.
FILESYSTEMS: dict[str, fsspec.AbstractFileSystem] = dict(
    trie_memory=TrieMemoryFileSystem(),
    memory=fsspec.filesystem("memory"),
    local=fsspec.filesystem("file"),
)

# --------------------------
# Helper functions
# --------------------------

def random_bytes(size):
    """Return ``size`` pseudo-random bytes from a generator seeded with SEED.

    A fresh ``random.Random(SEED)`` is created on every call, so two calls
    with the same ``size`` produce the same bytes.
    """
    generator = random.Random(SEED)
    if not hasattr(generator, "randbytes"):
        # Fallback for generators without ``randbytes``: draw one byte at a time.
        return bytes(generator.getrandbits(8) for _ in range(size))
    return generator.randbytes(size)

def make_tree(
    fs: fsspec.AbstractFileSystem,
    base_path: str,
    depth: int,
    n_dirs: int,
    n_subdirs: int,
    n_files: int,
    random_file: bytes,
):
    """Build a synthetic directory tree full of identical files on ``fs``.

    Directories are named ``dir_<level>_<index>`` and files
    ``file_<index>.bin``. Level 0 (directly under ``base_path``) gets
    ``n_dirs`` directories; every deeper level gets ``n_subdirs`` per parent.
    Each created directory receives ``n_files`` files containing
    ``random_file``. Nothing is created when ``depth`` is 0 or less.
    """

    def _fill(parent, level):
        # Stop once the requested number of levels has been created.
        if level >= depth:
            return
        fanout = n_subdirs if level else n_dirs
        for dir_idx in range(fanout):
            current = f"{parent}/dir_{level}_{dir_idx}"
            fs.mkdirs(current, exist_ok=True)
            for file_idx in range(n_files):
                with fs.open(f"{current}/file_{file_idx}.bin", "wb") as handle:
                    handle.write(random_file)
            # Descend only after this directory's own files are written.
            _fill(current, level + 1)

    _fill(base_path, 0)

def time_call(func, *args, repeat=3, **kwargs):
    """Call ``func(*args, **kwargs)`` ``repeat`` times and return the mean wall time.

    Each invocation is measured separately with ``time.perf_counter``; the
    result is the arithmetic mean of those durations in seconds. The return
    value of ``func`` is discarded.
    """

    def _single_run():
        # Measure exactly one invocation.
        began = time.perf_counter()
        func(*args, **kwargs)
        return time.perf_counter() - began

    durations = [_single_run() for _ in range(repeat)]
    return sum(durations) / len(durations)

# --------------------------
# Benchmark runner
# --------------------------

# One shared payload is written to every file in every filesystem.
content = random_bytes(FILE_SIZE)

results = []
for fs_name, fs in FILESYSTEMS.items():
    # NOTE(review): for the "local" filesystem this path sits directly under
    # the filesystem root -- confirm the process is allowed to write there.
    base = f"/benchmark_{fs_name}"
    print(f"\nCreating tree in {fs_name}...")

    # Deterministic sample file used for the single-file read benchmark.
    sample_file = f"{base}/dir_0_0/file_0.bin"

    # Operations run strictly in this order: build the tree, run the
    # read-only benchmarks, then remove the tree. Keys are inserted in the
    # order the columns should appear in the results table.
    timings = {}
    timings["make_tree"] = time_call(
        make_tree, fs, base, DEPTH, N_DIRS, N_SUBDIRS, N_FILES, content, repeat=1
    )
    timings["ls_root"] = time_call(fs.ls, base, detail=True)
    # The lambda is invoked immediately, so capturing loop variables is safe.
    timings["walk"] = time_call(lambda: list(fs.walk(base)))
    # Must be measured BEFORE "rm": once the tree is deleted the sample file
    # no longer exists and this entry would always be None.
    timings["cat_random_file"] = (
        time_call(fs.cat, sample_file) if fs.exists(sample_file) else None
    )
    # Removal is destructive, so it runs once and last.
    timings["rm"] = time_call(fs.rm, base, recursive=True, repeat=1)

    results.append({"filesystem": fs_name, **timings})

# --------------------------
# Display results
# --------------------------
df = pd.DataFrame(results)
display(df)


Creating tree in trie_memory...

Creating tree in memory...

Creating tree in local...


Unnamed: 0,filesystem,make_tree,ls_root,walk,cat_random_file,rm
0,trie_memory,0.003613,1.1e-05,5.8e-05,,0.001664
1,memory,0.003674,0.00019,0.00169,,0.003775
2,local,0.177648,0.00013,0.004867,,0.06945
