# 🚀 filoma demo

filoma provides fast, multi-backend file and directory analysis (Rust, fd, or pure Python) behind a tiny API surface.

In [16]:
import filoma

# Print the installed filoma version so the recorded outputs can be tied to a release.
print(f"filoma version: {filoma.__version__}")

filoma version: 1.7.3


## 🔍📁 Directory analysis

In [17]:
from filoma.directories import DirectoryProfiler, DirectoryProfilerConfig

# Create a profiler using the typed config dataclass.
# use_rust=True requests the Rust backend (the log output reports "Rust (Parallel)").
config = DirectoryProfilerConfig(use_rust=True)
dp1 = DirectoryProfiler(config)

# Probe the parent of the current working directory, then print a summary of the result.
# `dp1` and `analysis` are reused by the following cell.
analysis = dp1.probe("../")
dp1.print_summary(analysis)

[32m2025-09-10 23:27:12.087[0m | [34m[1mDEBUG   [0m | [36mfiloma.directories.directory_profiler[0m:[36m__init__[0m:[36m343[0m - [34m[1mInteractive environment detected, disabling progress bars to avoid conflicts[0m
[32m2025-09-10 23:27:12.087[0m | [1mINFO    [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m430[0m - [1mStarting directory analysis of '../' using 🦀 Rust (Parallel) implementation[0m
[32m2025-09-10 23:27:12.629[0m | [32m[1mSUCCESS [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m446[0m - [32m[1mDirectory analysis completed in 0.54s - Found 54,573 items (51,369 files, 3,204 folders) using 🦀 Rust (Parallel)[0m


In [18]:
# Print the report for the `analysis` result produced by the probe in the previous cell.
dp1.print_report(analysis)

## 📁 Directory to DataFrame

In [19]:
from filoma import probe_to_df

# Probe the parent directory straight into a DataFrame.
# NOTE(review): max_depth=2 presumably limits traversal depth and enrich=True
# presumably adds the path/stat metadata columns — confirm against the filoma docs.
df = probe_to_df("../", max_depth=2, enrich=True)
# Rows include directories as well as files (see the is_dir column), so report
# them as entries rather than files.
print(f"Found {len(df)} entries (files and directories)")
df.head()

[32m2025-09-10 23:27:12.645[0m | [34m[1mDEBUG   [0m | [36mfiloma.directories.directory_profiler[0m:[36m__init__[0m:[36m343[0m - [34m[1mInteractive environment detected, disabling progress bars to avoid conflicts[0m
[32m2025-09-10 23:27:12.646[0m | [1mINFO    [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m430[0m - [1mStarting directory analysis of '../' using 🐍 Python implementation[0m
[32m2025-09-10 23:27:12.945[0m | [32m[1mSUCCESS [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m446[0m - [32m[1mDirectory analysis completed in 0.30s - Found 360 items (299 files, 61 folders) using 🐍 Python[0m


Found 359 files


path,depth,parent,name,stem,suffix,size_bytes,modified_time,created_time,is_file,is_dir,owner,group,mode_str,inode,nlink,sha256,xattrs
str,i64,str,str,str,str,i64,str,str,bool,bool,str,str,str,i64,i64,str,str
"""../pyproject.toml""",1,"""..""","""pyproject.toml""","""pyproject""",""".toml""",1791,"""2025-09-10 23:22:43""","""2025-09-10 23:22:43""",True,False,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7579961,1,,"""{}"""
"""../scripts""",1,"""..""","""scripts""","""scripts""","""""",4096,"""2025-09-05 20:26:25""","""2025-09-05 20:26:25""",False,True,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7603122,2,,"""{}"""
"""../.pytest_cache""",1,"""..""",""".pytest_cache""",""".pytest_cache""","""""",4096,"""2025-07-05 22:28:03""","""2025-07-05 22:28:03""",False,True,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7604845,3,,"""{}"""
"""../.vscode""",1,"""..""",""".vscode""",""".vscode""","""""",4096,"""2025-07-06 11:11:18""","""2025-07-06 11:11:18""",False,True,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7591635,2,,"""{}"""
"""../Makefile""",1,"""..""","""Makefile""","""Makefile""","""""",2827,"""2025-09-07 22:29:37""","""2025-09-07 22:29:37""",True,False,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7603119,1,,"""{}"""


## ⚡ DataFrame enrichment

In [20]:
from filoma.directories import DirectoryProfiler, DirectoryProfilerConfig

# Configure an fd-backed profiler that also builds a DataFrame of the results.
cfg = DirectoryProfilerConfig(
    build_dataframe=True,  # makes `res.dataframe` available after the probe
    use_fd=True,  # the log output reports the fd implementation
    return_absolute_paths=True,  # paths in the resulting frame are absolute
    threads=8,  # NOTE(review): presumably the worker-thread count — confirm
)
dprof = DirectoryProfiler(cfg)

# Probe the parent directory and keep the enriched DataFrame for the cells below.
res = dprof.probe("../")
df = res.dataframe.enrich()

[32m2025-09-10 23:27:13.004[0m | [34m[1mDEBUG   [0m | [36mfiloma.directories.directory_profiler[0m:[36m__init__[0m:[36m343[0m - [34m[1mInteractive environment detected, disabling progress bars to avoid conflicts[0m
[32m2025-09-10 23:27:13.004[0m | [1mINFO    [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m430[0m - [1mStarting directory analysis of '../' using 🔍 fd implementation[0m
[32m2025-09-10 23:27:13.032[0m | [34m[1mDEBUG   [0m | [36mfiloma.core.fd_integration[0m:[36mfind[0m:[36m171[0m - [34m[1mfd found 51369 results[0m
[32m2025-09-10 23:27:13.047[0m | [34m[1mDEBUG   [0m | [36mfiloma.core.fd_integration[0m:[36mfind[0m:[36m171[0m - [34m[1mfd found 3203 results[0m
[32m2025-09-10 23:27:13.927[0m | [32m[1mSUCCESS [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m446[0m - [32m[1mDirectory analysis completed in 0.92s - Found 54,573 items (51,369 files, 3,204 folders) using 🔍 fd[0m


In [21]:
# Display the enriched DataFrame from the previous cell (the rendered table is truncated).
df

path,parent,name,stem,suffix,size_bytes,modified_time,created_time,is_file,is_dir,owner,group,mode_str,inode,nlink,sha256,xattrs,depth
str,str,str,str,str,i64,str,str,bool,bool,str,str,str,i64,i64,str,str,i64
"""/home/kalfasy/repos/filoma/__p…","""/home/kalfasy/repos/filoma/__p…","""test_analyzer.cpython-311-pyte…","""test_analyzer.cpython-311-pyte…",""".pyc""",4940,"""2025-07-06 10:56:45""","""2025-07-06 10:56:45""",true,false,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7608138,1,,"""{}""",2
"""/home/kalfasy/repos/filoma/__p…","""/home/kalfasy/repos/filoma/__p…","""debug_test.cpython-311-pytest-…","""debug_test.cpython-311-pytest-…",""".pyc""",3689,"""2025-07-05 23:01:37""","""2025-07-05 23:01:37""",true,false,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7602074,1,,"""{}""",2
"""/home/kalfasy/repos/filoma/.ru…","""/home/kalfasy/repos/filoma/.ru…","""CACHEDIR.TAG""","""CACHEDIR""",""".TAG""",43,"""2025-07-05 22:28:05""","""2025-07-05 22:28:05""",true,false,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7604854,1,,"""{}""",2
"""/home/kalfasy/repos/filoma/.ru…","""/home/kalfasy/repos/filoma/.ru…","""18404585368784669101""","""18404585368784669101""","""""",1233,"""2025-09-07 22:13:49""","""2025-09-07 22:13:49""",true,false,"""kalfasy""","""kalfasy""","""-rw-------""",7601093,1,,"""{}""",3
"""/home/kalfasy/repos/filoma/.ru…","""/home/kalfasy/repos/filoma/.ru…","""17256147029910247496""","""17256147029910247496""","""""",183,"""2025-09-07 22:09:03""","""2025-09-07 22:09:03""",true,false,"""kalfasy""","""kalfasy""","""-rw-------""",7600948,1,,"""{}""",3
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""/home/kalfasy/repos/filoma/.ve…","""/home/kalfasy/repos/filoma/.ve…","""common""","""common""","""""",4096,"""2025-09-10 23:22:14""","""2025-09-10 23:22:14""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",8096958,2,,"""{}""",9
"""/home/kalfasy/repos/filoma/.ve…","""/home/kalfasy/repos/filoma/.ve…","""return_logical""","""return_logical""","""""",4096,"""2025-09-10 23:22:14""","""2025-09-10 23:22:14""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",8096833,2,,"""{}""",9
"""/home/kalfasy/repos/filoma/.ve…","""/home/kalfasy/repos/filoma/.ve…","""return_character""","""return_character""","""""",4096,"""2025-09-10 23:22:14""","""2025-09-10 23:22:14""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",8096957,2,,"""{}""",9
"""/home/kalfasy/repos/filoma/.ve…","""/home/kalfasy/repos/filoma/.ve…","""size""","""size""","""""",4096,"""2025-09-10 23:22:14""","""2025-09-10 23:22:14""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",8096959,2,,"""{}""",9


In [22]:
# The table above elides long strings with "…"; pull the first `parent` value
# out via to_polars() to see it in full.
df.to_polars()["parent"][0]

'/home/kalfasy/repos/filoma/__pycache__'

## 🤖 ML-ready splits

In [23]:
from filoma import ml

# Partition the enriched frame into train/val/test subsets using a 70/15/15 ratio.
train, val, test = ml.auto_split(
    df,
    train_val_test=(70, 15, 15),
    seed=42,  # fixed seed so the split can be reproduced
    include_all_parts=True,  # NOTE(review): presumably what adds the `_feat_parts` column — confirm
)

# Report how many rows landed in each subset, then preview the training set.
print(f"Split sizes: {len(train)}, {len(val)}, {len(test)}")
train.head(3)

Split sizes: 38422, 7841, 8309


path,parent,name,stem,suffix,size_bytes,modified_time,created_time,is_file,is_dir,owner,group,mode_str,inode,nlink,sha256,xattrs,depth,_feat_parts
str,str,str,str,str,i64,str,str,bool,bool,str,str,str,i64,i64,str,str,i64,str
"""/home/kalfasy/repos/filoma/__p…","""/home/kalfasy/repos/filoma/__p…","""debug_test.cpython-311-pytest-…","""debug_test.cpython-311-pytest-…",""".pyc""",3689,"""2025-07-05 23:01:37""","""2025-07-05 23:01:37""",True,False,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7602074,1,,"""{}""",2,"""debug_test.cpython-311-pytest-…"
"""/home/kalfasy/repos/filoma/.ru…","""/home/kalfasy/repos/filoma/.ru…","""CACHEDIR.TAG""","""CACHEDIR""",""".TAG""",43,"""2025-07-05 22:28:05""","""2025-07-05 22:28:05""",True,False,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7604854,1,,"""{}""",2,"""CACHEDIR.TAG"""
"""/home/kalfasy/repos/filoma/.ru…","""/home/kalfasy/repos/filoma/.ru…","""7358665038973574044""","""7358665038973574044""","""""",163,"""2025-09-07 22:09:03""","""2025-09-07 22:09:03""",True,False,"""kalfasy""","""kalfasy""","""-rw-------""",7600957,1,,"""{}""",3,"""7358665038973574044"""


## 📄 Single file probe

In [24]:
from filoma import probe_file

# Probe a single file and print a few of the attributes exposed on the result.
file_info = probe_file("../README.md")
print(f"Path: {file_info.path}")  # printed as an absolute path in the recorded output
print(f"Size: {file_info.size}")  # NOTE(review): presumably bytes — confirm
print(f"Modified: {file_info.modified}")

Path: /home/kalfasy/repos/filoma/README.md
Size: 10878
Modified: 2025-09-10 23:12:25


## 🖼️ Image analysis

In [25]:
from filoma import probe_image

# Probe an image file and print its type, array shape and value range.
img = probe_image("../images/logo.png")
print(f"Type: {img.file_type}")
print(f"Shape: {img.shape}")  # NOTE(review): looks like (height, width, channels) — confirm
print(f"Data range: {img.min} - {img.max}")

Type: png
Shape: (762, 628, 4)
Data range: 0.0 - 255.0
