# 🚀 filoma demo

Fast, multi-backend file analysis with a tiny API surface

In [3]:
import filoma

print(f"filoma version: {filoma.__version__}")

filoma version: 1.7.2


# 🔍📁 Directory Analysis

In [None]:
from filoma.directories import DirectoryProfiler, DirectoryProfilerConfig

# Create a profiler using the typed config dataclass
config = DirectoryProfilerConfig(use_rust=True)
dp1 = DirectoryProfiler(config)

analysis = dp1.probe("../")
dp1.print_summary(analysis)

[32m2025-09-09 08:30:25.950[0m | [34m[1mDEBUG   [0m | [36mfiloma.directories.directory_profiler[0m:[36m__init__[0m:[36m318[0m - [34m[1mInteractive environment detected, disabling progress bars to avoid conflicts[0m
[32m2025-09-09 08:30:25.950[0m | [1mINFO    [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m433[0m - [1mStarting directory analysis of '../' using 🦀 Rust (Parallel) implementation[0m
[32m2025-09-09 08:30:26.496[0m | [32m[1mSUCCESS [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m449[0m - [32m[1mDirectory analysis completed in 0.55s - Found 54,402 items (51,224 files, 3,178 folders) using 🦀 Rust (Parallel)[0m


In [5]:
dp1.print_report(analysis)

## 📁 Directory to DataFrame

In [6]:
from filoma import probe_to_df

df = probe_to_df("../", max_depth=2, enrich=True)
print(f"Found {len(df)} files")
df.head()

[32m2025-09-09 08:30:26.514[0m | [34m[1mDEBUG   [0m | [36mfiloma.directories.directory_profiler[0m:[36m__init__[0m:[36m318[0m - [34m[1mInteractive environment detected, disabling progress bars to avoid conflicts[0m
[32m2025-09-09 08:30:26.514[0m | [1mINFO    [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m433[0m - [1mStarting directory analysis of '../' using 🦀 Rust (Parallel) implementation[0m
[32m2025-09-09 08:30:26.675[0m | [32m[1mSUCCESS [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m449[0m - [32m[1mDirectory analysis completed in 0.16s - Found 320 items (274 files, 46 folders) using 🦀 Rust (Parallel)[0m


Found 153 files


path,depth,parent,name,stem,suffix,size_bytes,modified_time,created_time,is_file,is_dir,owner,group,mode_str,inode,nlink,sha256,xattrs
str,i64,str,str,str,str,i64,str,str,bool,bool,str,str,str,i64,i64,str,str
"""../pyproject.toml""",1,"""..""","""pyproject.toml""","""pyproject""",""".toml""",1791,"""2025-09-07 22:46:06""","""2025-09-07 22:46:06""",True,False,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7579961,1,,"""{}"""
"""../scripts""",1,"""..""","""scripts""","""scripts""","""""",4096,"""2025-09-05 20:26:25""","""2025-09-05 20:26:25""",False,True,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7603122,2,,"""{}"""
"""../.pytest_cache""",1,"""..""",""".pytest_cache""",""".pytest_cache""","""""",4096,"""2025-07-05 22:28:03""","""2025-07-05 22:28:03""",False,True,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7604845,3,,"""{}"""
"""../.vscode""",1,"""..""",""".vscode""",""".vscode""","""""",4096,"""2025-07-06 11:11:18""","""2025-07-06 11:11:18""",False,True,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7591635,2,,"""{}"""
"""../Makefile""",1,"""..""","""Makefile""","""Makefile""","""""",2827,"""2025-09-07 22:29:37""","""2025-09-07 22:29:37""",True,False,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7603119,1,,"""{}"""


## ⚡ DataFrame enrichment

In [None]:
from filoma.directories import DirectoryProfiler, DirectoryProfilerConfig

cfg = DirectoryProfilerConfig(build_dataframe=True, use_fd=False, return_absolute_paths=True, threads=8)
dprof = DirectoryProfiler(cfg)
res = dprof.probe("../")
df = res.dataframe.enrich()

[32m2025-09-09 08:34:29.771[0m | [34m[1mDEBUG   [0m | [36mfiloma.directories.directory_profiler[0m:[36m__init__[0m:[36m318[0m - [34m[1mInteractive environment detected, disabling progress bars to avoid conflicts[0m
[32m2025-09-09 08:34:29.771[0m | [1mINFO    [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m433[0m - [1mStarting directory analysis of '../' using 🦀 Rust (Parallel) implementation[0m
[32m2025-09-09 08:34:30.471[0m | [32m[1mSUCCESS [0m | [36mfiloma.directories.directory_profiler[0m:[36mprobe[0m:[36m449[0m - [32m[1mDirectory analysis completed in 0.70s - Found 54,402 items (51,224 files, 3,178 folders) using 🦀 Rust (Parallel)[0m


In [11]:
df

path,parent,name,stem,suffix,size_bytes,modified_time,created_time,is_file,is_dir,owner,group,mode_str,inode,nlink,sha256,xattrs,depth
str,str,str,str,str,i64,str,str,bool,bool,str,str,str,i64,i64,str,str,i64
"""../pyproject.toml""","""..""","""pyproject.toml""","""pyproject""",""".toml""",1791,"""2025-09-07 22:46:06""","""2025-09-07 22:46:06""",true,false,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7579961,1,,"""{}""",1
"""../scripts""","""..""","""scripts""","""scripts""","""""",4096,"""2025-09-05 20:26:25""","""2025-09-05 20:26:25""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7603122,2,,"""{}""",1
"""../.pytest_cache""","""..""",""".pytest_cache""",""".pytest_cache""","""""",4096,"""2025-07-05 22:28:03""","""2025-07-05 22:28:03""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7604845,3,,"""{}""",1
"""../.vscode""","""..""",""".vscode""",""".vscode""","""""",4096,"""2025-07-06 11:11:18""","""2025-07-06 11:11:18""",false,true,"""kalfasy""","""kalfasy""","""drwxrwxr-x""",7591635,2,,"""{}""",1
"""../Makefile""","""..""","""Makefile""","""Makefile""","""""",2827,"""2025-09-07 22:29:37""","""2025-09-07 22:29:37""",true,false,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7603119,1,,"""{}""",1
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""../.ruff_cache/0.11.2/73586650…","""../.ruff_cache/0.11.2""","""7358665038973574044""","""7358665038973574044""","""""",163,"""2025-09-07 22:09:03""","""2025-09-07 22:09:03""",true,false,"""kalfasy""","""kalfasy""","""-rw-------""",7600957,1,,"""{}""",3
"""../.ruff_cache/0.11.2/17256147…","""../.ruff_cache/0.11.2""","""17256147029910247496""","""17256147029910247496""","""""",183,"""2025-09-07 22:09:03""","""2025-09-07 22:09:03""",true,false,"""kalfasy""","""kalfasy""","""-rw-------""",7600948,1,,"""{}""",3
"""../.ruff_cache/0.11.2/18404585…","""../.ruff_cache/0.11.2""","""18404585368784669101""","""18404585368784669101""","""""",1233,"""2025-09-07 22:13:49""","""2025-09-07 22:13:49""",true,false,"""kalfasy""","""kalfasy""","""-rw-------""",7601093,1,,"""{}""",3
"""../__pycache__/debug_test.cpyt…","""../__pycache__""","""debug_test.cpython-311-pytest-…","""debug_test.cpython-311-pytest-…",""".pyc""",3689,"""2025-07-05 23:01:37""","""2025-07-05 23:01:37""",true,false,"""kalfasy""","""kalfasy""","""-rw-rw-r--""",7602074,1,,"""{}""",2


In [None]:
df.to_polars()["parent"][0]

'/home/kalfasy/repos/filoma/docs'

In [10]:
from filoma import DataFrame

wrapper = DataFrame(df)
wrapper.df.head()

ValueError: data must be a Polars DataFrame, list of paths, or None

## 🤖 ML-ready splits

In [None]:
from filoma import ml

# Split into train/val/test sets with 70% train, 15% val, 15% test
train, val, test = ml.auto_split(df, train_val_test=(70, 15, 15), seed=42, include_all_parts=True)
print(f"Split sizes: {len(train)}, {len(val)}, {len(test)}")
train.head(3)

## 📄 Single file probe

In [None]:
from filoma import probe_file

file_info = probe_file("../README.md")
print(f"Path: {file_info.path}")
print(f"Size: {file_info.size}")
print(f"Modified: {file_info.modified}")

## 🖼️ Image analysis

In [None]:
from filoma import probe_image

img = probe_image("../images/logo.png")
print(f"Type: {img.file_type}")
print(f"Shape: {img.shape}")
print(f"Data range: {img.min} - {img.max}")