High-performance file metadata scanner for Windows and Linux.
Supports scanning of local drives:
- Windows: C:, D:, E:, and other drive letters (C: is excluded by default)
- Linux/WSL2: /mnt and /media mount points (/mnt/c is excluded by default)
Project features:
- Python 3.12.3 (CVE-2024-12718-safe)
- Cross-platform support (Windows and Linux)
- `.venv` virtual environment
- `fileScanner` package with CLI
- Comprehensive test suite with pytest
- Docker support
git clone <your-repo-url>
cd FileScanner
python -m venv .venv
.venv\Scripts\activate
pip install --upgrade pip
pip install -r requirements.txt
python -m fileScanner scan --help
git clone <your-repo-url>
cd FileScanner
python3.12 -m venv .venv
source .venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
python -m fileScanner scan --help
Scan specific drives:
# Windows: Scan D: and E: drives
python -m fileScanner scan --drives d e
# Linux/WSL2: Scan /mnt/d and /mnt/e
python -m fileScanner scan --drives d e
- `--drives d e f`: Specify drives to scan (default: all except C: or /mnt/c)
- `--output-dir ./custom_results/`: Custom output directory (default: ./Results_)
- `--processes 8`: Number of worker processes (default: CPU core count)
- `--benchmark`: Enable performance metrics
- `--skip-errors`: Continue on file access errors
- `--dry-run`: Preview without writing output
- `-v`, `-vv`, `-vvv`: Increase logging verbosity
Results are written in three formats:
- CSV: Detailed file listing
- XLSX: Excel spreadsheet with formatting
- HTML: Interactive HTML report
Output is organized in timestamped directories:
Results_/
├── scan_20251217_143022/
│ ├── files.csv
│ ├── report.html
│ ├── report.xlsx
│ └── scan_log.txt
# Build
docker build --pull --rm -f 'Dockerfile' -t 'filescanner:latest' '.'
# Run
docker run --rm -it filescanner:latest python -m fileScanner scan --help
Windows:
- Uses python-magic-bin for MIME type detection
- Permissions represented as "readable" or "read-only" (ACL-based)
- No uid/gid attributes
Linux/WSL2:
- Uses python-magic for MIME type detection
- Permissions shown as octal format (e.g., "644")
- Includes uid/gid attributes
Windows:
# Activate virtual environment
.venv\Scripts\activate
# Run the scanner
python -m fileScanner scan --help
python -m fileScanner scan --drives d e --benchmark
Linux/WSL2:
# Activate virtual environment
source .venv/bin/activate
# Run the scanner
python -m fileScanner scan --help
python -m fileScanner scan --drives d e --benchmark
Windows:
# Using full path to Python executable
D:\WSL-Docker\FileScanner\.venv\Scripts\python.exe -m fileScanner scan --help
Linux/WSL2:
# Using full path to Python executable
/path/to/FileScanner/.venv/bin/python -m fileScanner scan --help
docker run --rm filescannerapp:latest python -m fileScanner scan --help
# Activate virtual environment first
# Windows: .venv\Scripts\activate
# Linux: source .venv/bin/activate
# Run all tests with minimal output
pytest
# Run all tests with verbose output showing each test
pytest -v
# Run all tests with detailed output and stop on first failure
pytest -vv -x
# Run only scanner tests
pytest tests/test_scanner.py -v
# Run only metadata tests
pytest tests/test_metadata.py -v
# Run only output tests
pytest tests/test_output.py -v
# Run only parallel processing tests
pytest tests/test_parallel.py -v
# Run only CLI tests
pytest tests/test_cli.py -v
# Run TestDiscoverDrives class
pytest tests/test_scanner.py::TestDiscoverDrives -v
# Run TestExtractMetadata class
pytest tests/test_metadata.py::TestExtractMetadata -v
# Run TestWriteResults class
pytest tests/test_output.py::TestWriteResults -v
# Run single test
pytest tests/test_metadata.py::TestExtractMetadata::test_extract_metadata_regular_file -v
# Run multiple specific tests
pytest tests/test_scanner.py::TestDiscoverDrives::test_discover_drives_cross_platform tests/test_metadata.py::TestExtractMetadata::test_extract_metadata_regular_file -v
# Run tests with output capture disabled (see print statements)
pytest -s -v
# Run tests and stop after first failure
pytest -x
# Run tests and stop after N failures
pytest --maxfail=3
# Run tests with specific marker
pytest -m "not slow" -v
# Run tests with coverage report
pytest --cov=fileScanner tests/
# Run tests and generate HTML report
pytest --html=report.html
# Using Python module execution (without pytest installed in PATH)
python -m pytest tests/ -v
# Using full virtual environment path
.venv\Scripts\python.exe -m pytest tests/ -v # Windows
/path/to/.venv/bin/python -m pytest tests/ -v # Linux
# Build and run tests in container
docker build -t filescannerapp:test .
docker run --rm filescannerapp:test python -m pytest tests/ -v
# Show only summary (passed/failed/skipped count)
pytest --tb=no -q
# Show summary with percentage
pytest -q --tb=line
# Generate JSON report
pytest --json-report --json-report-file=report.json
# Scan all available drives (except C: on Windows or /mnt/c on Linux)
python -m fileScanner scan
# Results are saved in: Results_/scan_YYYYMMDD_HHMMSS/
# - files.csv (CSV format)
# - files.xlsx (Excel format)
# - report.html (HTML report)
# - scan_log.txt (Detailed log file)
Windows:
# Scan D: and E: drives only
python -m fileScanner scan --drives d e
# Scan single drive
python -m fileScanner scan --drives d
Linux/WSL2:
# Scan /mnt/d and /mnt/e
python -m fileScanner scan --drives d e
# Scan /media mount points
python -m fileScanner scan --drives usb backup
# Save results to custom location
python -m fileScanner scan --output-dir ./my_results/
# Save to absolute path
python -m fileScanner scan --output-dir /home/user/scan_results/ # Linux
python -m fileScanner scan --output-dir D:\MyScans\ # Windows
# Use specific number of worker processes
python -m fileScanner scan --processes 4
# Enable benchmark for performance metrics
python -m fileScanner scan --benchmark
# Combine options
python -m fileScanner scan --drives d --processes 8 --benchmark
# Skip files with access errors and continue scanning
python -m fileScanner scan --skip-errors
# Combine with verbose logging to see which errors occurred
python -m fileScanner scan --skip-errors -vv
# Preview what would be scanned without writing files
python -m fileScanner scan --dry-run
# Dry run with benchmark to see expected performance
python -m fileScanner scan --dry-run --benchmark
# Single verbosity level (INFO)
python -m fileScanner scan -v
# Double verbosity (DEBUG)
python -m fileScanner scan -vv
# Triple verbosity (TRACE/most detailed)
python -m fileScanner scan -vvv
# Combine with output directory
python -m fileScanner scan --drives d e --output-dir ./results/ -v
Full scan with all options:
python -m fileScanner scan \
--drives d e \
--output-dir ./comprehensive_scan/ \
--processes 8 \
--benchmark \
--skip-errors \
-vv
Quick scan for testing:
python -m fileScanner scan --drives d --dry-run --benchmark
Production scan with error handling:
python -m fileScanner scan \
--output-dir /secure/backups/file_inventory/ \
--skip-errors \
-v
After scanning, results are in Results_/scan_YYYYMMDD_HHMMSS/:
# View CSV in terminal (first 10 lines)
head files.csv
# Open HTML report in browser
open report.html # macOS
xdg-open report.html # Linux
start report.html # Windows
# Read log for errors
tail scan_log.txt
# Build image
docker build -t filescannerapp:latest .
# Run with help
docker run --rm filescannerapp:latest python -m fileScanner scan --help
# Run basic scan
docker run --rm filescannerapp:latest python -m fileScanner scan
# Build with specific version tag
docker build -t filescannerapp:v1.0 .
# Build with multiple tags
docker build -t filescannerapp:latest -t filescannerapp:stable .
# Build from specific Dockerfile
docker build -f Dockerfile -t filescannerapp:latest .
Windows:
# Mount Windows drive to container
docker run --rm -v D:\data:/scan_data filescannerapp:latest \
python -m fileScanner scan --output-dir /scan_data/results/
# Mount multiple drives
docker run --rm \
-v D:\data:/data_d \
-v E:\backup:/data_e \
filescannerapp:latest \
python -m fileScanner scan --drives d e
Linux/WSL2:
# Mount directory to container
docker run --rm -v /mnt/d:/scan_data filescannerapp:latest \
python -m fileScanner scan --output-dir /scan_data/results/
# Run in interactive mode with shell
docker run --rm -it filescannerapp:latest /bin/bash
# Run with environment variables (if app supports them)
docker run --rm \
-e PYTHONUNBUFFERED=1 \
filescannerapp:latest \
python -m fileScanner scan
# Run pytest in container
docker run --rm filescannerapp:latest python -m pytest tests/ -v
# Run specific test file
docker run --rm filescannerapp:latest python -m pytest tests/test_scanner.py -v
# Run with coverage
docker run --rm filescannerapp:latest python -m pytest tests/ --cov=fileScanner
# Force rebuild without using cached layers
docker build --no-cache -t filescannerapp:latest .
# Useful when dependencies have been updated
docker build --pull --no-cache -t filescannerapp:latest .
Create docker-compose.yml:
version: '3.8'
services:
  scanner:
    build: .
    image: filescannerapp:latest
    volumes:
      - /mnt/d:/scan_drive
    command: python -m fileScanner scan --drives d --output-dir /scan_drive/results/
Run with compose:
docker-compose up
Run with resource limits:
# Limit CPU and memory
docker run --rm \
--cpus="2" \
--memory="2g" \
filescannerapp:latest \
python -m fileScanner scan
Run in background:
# Run detached
docker run -d \
--name scanner_job \
-v D:\data:/scan_data \
filescannerapp:latest \
python -m fileScanner scan --output-dir /scan_data/results/
# Check logs
docker logs scanner_job
# Stop container
docker stop scanner_job
Inspect running container:
# List running containers
docker ps
# View logs
docker logs <container_id>
# Execute command in running container
docker exec <container_id> ls -la /scan_data/
# Remove image
docker rmi filescannerapp:latest
# Remove all dangling images
docker image prune
# Remove stopped containers
docker container prune
# Full cleanup (images, containers, volumes)
docker system prune -a
# View full image layers
docker history filescannerapp:latest
# Inspect image details
docker inspect filescannerapp:latest
# Build with verbose output
docker build --progress=plain -t filescannerapp:latest .
# Test if Python and packages are available
docker run --rm filescannerapp:latest python -c "import magic; print(magic.__version__)"
# Run all tests
pytest
# Run with verbose output
pytest -v
# Run platform-specific tests only
pytest -m "not skipif"
Cross-platform tests automatically adapt to the running OS.