# Code Aggregator - Documentation Export Tool

This notebook aggregates project files into separate text outputs for documentation and review.

**Outputs:**
1. `markdown_export.txt` - All markdown documentation
2. `code_web_export.txt` - Code and web files (Python, HTML, JavaScript, CSS, notebooks, etc.)
3. `config_export.txt` - Configuration files (YAML, JSON, etc.)
4. `aggregation_summary.txt` - Summary of the aggregation

In [None]:
import os
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Set

# --- Aggregator settings (edit these to suit the project) ---

# Directory to scan; '.' is the current working directory
ROOT_PATH = '.'

# Suffix sets that decide which export a file is written to
MARKDOWN_EXTENSIONS = {
    '.markdown',
    '.md',
}
CODE_WEB_EXTENSIONS = {
    '.css',
    '.htm',
    '.html',
    '.ico',  # NOTE(review): .ico is normally a binary format - confirm it belongs in a text export
    '.ipynb',
    '.js',
    '.py',
}
CONFIG_EXTENSIONS = {
    '.cfg',
    '.conf',
    '.csv',
    '.ini',
    '.json',
    '.toml',
    '.txt',
    '.yaml',
    '.yml',
}

# Directory names that are never aggregated
EXCLUDE_DIRS = {
    # version control, editors, notebook checkpoints
    '.git', '.idea', '.vscode', '.ipynb_checkpoints',
    # caches and virtual environments
    '__pycache__', '.pytest_cache', '.mypy_cache', '.venv', 'venv', 'env',
    # dependencies, build artefacts and generated data
    'node_modules', 'dist', 'build', 'uploads', 'output',
}

# File names that are never aggregated
EXCLUDE_FILES = {
    '.DS_Store',
    '.gitignore',
    'requirements.txt',
    # the export files this notebook writes
    'markdown_export.txt',
    'code_web_export.txt',
    'config_export.txt',
    'aggregation_summary.txt',
}

print("✓ Configuration loaded")

In [None]:
def should_exclude(path: Path) -> bool:
    """Return True when *path* must be left out of the aggregation.

    A path is skipped if any of its components appears in EXCLUDE_DIRS,
    or if its final component appears in EXCLUDE_FILES.
    """
    # Every component is compared, not just the parent directories
    in_excluded_dir = not EXCLUDE_DIRS.isdisjoint(path.parts)
    return in_excluded_dir or path.name in EXCLUDE_FILES

def collect_files(root_path: Path) -> Dict[str, List[Path]]:
    """Recursively collect the files under *root_path*, grouped by category.

    Args:
        root_path: Directory to scan.

    Returns:
        A dict with the keys 'markdown', 'code_web' and 'config'. Each value
        is a sorted list of file paths (as yielded by ``root_path.rglob``)
        whose lower-cased suffix belongs to the matching extension set.
        Files matched by ``should_exclude`` or with an unlisted suffix are
        omitted.
    """
    files: Dict[str, List[Path]] = {
        'markdown': [],
        'code_web': [],
        'config': []
    }

    for file_path in root_path.rglob('*'):
        if not file_path.is_file():
            continue

        # Test exclusions against the path *inside* the project only.
        # rglob yields paths prefixed with root_path, so checking the full
        # path would drop every file whenever the project itself lives
        # under a directory named like an excluded one (e.g. ~/build/proj).
        if should_exclude(file_path.relative_to(root_path)):
            continue

        # Lower-case so '.MD' and '.md' land in the same category
        suffix = file_path.suffix.lower()

        if suffix in MARKDOWN_EXTENSIONS:
            files['markdown'].append(file_path)
        elif suffix in CODE_WEB_EXTENSIONS:
            files['code_web'].append(file_path)
        elif suffix in CONFIG_EXTENSIONS:
            files['config'].append(file_path)

    # Sort files by path for consistent output
    for category in files:
        files[category].sort()

    return files

# Status line printed when this cell has finished running
print("✓ Helper functions defined")

In [None]:
# Resolve the project root, scan it, and report how many files each category holds
root_path = Path(ROOT_PATH).resolve()
print(f"Scanning project: {root_path.name}")
print(f"Path: {root_path}\n")

files = collect_files(root_path)

# (label shown to the user, key in `files`)
category_labels = (
    ("Markdown files", 'markdown'),
    ("Code/Web files", 'code_web'),
    ("Config files", 'config'),
)

print("Files found:")
for label, key in category_labels:
    print(f"  - {label}: {len(files[key])}")
print(f"  - Total: {sum(map(len, files.values()))}")

In [None]:
# Preview: list, per category, every file that will be exported
rule = "=" * 80
preview_sections = (
    ("MARKDOWN FILES:", 'markdown'),
    ("CODE/WEB FILES:", 'code_web'),
    ("CONFIG FILES:", 'config'),
)

for heading, key in preview_sections:
    print("\n" + rule)
    print(heading)
    print(rule)
    # Paths are shown relative to the project root
    for listed in files[key]:
        print(f"  - {listed.relative_to(root_path)}")

In [None]:
def format_file_content(file_path: Path, root_path: Path) -> str:
    """Render one file as a delimited section of an export.

    The section is a header (path relative to *root_path* and size in bytes),
    the file's text decoded as UTF-8, and a matching footer. A trailing
    newline is appended to the content when it lacks one.

    Args:
        file_path: File to render; must be located under *root_path*.
        root_path: Project root used to compute the displayed relative path.

    Returns:
        The formatted section. If the file cannot be read, the section
        contains an ``[ERROR reading file: ...]`` line instead of the
        content; if it cannot be stat'ed, the size is shown as ``unknown``.

    Raises:
        ValueError: If *file_path* is not located under *root_path*.
    """
    relative_path = file_path.relative_to(root_path)
    separator = "=" * 80

    # stat() can fail just like the read below (file removed since the scan,
    # permission problems); one bad file must not abort the whole export.
    try:
        size_line = f"SIZE: {file_path.stat().st_size} bytes\n"
    except OSError:
        size_line = "SIZE: unknown\n"

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            body = f.read()
        if not body.endswith('\n'):
            body += '\n'
    except Exception as e:
        # Deliberately broad: the export is best-effort, so any failure for
        # this file (I/O error, non-UTF-8 bytes, ...) is recorded in place.
        body = f"[ERROR reading file: {e}]\n"

    return "".join([
        f"\n{separator}\n",
        f"FILE: {relative_path}\n",
        size_line,
        f"{separator}\n\n",
        body,
        f"\n{separator}\n",
        f"END OF FILE: {relative_path}\n",
        f"{separator}\n\n",
    ])

def create_export_file(category: str, file_list: List[Path], output_filename: str, root_path: Path):
    """Write the aggregated export for one category into *root_path*.

    The export consists of a banner header, a numbered table of contents,
    and then each file rendered by ``format_file_content``. When *file_list*
    is empty, a notice is printed and no file is written.

    Args:
        category: Category key; shown upper-cased with '_' replaced by ' '.
        file_list: Files to include, in output order.
        output_filename: Name of the export file created inside *root_path*.
        root_path: Project root; also the base for displayed relative paths.
    """
    if not file_list:
        print(f"  No files found for category: {category}")
        return

    banner = '#' * 80
    title = category.upper().replace('_', ' ')
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The empty first and last entries give the leading and trailing newlines
    header_lines = [
        "",
        banner,
        f"# {title} FILES EXPORT",
        f"# Generated: {generated_at}",
        f"# Project: {root_path.name}",
        f"# Total files: {len(file_list)}",
        banner,
        "",
        "TABLE OF CONTENTS:",
        "",
    ]

    with open(root_path / output_filename, 'w', encoding='utf-8') as out:
        out.write("\n".join(header_lines))

        # Numbered table of contents, one relative path per line
        out.writelines(
            f"{index:3d}. {entry.relative_to(root_path)}\n"
            for index, entry in enumerate(file_list, 1)
        )

        out.write(f"\n{banner}\n# FILE CONTENTS\n{banner}\n\n")

        # Body: every file as its own delimited section
        for entry in file_list:
            out.write(format_file_content(entry, root_path))

    print(f"  ✓ Created: {output_filename} ({len(file_list)} files)")

# Status line printed when this cell has finished running
print("✓ Export functions defined")

In [None]:
# Write one export file per category
rule = "=" * 80
print("\n" + rule)
print("GENERATING EXPORT FILES")
print(rule + "\n")

# (category key in `files`, name of the export file)
export_targets = (
    ('markdown', 'markdown_export.txt'),
    ('code_web', 'code_web_export.txt'),
    ('config', 'config_export.txt'),
)
for category, filename in export_targets:
    create_export_file(category, files[category], filename, root_path)

print("\n✓ All export files generated!")

In [None]:
# Generate and save summary
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
banner = '#' * 80

# (category key in `files`, export file name, description)
export_descriptions = [
    ('markdown', 'markdown_export.txt', 'Documentation files'),
    ('code_web', 'code_web_export.txt', 'Python and HTML files'),
    ('config', 'config_export.txt', 'Configuration files'),
]

# An export is only written for a category that has files, so flag the
# empty ones instead of claiming they were created.
output_lines = []
for category, filename, description in export_descriptions:
    line = f"  - {filename} - {description}"
    if not files[category]:
        line += " (skipped: no matching files)"
    output_lines.append(line)
output_lines.append("  - aggregation_summary.txt - This summary")
# Joined outside the f-string: backslashes are not allowed inside
# f-string expressions before Python 3.12.
output_section = "\n".join(output_lines)

summary = f"""
{banner}
# CODE AGGREGATION SUMMARY
# Generated: {timestamp}
# Project: {root_path.name}
{banner}

FILE COUNTS BY CATEGORY:
  - Markdown files: {len(files['markdown'])}
  - Code/Web files: {len(files['code_web'])}
  - Config files: {len(files['config'])}
  - Total files: {sum(len(f) for f in files.values())}

OUTPUT FILES CREATED:
{output_section}

{banner}
"""

# Save summary
summary_path = root_path / 'aggregation_summary.txt'
with open(summary_path, 'w', encoding='utf-8') as f:
    f.write(summary)

print(summary)
print(f"Summary saved to: {summary_path}")

## Done!

Your files have been exported to:
- **markdown_export.txt** - All markdown documentation
- **code_web_export.txt** - All code and web files (Python, HTML, JavaScript, CSS, notebooks)
- **config_export.txt** - All configuration files
- **aggregation_summary.txt** - Summary report

You can now use these files for documentation, review, or as input for other tools.