In [1]:
import docx
import os
import json
import yaml
from jsonschema import validate, ValidationError
from styling_utils import _apply_properties, _clean_paragraph, _remove_empty_paragraph, _adjust_bullet_character
from style_mapping import FONT_MAPPING, PARAGRAPH_FORMAT_MAPPING

In [2]:
# Root folder of the formatting test data. Set the FORMAT_TEST_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original machine-specific location is used, so existing runs are unaffected.
MAIN_DIR = os.environ.get("FORMAT_TEST_DIR", "C:/Users/kaszt/OneDrive/Dokumenty/format_test/")
# Input and output folders are always derived from MAIN_DIR.
INPUT_DIR = os.path.join(MAIN_DIR, "data/input/")
OUTPUT_DIR = os.path.join(MAIN_DIR, "data/output/")

In [3]:
# Open the Word document from INPUT_DIR; later cells restyle and save this object.
doc = docx.Document(os.path.join(INPUT_DIR, "test_yaml.docx"))
# File names of the style configuration and its validation schema, in a JSON
# and a YAML variant. In the cells shown here only the YAML pair is passed to
# a loader; the JSON pair is defined but not referenced.
style_config_filename_json = "style.json"
style_schema_filename_json = "style_schema.json"
style_config_filename_yaml = "style_config.yaml"
style_schema_filename_yaml = "style_config_schema.yaml"

In [4]:
def load_json_file(input_dir, filename):
    """
    Read ``filename`` from ``input_dir`` and return its parsed JSON content.

    The file is opened as UTF-8 text. A FileNotFoundError naming the joined
    path is raised when nothing exists at that location.
    """
    json_path = os.path.join(input_dir, filename)
    if os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"File not found: {json_path}")
    
def load_yaml_file(input_dir, filename):
    """
    Read ``filename`` from ``input_dir`` and return its content parsed with
    ``yaml.safe_load``.

    The file is opened as UTF-8 text. A FileNotFoundError naming the joined
    path is raised when nothing exists at that location.
    """
    yaml_path = os.path.join(input_dir, filename)
    if os.path.exists(yaml_path):
        with open(yaml_path, "r", encoding="utf-8") as handle:
            return yaml.safe_load(handle)
    raise FileNotFoundError(f"File not found: {yaml_path}")

In [5]:
def load_and_validate_styles_json(input_dir: str, style_filename: str, schema_filename: str) -> dict:
    """
    Load a JSON style configuration and check it against a JSON schema.

    Both files are read from ``input_dir`` with ``load_json_file``. The
    configuration is returned unchanged when ``jsonschema.validate`` accepts
    it; a ``ValidationError`` is re-raised as ``ValueError`` carrying the
    validator's message, with the original exception chained.
    """
    loaded_config = load_json_file(input_dir, style_filename)
    loaded_schema = load_json_file(input_dir, schema_filename)

    try:
        validate(instance=loaded_config, schema=loaded_schema)
    except ValidationError as err:
        raise ValueError(f"❌ JSON validation error: {err.message}") from err
    else:
        return loaded_config

def load_and_validate_styles_yaml(input_dir: str, style_filename: str, schema_filename: str) -> dict:
    """
    Load a YAML style configuration and check it against a YAML-encoded schema.

    Both files are read from ``input_dir`` with ``load_yaml_file``. The
    configuration is returned unchanged when ``jsonschema.validate`` accepts
    it; a ``ValidationError`` is re-raised as ``ValueError`` carrying the
    validator's message, with the original exception chained.
    """
    loaded_config = load_yaml_file(input_dir, style_filename)
    loaded_schema = load_yaml_file(input_dir, schema_filename)

    try:
        validate(instance=loaded_config, schema=loaded_schema)
    except ValidationError as err:
        raise ValueError(f"❌ YAML validation error: {err.message}") from err
    else:
        return loaded_config

In [6]:
def apply_style_properties(style_obj, style_definition):
    """
    Push one style definition onto a docx style object.

    The ``"font"`` section of the definition is applied to ``style_obj.font``
    using ``FONT_MAPPING`` and the ``"paragraph_format"`` section to
    ``style_obj.paragraph_format`` using ``PARAGRAPH_FORMAT_MAPPING``.
    A section missing from the definition is treated as an empty dict.
    """
    # Each section name doubles as the attribute name on the style object.
    section_mappings = (
        ("font", FONT_MAPPING),
        ("paragraph_format", PARAGRAPH_FORMAT_MAPPING),
    )
    for section, mapping in section_mappings:
        _apply_properties(getattr(style_obj, section), style_definition.get(section, {}), mapping)


def clean_paragraph(paragraph):
    """
    Perform cleanup on a single paragraph: trim spaces and remove if empty.

    Delegates to the ``styling_utils`` helpers ``_clean_paragraph`` (called
    with ``trim_spaces=True``) and ``_remove_empty_paragraph``, in that order.
    """
    # NOTE(review): presumably trimming runs first so that whitespace-only
    # paragraphs count as empty for the removal step — confirm in styling_utils.
    _clean_paragraph(paragraph, trim_spaces=True)
    _remove_empty_paragraph(paragraph)

In [7]:
def apply_style_group(doc, style_group: dict):
    """
    Apply a group of styles from a configuration dictionary to the docx Document.

    doc:
        Document whose ``styles`` collection is looked up by style name.
    style_group: dict
        Keys are style names, values are style definitions (font + paragraph_format + based_on).
        ``None`` is treated as an empty group.

    Entries whose value is not a dict, and entries naming a style that the
    document does not contain, are skipped. A ``based_on`` value that names a
    missing style still raises ``KeyError``, since that is a configuration
    error rather than an optional entry.
    """
    for style_name, style_def in (style_group or {}).items():
        # Check the definition before touching the document: non-dict entries
        # are not style definitions and must not trigger a style lookup.
        if not isinstance(style_def, dict):
            continue

        try:
            style_obj = doc.styles[style_name]
        except KeyError:
            # The styles collection raises KeyError for an unknown name
            # instead of returning a falsy value, so a missing style has to be
            # skipped here.
            continue

        base_name = style_def.get("based_on")
        if base_name:
            style_obj.base_style = doc.styles[base_name]

        apply_style_properties(style_obj, style_def)

def apply_all_document_styles(doc, styles_config):
    """
    Run the full formatting pass described by ``styles_config`` on ``doc``.

    Everything is read from ``styles_config["document_formatter_config"]``:
    the ``paragraph_styles`` and ``chapter_and_section_rules`` groups are
    applied as styles, ``list_rules`` is handed to the bullet adjustment
    helper, and finally every paragraph of the document is cleaned up.
    A missing section falls back to an empty dict.
    """
    formatter_config = styles_config["document_formatter_config"]

    # Style groups are applied in this fixed order.
    for group_key in ("paragraph_styles", "chapter_and_section_rules"):
        apply_style_group(doc, formatter_config.get(group_key, {}))

    _adjust_bullet_character(doc, formatter_config.get("list_rules", {}))

    # Iterate over a snapshot because cleanup may remove paragraphs.
    for para in list(doc.paragraphs):
        clean_paragraph(para)

In [8]:
# Read the YAML style configuration from INPUT_DIR and validate it against its schema.
styles_config = load_and_validate_styles_yaml(
    input_dir=INPUT_DIR,
    style_filename=style_config_filename_yaml,
    schema_filename=style_schema_filename_yaml,
)

# Apply the validated configuration to the loaded document object.
apply_all_document_styles(doc, styles_config)

In [9]:
# Create the output folder if needed so the save does not fail on a fresh
# checkout; exist_ok keeps the cell safe to re-run.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Write the restyled document under a fixed name in OUTPUT_DIR.
doc.save(os.path.join(OUTPUT_DIR, "test_modified.docx"))