In [1]:
import docx
import os
import json
from xml.etree.ElementTree import Element
from jsonschema import validate, ValidationError
from styling_utils import _apply_properties, _clean_paragraph, _remove_empty_paragraph, _adjust_bullet_character
from style_mapping import FONT_MAPPING, PARAGRAPH_FORMAT_MAPPING

In [2]:
# Root of the working tree. The original local path stays as the default so
# existing runs behave the same; set the FORMAT_TEST_DIR environment variable
# to run the notebook on another machine without editing this cell.
MAIN_DIR = os.environ.get("FORMAT_TEST_DIR") or "C:/Users/kaszt/OneDrive/Dokumenty/format_test/"
# Input and output folders are fixed subfolders of MAIN_DIR.
INPUT_DIR = os.path.join(MAIN_DIR, "data/input/")
OUTPUT_DIR = os.path.join(MAIN_DIR, "data/output/")

In [3]:
# Names of the style configuration file and of the JSON schema it is checked against.
style_schema_filename = "style_schema.json"
style_config_filename = "style.json"

# Open the Word document that the following cells modify.
doc = docx.Document(os.path.join(INPUT_DIR, "test.docx"))

In [4]:
def load_json_file(input_dir, filename):
    """
    Read a JSON file located in ``input_dir`` and return its parsed content.

    Parameters:
        input_dir: Directory that contains the file.
        filename: Name of the JSON file; it is joined onto ``input_dir``.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        FileNotFoundError: If the joined path does not exist.
    """
    json_path = os.path.join(input_dir, filename)
    if os.path.exists(json_path):
        # Decode as UTF-8 explicitly instead of relying on the platform default.
        with open(json_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"File not found: {json_path}")

In [5]:
def load_and_validate_styles(input_dir: str, style_filename: str, schema_filename: str) -> dict:
    """
    Load a style configuration and check it against a JSON schema.

    Both files are read from ``input_dir`` with ``load_json_file``: the style
    configuration first, then the schema.

    Parameters:
        input_dir: Directory holding both JSON files.
        style_filename: File name of the style configuration.
        schema_filename: File name of the JSON schema.

    Returns:
        The parsed style configuration, once it has passed validation.

    Raises:
        FileNotFoundError: Propagated from ``load_json_file`` when a file is missing.
        ValueError: If the configuration does not satisfy the schema; the
            underlying ``ValidationError`` is attached as the cause.
    """
    config = load_json_file(input_dir, style_filename)
    schema = load_json_file(input_dir, schema_filename)
    try:
        validate(instance=config, schema=schema)
    except ValidationError as err:
        # Re-raise as ValueError with a short message, keeping the original as the cause.
        raise ValueError(f"❌ JSON validation error: {err.message}") from err
    else:
        return config

In [6]:
def apply_style_from_json(style, style_json):
    """
    Push the "font" and "paragraph_format" sections of one style entry onto a style object.

    Parameters:
        style: Object exposing ``font`` and ``paragraph_format`` attributes.
        style_json: Dictionary describing a single style (not the whole
            configuration file). A missing "font" or "paragraph_format" key
            is treated as an empty dictionary.
    """
    # Font settings are applied first, through FONT_MAPPING.
    font_target = style.font
    font_settings = style_json.get("font", {})
    _apply_properties(font_target, font_settings, FONT_MAPPING)

    # Paragraph-format settings follow, through PARAGRAPH_FORMAT_MAPPING.
    paragraph_target = style.paragraph_format
    paragraph_settings = style_json.get("paragraph_format", {})
    _apply_properties(paragraph_target, paragraph_settings, PARAGRAPH_FORMAT_MAPPING)

def apply_paragraph_cleanup(paragraph):
    """
    Run the cleanup helpers on a single paragraph.

    Calls ``_clean_paragraph`` with ``trim_spaces=True`` and then
    ``_remove_empty_paragraph`` on the same paragraph. Both helpers are
    imported from ``styling_utils``; judging by their names they trim
    whitespace and drop the paragraph when it is empty -- confirm against
    that module.
    """
    _clean_paragraph(paragraph, trim_spaces=True)
    _remove_empty_paragraph(paragraph)

In [7]:
# Load the style configuration; this raises if the file fails schema validation.
styles_config = load_and_validate_styles(INPUT_DIR, style_config_filename, style_schema_filename)

# Bullet handling is delegated to the styling_utils helper ("dash" variant requested).
_adjust_bullet_character(doc, bullet_char="dash")

# Update every style listed in the configuration on the document.
for style_json in styles_config["styles"]:
    style_name = style_json["name"]
    style = doc.styles[style_name]

    # A missing or empty "based_on" leaves the style's current base style untouched.
    if style_json.get("based_on"):
        style.base_style = doc.styles[style_json["based_on"]]

    apply_style_from_json(style, style_json)

# Iterate over a copy of the paragraph list, since the cleanup step
# presumably removes paragraphs from the document while we loop.
for paragraph in list(doc.paragraphs):
    apply_paragraph_cleanup(paragraph)

In [8]:
# Create the output folder when it is missing so the save does not fail on a fresh working tree.
os.makedirs(OUTPUT_DIR, exist_ok=True)
doc.save(os.path.join(OUTPUT_DIR, "test_modified.docx"))