# Validating articles

## Imports

In [10]:
import json
from collections import defaultdict
from pathlib import Path

import jsonschema

## Constants

In [11]:
# Locations of the article files and of the JSON Schema used to check them
# (both are relative paths).
ALL_ARTICLES_DIR = Path("articles")
ARTICLE_SCHEMA = Path("schema/article.json")

# ANSI escape sequences used to colour printed status messages.
NOCOLOR = "\033[0m"  # resets any colour set earlier
RED = "\033[1;31m"
GREEN = "\033[0;32m"
YELLOW = "\033[0;33m"
CYAN = "\033[1;36m"


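## Run the validation

Every `*.json` file under `articles/` (searched recursively) is checked against `schema/article.json`, and any problems are reported per file.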

In [8]:
# Validate every article JSON file under ALL_ARTICLES_DIR against ARTICLE_SCHEMA
# and print a per-file report followed by a coloured overall verdict.
#
# Results left in the namespace for later cells:
#   error_log                  -- {article path: [problem descriptions]}
#   research_validation_failed -- True when at least one problem was found

# Friendlier replacements for jsonschema's default messages, keyed by the name
# of the offending field (the last element of the error's path).
# Plain ASCII hyphens are used so the example can be copied verbatim.
custom_errors = {
    "date": "Date must be in MM-DD-YYYY format (e.g. 01-31-2025).",
}

error_log: dict[str, list[str]] = defaultdict(list)

# The schema is identical for every article, so load and check it once up
# front. A missing or malformed schema is a setup problem rather than an
# article problem, so it is allowed to raise here instead of being logged
# against some unrelated article file.
with open(ARTICLE_SCHEMA, encoding="utf-8") as schema_f:
    schema_data = json.load(schema_f)
validator_cls = jsonschema.validators.validator_for(schema_data)
validator_cls.check_schema(schema_data)
article_validator = validator_cls(schema_data)

# Sorted so the report order does not depend on filesystem enumeration order.
article_files = sorted(ALL_ARTICLES_DIR.rglob("*.json"))
if not article_files:
    # An empty run would otherwise look like a clean pass.
    print(f"{YELLOW}WARNING: no *.json files found under {ALL_ARTICLES_DIR}{NOCOLOR}")

for single_article_file in article_files:
    # Parsing the article happens inside the try so that a malformed file is
    # recorded in the report rather than aborting the whole run.
    try:
        with open(single_article_file, encoding="utf-8") as f:
            content = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        error_log[str(single_article_file)].append(f"Invalid JSON: {exc}")
        continue

    # iter_errors yields every violation, not just the first one, so a single
    # run surfaces everything that needs fixing in the file.
    for violation in article_validator.iter_errors(content):
        # Errors raised at the document root have an empty path.
        field = violation.path[-1] if violation.path else None
        msg = custom_errors.get(field, violation.message)
        error_log[str(single_article_file)].append(f"{field!r}: {msg}")

# Entries are only ever created when a problem is appended.
research_validation_failed = bool(error_log)

if research_validation_failed:
    for fname, errs in error_log.items():
        for err in errs:
            print(f"Please recheck JSON file: {fname} — Error: {err}")
    # Reset the colour afterwards so it does not bleed into later output.
    print(f"{RED}VALIDATION FAILED{NOCOLOR}")
else:
    print(f"{GREEN}VALIDATION SUCCESSFUL{NOCOLOR}")


[0;32mVALIDATION SUCCESSFUL[0m
