Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/generate-mdx-from-confluence.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ jobs:
# 플랫폼을 명시적으로 지정하여 아키텍처 불일치 오류 방지
DOCKER_DEFAULT_PLATFORM=linux/amd64 docker compose --progress=quiet pull confluence-mdx

- name: Docker image metadata
run: |
echo "=== Docker Image Metadata ==="
docker inspect querypie/confluence-mdx:latest --format 'Image Created: {{.Created}}'
docker inspect querypie/confluence-mdx:latest --format 'Image ID: {{.Id}}'
docker inspect querypie/confluence-mdx:latest --format 'Image Size: {{.Size}} bytes'

- name: Generate MDX files
working-directory: ./confluence-mdx
run: |
Expand Down
4 changes: 4 additions & 0 deletions confluence-mdx/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ COPY tests/ ./tests/
# var/ 최종 상태를 단일 레이어로 복사 (builder의 cache+fetch 결과)
COPY --from=builder /workdir/var/ ./var/

# Record build metadata
ARG BUILD_DATE
RUN echo "${BUILD_DATE:-$(date -u +%Y-%m-%dT%H:%M:%SZ)}" > /workdir/.build-date

# Create target/ directory
RUN mkdir -p target/ko target/en target/ja target/public

Expand Down
170 changes: 170 additions & 0 deletions confluence-mdx/bin/image_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#!/usr/bin/env python3
"""
Docker image var/ data status report.

Scans var/ directory to produce a summary of page data freshness:
- Image build date
- fetch_state.yaml info
- Page count and version statistics
- Oldest pages (stale data candidates)

Usage:
bin/image_status.py [--var-dir VAR] [--top N]
"""

import argparse
import sys
from datetime import datetime, timezone
from pathlib import Path

import yaml


def read_build_date(workdir: Path) -> str:
    """Return the build date stamped into ``<workdir>/.build-date``.

    The marker file is written at image build time (see Dockerfile).
    Returns the literal string "unknown" when the marker is absent.
    """
    marker = workdir / ".build-date"
    if not marker.exists():
        return "unknown"
    return marker.read_text().strip()


def read_fetch_state(var_dir: Path) -> dict:
    """Return the parsed contents of the first ``*/fetch_state.yaml`` under var_dir.

    Returns an empty dict when no state file exists or the file is empty.
    """
    for candidate in var_dir.glob("*/fetch_state.yaml"):
        with candidate.open() as fh:
            loaded = yaml.safe_load(fh)
        # An empty YAML document parses to None; normalize to {}.
        return loaded or {}
    return {}


def scan_pages(var_dir: Path) -> list[dict]:
    """Collect version metadata from every ``<page_id>/page.v2.yaml`` in var_dir.

    Only direct subdirectories whose names are purely numeric (Confluence
    page ids) are considered. Unreadable, empty, or malformed YAML files
    are skipped silently — a single corrupt page must not abort the scan.
    """
    collected: list[dict] = []
    for directory in sorted(var_dir.iterdir()):
        if not (directory.is_dir() and directory.name.isdigit()):
            continue
        yaml_path = directory / "page.v2.yaml"
        if not yaml_path.exists():
            continue
        try:
            with yaml_path.open() as fh:
                doc = yaml.safe_load(fh)
            if not doc:
                continue
            version = doc.get("version", {})
            collected.append({
                "page_id": directory.name,
                "title": doc.get("title", "?"),
                "version": version.get("number", "?"),
                "created_at": version.get("createdAt", ""),
            })
        except Exception:
            # Best effort: skip anything that fails to parse or has an
            # unexpected shape (e.g. a top-level list instead of a mapping).
            continue
    return collected


def parse_iso(date_str: str) -> datetime | None:
"""Parse ISO 8601 date string."""
if not date_str:
return None
try:
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
except ValueError:
return None


def format_report(workdir: Path, var_dir: Path, top_n: int) -> str:
    """Render the var/ data freshness report as a single printable string.

    Sections: build date, fetch_state.yaml summary, page count, version
    date range, an age histogram, and the *top_n* oldest pages.
    """
    out: list[str] = []
    out.append("# ── Image Status Report ──────────────────────")

    # Image build date (stamped at build time).
    out.append(f" Build Date : {read_build_date(workdir)}")

    # fetch_state.yaml summary, when present.
    state = read_fetch_state(var_dir)
    if not state:
        out.append(" Fetch State : not found")
    else:
        out.append(f" Last Modified : {state.get('last_modified_seen', '?')}")
        out.append(f" Last Recent Fetch: {state.get('last_recent_fetch', '?')}")
        out.append(f" Last Full Fetch : {state.get('last_full_fetch', '?')}")
        out.append(f" Pages Fetched : {state.get('pages_fetched', '?')}")

    pages = scan_pages(var_dir)
    out.append(f" Pages in var/ : {len(pages)}")

    if not pages:
        out.append("# ─────────────────────────────────────────────")
        return "\n".join(out)

    # Keep only pages whose createdAt parsed, ordered oldest first.
    dated_pages = sorted(
        ((ts, page) for page in pages if (ts := parse_iso(page["created_at"]))),
        key=lambda pair: pair[0],
    )

    if dated_pages:
        out.append(f" Oldest Version : {dated_pages[0][0].strftime('%Y-%m-%d %H:%M')} UTC")
        out.append(f" Newest Version : {dated_pages[-1][0].strftime('%Y-%m-%d %H:%M')} UTC")

    # Bucket page ages into a coarse histogram.
    now = datetime.now(timezone.utc)
    buckets = {"< 1 day": 0, "1-7 days": 0, "7-30 days": 0, "30-90 days": 0, "> 90 days": 0}
    thresholds = ((1, "< 1 day"), (7, "1-7 days"), (30, "7-30 days"), (90, "30-90 days"))
    for ts, _ in dated_pages:
        age_days = (now - ts).days
        for limit, label in thresholds:
            if age_days < limit:
                buckets[label] += 1
                break
        else:
            buckets["> 90 days"] += 1

    out.append("")
    out.append(" Age Distribution:")
    for label, count in buckets.items():
        if count:
            # Cap the bar at 50 characters so huge buckets stay readable.
            out.append(f" {label:>10s} : {count:3d} {'#' * min(count, 50)}")

    # Oldest pages: likeliest candidates for a stale-data refresh.
    out.append("")
    out.append(f" Oldest {top_n} Pages (stale candidates):")
    for ts, page in dated_pages[:top_n]:
        age = (now - ts).days
        out.append(f" [{page['page_id']}] v{page['version']} ({age}d ago) {page['title']}")

    out.append("# ─────────────────────────────────────────────")
    return "\n".join(out)


def main():
    """CLI entry point: parse arguments, validate var/, print the report."""
    ap = argparse.ArgumentParser(description="Docker image var/ data status report")
    ap.add_argument("--workdir", default="/workdir", help="Working directory (default: /workdir)")
    ap.add_argument("--var-dir", default=None, help="var/ directory path (default: <workdir>/var)")
    ap.add_argument("--top", type=int, default=10, help="Number of oldest pages to show (default: 10)")
    opts = ap.parse_args()

    workdir = Path(opts.workdir)
    if opts.var_dir:
        var_dir = Path(opts.var_dir)
    else:
        var_dir = workdir / "var"

    if not var_dir.is_dir():
        print(f"ERROR: var/ directory not found: {var_dir}", file=sys.stderr)
        sys.exit(1)

    print(format_report(workdir, var_dir, opts.top))


if __name__ == "__main__":
    main()
38 changes: 37 additions & 1 deletion confluence-mdx/scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,62 @@

set -o errexit -o nounset

# ── Diagnostic metadata ────────────────────────────
# Display image build date and fetch_state.yaml summary
print_image_info() {
    echo "# ── Image Metadata ──────────────────────────"
    # Build date is stamped into /workdir/.build-date at image build time
    # (see the corresponding Dockerfile RUN step).
    if [[ -f /workdir/.build-date ]]; then
        echo "# Build Date : $(cat /workdir/.build-date)"
    else
        echo "# Build Date : unknown"
    fi

    # Locate the first fetch_state.yaml under /workdir/var (-quit stops
    # find at the first match) and dump it verbatim, each line prefixed
    # with "#" so the whole banner reads as comments.
    local state_file
    state_file=$(find /workdir/var -name fetch_state.yaml -print -quit 2>/dev/null)
    if [[ -n "$state_file" ]]; then
        echo "# Fetch State: $state_file"
        while IFS= read -r line; do
            echo "# $line"
        done < "$state_file"
    else
        echo "# Fetch State: not found"
    fi

    # Count top-level numeric directories — one per fetched page id.
    # tr strips the padding some wc implementations emit.
    local page_count
    page_count=$(find /workdir/var -mindepth 1 -maxdepth 1 -type d -name '[0-9]*' 2>/dev/null | wc -l | tr -d ' ')
    echo "# Pages in var/: $page_count"
    echo "# ─────────────────────────────────────────────"
}

# ── Commands ────────────────────────────────────────
case "${1:-help}" in
fetch_cli.py|convert_all.py|converter/cli.py)
print_image_info
command=$1
shift
echo "+ bin/$command $@"
exec bin/$command "$@"
;;
full) # Execute full workflow
print_image_info
shift
echo "# Starting full workflow..."
echo "+ bin/fetch_cli.py $@"
bin/fetch_cli.py "$@"
echo "+ bin/convert_all.py"
bin/convert_all.py
;;
status) # Show detailed var/ data status report
exec bin/image_status.py "${@:2}"
;;
bash|sh)
echo "+ $@"
exec "$@"
;;
help|--help|-h)
print_image_info
cat << EOF

Confluence-MDX Container

Usage:
Expand All @@ -33,6 +68,7 @@ Commands:
convert_all.py [args...] - Convert all pages to MDX
full [fetch args...] - Execute full workflow (fetch + convert)
converter/cli.py <in> <out> - Convert a single XHTML to MDX
status - Show var/ data freshness report
bash - Run interactive shell
help - Show this help message

Expand All @@ -41,6 +77,7 @@ Examples:
docker run docker.io/querypie/confluence-mdx:latest full --recent
docker run docker.io/querypie/confluence-mdx:latest convert_all.py
docker run docker.io/querypie/confluence-mdx:latest fetch_cli.py --attachments
docker run docker.io/querypie/confluence-mdx:latest status
docker run -v \$(pwd)/target:/workdir/target docker.io/querypie/confluence-mdx:latest full --local

Environment Variables:
Expand All @@ -53,4 +90,3 @@ EOF
exec "$@"
;;
esac