In [2]:
import os
import sys
import time
import json
import argparse
from datetime import datetime, date
from zoneinfo import ZoneInfo
from urllib.parse import urlencode, urljoin
import urllib.request

In [3]:
# Root of the DIP search API; endpoint paths are joined onto it with urljoin.
BASE_URL = "https://search.dip.bundestag.de/api/v1/"
# Value sent in the User-Agent header of every request.
UA = "dip-digest-bot/1.1 (+https://example.local)"
SLEEP_SEC = 0.6  # polite pause between result pages

def api_key() -> str:
    """Return the API key stored in the ``DIP_API_KEY`` environment variable.

    When the variable is unset or empty, an error message is written to
    stderr and the process is terminated via ``sys.exit(2)``.
    """
    value = os.environ.get("DIP_API_KEY")
    if value:
        return value
    # No usable key: tell the user how to fix it and stop with exit status 2.
    print("Ошибка: установите переменную окружения DIP_API_KEY", file=sys.stderr)
    sys.exit(2)

def http_get(url: str, headers: dict) -> dict:
    """Perform a GET request and return the response body parsed as JSON.

    Args:
        url: Full URL to request.
        headers: Header names mapped to values, sent with the request.

    Returns:
        The response body decoded as UTF-8 and parsed with ``json.loads``.

    Raises:
        RuntimeError: If the response status is anything other than 200.
    """
    request = urllib.request.Request(url, headers=headers, method="GET")
    # 60-second timeout so a stalled connection cannot hang the run forever.
    with urllib.request.urlopen(request, timeout=60) as response:
        if response.status == 200:
            body = response.read()
            return json.loads(body.decode("utf-8"))
        raise RuntimeError(f"HTTP {response.status} for {url}")

def build_query(day: date, cursor: str | None = None) -> str:
    # отбираем только ответы правительства
    params = [
        ("f.zuordnung", "BT"),
        ("f.datum.start", day.strftime("%Y-%m-%d")),
        ("f.datum.end", day.strftime("%Y-%m-%d")),
        ("f.drucksachetyp", "Antwort der Bundesregierung"),
        ("format", "json"),
    ]
    if cursor:
        params.append(("cursor", cursor))
    return urlencode(params)

def fetch_drucksachen_answers(day: date, key: str) -> list[dict]:
    """Collect every document returned by the ``drucksache`` search for one day.

    Pages are requested one after another, passing the cursor from the previous
    response. Paging stops when a response carries no cursor or repeats the
    cursor that was just sent.

    Args:
        day: Date passed to ``build_query``.
        key: API key placed in the ``Authorization`` header.

    Returns:
        The concatenated ``documents`` lists of all fetched pages.
    """
    request_headers = {
        "Authorization": f"ApiKey {key}",
        "Accept": "application/json",
        "User-Agent": UA,
    }
    collected = []
    cursor = None
    finished = False
    while not finished:
        page_url = urljoin(BASE_URL, "drucksache?" + build_query(day, cursor))
        page = http_get(page_url, request_headers)
        collected += page.get("documents", [])
        next_cursor = page.get("cursor")
        finished = not next_cursor or next_cursor == cursor
        if not finished:
            cursor = next_cursor
            # Pause only when another page is about to be requested.
            time.sleep(SLEEP_SEC)
    return collected

def fetch_vorgang(vorgang_id: str, key: str) -> dict:
    """Fetch a single ``vorgang`` record by its id and return the parsed JSON.

    Args:
        vorgang_id: Identifier inserted into the ``vorgang/<id>`` endpoint path.
        key: API key placed in the ``Authorization`` header.
    """
    endpoint = urljoin(BASE_URL, "vorgang/{}?format=json".format(vorgang_id))
    request_headers = {
        "Authorization": f"ApiKey {key}",
        "Accept": "application/json",
        "User-Agent": UA,
    }
    return http_get(endpoint, request_headers)

def group_key(urheber: str | None) -> str:
    if not urheber:
        return "Unbekannt"
    return urheber.strip() or "Unbekannt"

def _originator_label(item) -> str:
    """Render one originator/author entry as display text.

    Dict entries yield their ``titel`` or, failing that, their ``bezeichnung``;
    anything else (and dicts lacking both) falls back to ``str(item)``.
    """
    if isinstance(item, dict):
        for field in ("titel", "bezeichnung"):
            value = item.get(field)
            if value:
                return str(value)
    return str(item)


def extract_core(doc: dict) -> dict:
    """Reduce a raw document record to the fields the digest needs.

    The originator is taken from the first of these that matches:
    ``urheber`` as a list (entries joined with ", "), ``urheber`` as a string,
    or ``autoren`` as a list (entries joined with ", "). Otherwise it is
    ``None``.

    List entries may be plain strings or dicts. A dict is rendered by its
    ``titel``/``bezeichnung`` rather than its Python repr, which would
    otherwise end up as a heading in the digest.
    NOTE(review): the DIP API appears to deliver ``urheber`` as objects with
    ``titel``/``bezeichnung`` keys - confirm against the API schema.

    Args:
        doc: One document record as returned by the API.

    Returns:
        A dict with the keys ``id``, ``drucksachetyp``, ``dokumentnummer``,
        ``titel`` (placeholder text when missing or empty), ``datum``,
        ``wahlperiode``, ``urheber``, ``pdf_url`` (from ``fundstelle.pdf_url``)
        and ``vorgangsbezug`` (an empty list when missing or empty).
    """
    fundstelle = doc.get("fundstelle") or {}
    raw_urheber = doc.get("urheber")
    raw_autoren = doc.get("autoren")

    # Originators can live in different fields depending on the record.
    urheber = None
    if isinstance(raw_urheber, list):
        urheber = ", ".join(_originator_label(item) for item in raw_urheber)
    elif isinstance(raw_urheber, str):
        urheber = raw_urheber
    elif isinstance(raw_autoren, list):
        urheber = ", ".join(_originator_label(item) for item in raw_autoren)

    return {
        "id": doc.get("id"),
        "drucksachetyp": doc.get("drucksachetyp"),
        "dokumentnummer": doc.get("dokumentnummer"),
        "titel": doc.get("titel") or "Без названия",
        "datum": doc.get("datum"),
        "wahlperiode": doc.get("wahlperiode"),
        "urheber": urheber,
        "pdf_url": fundstelle.get("pdf_url"),
        "vorgangsbezug": doc.get("vorgangsbezug") or [],
    }

def filter_only_ka_ga(items: list[dict], key: str) -> list[dict]:
    """Keep only answers that belong to a Kleine Anfrage or Große Anfrage.

    Each document is reduced with ``extract_core``. It is kept when any of its
    ``vorgangsbezug`` entries carries a matching ``vorgangstyp``. If none does
    and at least one reference exists, the first referenced vorgang is fetched
    and its ``vorgangstyp`` is checked instead.

    The fallback lookup is best-effort: a failure never aborts the run, but it
    is reported on stderr so a dropped document does not go unnoticed. A
    reference without an ``id`` is not looked up at all.

    Args:
        items: Raw document records.
        key: API key used for the fallback vorgang lookup.

    Returns:
        The ``extract_core`` results of the documents that were kept, in
        input order.
    """
    wanted_types = ("Kleine Anfrage", "Große Anfrage", "Grosse Anfrage")
    result = []
    for doc in items:
        entry = extract_core(doc)
        refs = [ref for ref in entry["vorgangsbezug"] if isinstance(ref, dict)]
        keep = any(ref.get("vorgangstyp") in wanted_types for ref in refs)
        if not keep and refs:
            # Fallback: ask the API for the first referenced vorgang's type.
            vorgang_id = refs[0].get("id")
            if vorgang_id is not None:
                try:
                    vorgang = fetch_vorgang(str(vorgang_id), key)
                    keep = vorgang.get("vorgangstyp") in wanted_types
                except Exception as exc:
                    print(
                        f"Предупреждение: не удалось получить vorgang {vorgang_id}: {exc}",
                        file=sys.stderr,
                    )
                    keep = False
        if keep:
            result.append(entry)
    return result

def build_md(day: date, entries: list[dict]) -> str:
    """Render the digest for one day as Markdown text.

    Entries are sorted by group label (``group_key`` of ``urheber``), then by
    ``datum``, then by ``dokumentnummer``. Each group gets a ``##`` heading,
    each entry a bullet line, and the text ends with a total count.

    Args:
        day: Date shown in the title line.
        entries: Records shaped like the result of ``extract_core``.

    Returns:
        The Markdown text; with no entries it consists of the title followed
        by a "nothing found" line.
    """
    head = f"# Antworten der Bundesregierung auf Kleine/Große Anfragen - {day.strftime('%Y-%m-%d')}\n\n"
    if not entries:
        return head + "_Ничего не найдено._\n"

    def sort_key(entry: dict) -> tuple:
        # Missing date / document number sort as empty strings.
        return (
            group_key(entry["urheber"]),
            entry.get("datum") or "",
            entry.get("dokumentnummer") or "",
        )

    lines = [head]
    open_group = None
    for entry in sorted(entries, key=sort_key):
        group = group_key(entry["urheber"])
        if group != open_group:
            lines.append(f"## {group}\n")
            open_group = group
        # Title first, then every optional detail that is present.
        segments = [f"- **{entry['titel']}**"]
        if entry.get("dokumentnummer"):
            segments.append(f"BT-Drucksache {entry['dokumentnummer']}")
        if entry.get("drucksachetyp"):
            segments.append(f"{entry['drucksachetyp']}")
        if entry.get("datum"):
            segments.append(f"{entry['datum']}")
        if entry.get("pdf_url"):
            segments.append(f"[PDF]({entry['pdf_url']})")
        lines.append(" · ".join(segments))
    lines.extend(["", f"_Всего ответов: {len(entries)}._\n"])
    return "\n".join(lines)


In [None]:
# Driver cell: fetch the day's government answers, keep those replying to a
# Kleine/Große Anfrage, write the Markdown digest to a file and report the count.
ap = argparse.ArgumentParser(description="Дайджест только ответов федерального правительства на Kleine/Große Anfrage за день.")
ap.add_argument("--date", help="Дата YYYY-MM-DD, по умолчанию сегодня по Europe-Berlin.", default=None)
ap.add_argument("--print", action="store_true", help="Печатать дайджест в stdout.")
# Inside a Jupyter kernel sys.argv holds the launcher's own arguments
# (e.g. "-f <connection file>"). parse_args() would reject them and exit, so
# unrecognised arguments are tolerated and ignored here.
args, _ignored_argv = ap.parse_known_args()
tz = ZoneInfo("Europe/Berlin")
# Without --date, "today" is evaluated in Berlin local time, not the host's.
day = datetime.strptime(args.date, "%Y-%m-%d").date() if args.date else datetime.now(tz).date()
key = api_key()
raw = fetch_drucksachen_answers(day, key)
ka_ga = filter_only_ka_ga(raw, key)
md = build_md(day, ka_ga)
out_name = f"digest-answers-{day.strftime('%Y%m%d')}.md"
with open(out_name, "w", encoding="utf-8") as f:
    f.write(md)
if args.print:
    print(md)
print(f"Готово. Файл: {out_name}. Ответов: {len(ka_ga)}")