## Calling

In [None]:
import os
import re
import json
import requests
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional, Tuple

# =========================
# CONFIG
# =========================
# Endpoint that receives each batch as a JSON array via HTTP POST.
API_URL = "http://127.0.0.1:8000/currency/exchange-rates/batch"
# Root folder holding one sub-folder per day, named YYYY-MM-DD.
BASE_DIR = "WebService/data"

# Inclusive date range to process. Accepts DD-MM-YYYY or YYYY-MM-DD for
# selecting folders (the folders themselves are looked up as YYYY-MM-DD).
START_DATE = "02-07-2025"
END_DATE   = "04-07-2025"

# Maximum number of rows per request (values below 1 are treated as 1).
BATCH_SIZE = 10

# Passed as timeout= to requests.post. 1,000,000 s is roughly 11.5 days, so in
# practice the client never times a request out.
REQUEST_TIMEOUT = 1000000  # seconds
# When True, main() prints the summary and returns after the first failed batch.
STOP_ON_ERROR = False

# =========================
# HELPERS
# =========================

_DD_MM_YYYY_DASH = re.compile(r"^(\d{2})-(\d{2})-(\d{4})$")
_YYYY_MM_DD = re.compile(r"^(\d{4})-(\d{2})-(\d{2})$")

def _parse_date_any(s: str) -> datetime:
    """Parse a DD-MM-YYYY or YYYY-MM-DD string into a midnight datetime.

    Surrounding whitespace is ignored. The day-first pattern is tried before
    the year-first one.

    Raises:
        ValueError: if the text matches neither pattern, or if the numbers do
            not form a real calendar date.
    """
    text = s.strip()

    day_first = _DD_MM_YYYY_DASH.fullmatch(text)
    if day_first is not None:
        day, month, year = (int(part) for part in day_first.groups())
        return datetime(year, month, day)

    year_first = _YYYY_MM_DD.fullmatch(text)
    if year_first is not None:
        year, month, day = (int(part) for part in year_first.groups())
        return datetime(year, month, day)

    raise ValueError(f"Date must be DD-MM-YYYY or YYYY-MM-DD, got: {text!r}")

def _daterange_inclusive(start_dt: datetime, end_dt: datetime):
    """Yield start_dt and every following day, up to and including end_dt.

    Nothing is yielded when start_dt is already later than end_dt.
    """
    one_day = timedelta(days=1)
    current = start_dt
    while True:
        if current > end_dt:
            return
        yield current
        current = current + one_day

def _load_payload_for_day(day_dir: str) -> Optional[List[Any]]:
    """Load ``exchange_rates_payload.json`` from ``day_dir`` without altering it.

    Returns the parsed JSON list exactly as stored in the file. Returns None
    (after printing a message) when the file is missing, cannot be read or
    parsed, or when its top-level value is not a list.
    """
    payload_path = os.path.join(day_dir, "exchange_rates_payload.json")
    if not os.path.isfile(payload_path):
        print(f"[skip] payload not found: {payload_path}")
        return None

    try:
        with open(payload_path, "r", encoding="utf-8") as handle:
            loaded = json.load(handle)
    except Exception as exc:
        print(f"[warn] failed reading payload {payload_path}: {exc}")
        return None

    if isinstance(loaded, list):
        return loaded  # untouched file content

    print(f"[warn] payload is not a list, skipping: {payload_path}")
    return None

def _post_payload(payload: List[Any]) -> Tuple[bool, Optional[Dict[str, Any]], Optional[str]]:
    """POST ``payload`` as JSON to API_URL and report the outcome.

    Returns a ``(ok, json_body, text)`` triple:
      * ``(True, parsed_json, None)`` when the response body parses as JSON,
      * ``(True, None, raw_text)`` when the request succeeded but the body is
        not JSON,
      * ``(False, None, message)`` when requests raised; the message is the
        exception text followed by the response body when one is available.
    """
    try:
        response = requests.post(API_URL, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        try:
            parsed = response.json()
        except Exception:
            # Successful status but a non-JSON body: hand back the raw text.
            return True, None, response.text
        return True, parsed, None
    except requests.RequestException as exc:
        failed_response = getattr(exc, "response", None)
        body_text = None
        if failed_response is not None:
            try:
                body_text = failed_response.text
            except Exception:
                body_text = None
        tail = ("Response body: " + body_text) if body_text else ""
        return False, None, f"{exc}\n{tail}"

def _chunked(lst: List[Any], size: int) -> List[List[Any]]:
    """Split ``lst`` into consecutive slices of at most ``size`` items.

    ``size`` is coerced with ``int()`` and clamped to a minimum of 1.
    """
    step = int(size)
    if step < 1:
        step = 1  # guard against zero or negative sizes

    pieces = []
    for start in range(0, len(lst), step):
        pieces.append(lst[start:start + step])
    return pieces

def _dedupe_payload(payload: List[Any]) -> Tuple[List[Any], int]:
    """
    Remove duplicates while preserving order (the first occurrence is kept).

    Identity key (preferred): (ExchangeRateType, FromCurrency, ToCurrency, ValidFrom, Quotation),
    used when the item is a dict, all five fields are present and not None,
    and the resulting key is hashable.
    Fallback: canonical JSON of the whole item (or its repr when the item is
    not JSON-serialisable).

    Returns (deduped_list, removed_count).
    """
    logical_fields = ("ExchangeRateType", "FromCurrency", "ToCurrency", "ValidFrom", "Quotation")
    seen = set()
    out: List[Any] = []
    removed = 0

    for item in payload:
        # Prefer the logical key if every field is present
        key: Optional[Tuple[Any, ...]] = None
        if isinstance(item, dict):
            fields = tuple(item.get(name) for name in logical_fields)
            if all(v is not None for v in fields):
                key = ("LOGIC_KEY",) + fields
                try:
                    hash(key)
                except TypeError:
                    # A field holds a list/dict: the tuple cannot go into the
                    # set, so use the whole-item representation instead of
                    # crashing on `key in seen`.
                    key = None

        if key is None:
            # Fallback to full-item canonical representation
            try:
                canonical = json.dumps(item, sort_keys=True, ensure_ascii=False)
            except Exception:
                canonical = repr(item)
            key = ("RAW_ITEM", canonical)

        if key in seen:
            removed += 1
            continue
        seen.add(key)
        out.append(item)

    return out, removed

# =========================
# MAIN
# =========================

def main():
    """Post each day's exchange-rate payload for the configured date range.

    For every day from START_DATE to END_DATE (inclusive) the folder
    BASE_DIR/YYYY-MM-DD is loaded, de-duplicated, split into batches of at
    most BATCH_SIZE rows and POSTed one batch at a time. Missing folders and
    missing/invalid/empty payloads are counted as skipped days. A summary of
    all counters is printed at the end, or right after the first failed batch
    when STOP_ON_ERROR is True.

    Raises:
        SystemExit: if END_DATE is earlier than START_DATE.
        ValueError: if a date string is not in a supported format.
    """
    start_dt = _parse_date_any(START_DATE)
    end_dt   = _parse_date_any(END_DATE)
    if end_dt < start_dt:
        raise SystemExit(f"END_DATE {END_DATE} is before START_DATE {START_DATE}")

    bs = max(1, int(BATCH_SIZE))

    total_days = 0
    days_with_payload = 0
    posted_days = 0
    posted_batches = 0
    rows_sent = 0
    skipped_days = 0
    errors = 0
    total_dupes_removed = 0

    def _print_summary() -> None:
        # Reads the counters of the enclosing call at the moment it is invoked.
        print("\n[summary]")
        print(f"  days in range       : {total_days}")
        print(f"  days with payload   : {days_with_payload}")
        print(f"  posted days         : {posted_days}")
        print(f"  posted batches      : {posted_batches}")
        print(f"  rows sent           : {rows_sent}")
        print(f"  skipped days        : {skipped_days}")
        print(f"  errors              : {errors}")
        print(f"  duplicates removed  : {total_dupes_removed}")

    print(f"[range] {start_dt.date()} → {end_dt.date()} (inclusive)")
    print(f"[config] batch_size = {bs}\n")

    for d in _daterange_inclusive(start_dt, end_dt):
        total_days += 1
        day_name = d.strftime("%Y-%m-%d")
        day_dir = os.path.join(BASE_DIR, day_name)
        if not os.path.isdir(day_dir):
            print(f"[skip] day folder missing: {day_dir}")
            skipped_days += 1
            continue

        payload = _load_payload_for_day(day_dir)
        if not payload:
            # Covers both "could not load" (None) and an empty list.
            print(f"[skip] no valid payload in: {day_dir}")
            skipped_days += 1
            continue

        days_with_payload += 1

        # De-duplicate per day
        deduped_payload, dupes_removed = _dedupe_payload(payload)
        total_dupes_removed += dupes_removed

        n_raw = len(payload)
        n = len(deduped_payload)
        if dupes_removed > 0:
            print(f"[day] {day_name}: {n_raw} rows → {n} after removing {dupes_removed} duplicate(s)")

        chunks = _chunked(deduped_payload, bs)
        total_chunks = len(chunks)
        print(f"[day] {day_name}: {n} rows → {total_chunks} batch(es) of up to {bs}")

        day_had_success = False

        for idx, batch in enumerate(chunks, start=1):
            print(f"[post] {day_name} | batch {idx}/{total_chunks}: {len(batch)} rows → {API_URL}")
            ok, resp_json, resp_text = _post_payload(batch)
            if ok:
                day_had_success = True
                posted_batches += 1
                rows_sent += len(batch)
                if resp_json is not None:
                    print(json.dumps(resp_json, indent=2))
                elif resp_text is not None:
                    print(resp_text)
                else:
                    print("[info] posted OK (no response body)")
                continue

            errors += 1
            print(f"[error] POST failed for {day_name} batch {idx}/{total_chunks}:\n{resp_text or '(no body)'}")
            if STOP_ON_ERROR:
                # Earlier batches of this day may already have gone through;
                # count the day before reporting so the summary is accurate.
                if day_had_success:
                    posted_days += 1
                _print_summary()
                return  # hard stop

        if day_had_success:
            posted_days += 1

    _print_summary()

# Entry point: run the upload when this code is executed directly (not imported).
if __name__ == "__main__":
    main()


[range] 2025-07-02 → 2025-07-04 (inclusive)
[config] batch_size = 10

[day] 2025-07-02: 784 rows → 79 batch(es) of up to 10
[post] 2025-07-02 | batch 1/79: 10 rows → http://127.0.0.1:8000/currency/exchange-rates/batch
{
  "ok": true,
  "workers": 2,
  "total": 10,
  "created": 10,
  "failed": 0,
  "skipped": 0,
  "results": [
    {
      "index": 1,
      "payload": {
        "ExchangeRateType": "M",
        "FromCurrency": "AED",
        "ToCurrency": "AUD",
        "ValidFrom": "02.07.2025",
        "Quotation": "Direct",
        "ExchangeRate": "0.41400"
      },
      "status": "created",
      "dialog_open": false,
      "dialog_text": "",
      "footer_clicks": 0,
      "intermediate_toasts": [],
      "messages": [],
      "popover_text": "",
      "worker": 1,
      "round": 1
    },
    {
      "index": 2,
      "payload": {
        "ExchangeRateType": "M",
        "FromCurrency": "AED",
        "ToCurrency": "BHD",
        "ValidFrom": "02.07.2025",
        "Quotation": "Dire

KeyboardInterrupt: 

In [None]:
import os
import re
import json
import time
import requests
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional, Tuple

# =========================
# CONFIG (non-streaming only)
# =========================
# Base address of the service; the batch endpoint URL is derived from it.
API_BASE = "http://127.0.0.1:8000"
API_URL_BATCH = f"{API_BASE}/currency/exchange-rates/batch"

# Root folder holding one sub-folder per day, named YYYY-MM-DD.
BASE_DIR = "WebService/data"

# Accepts DD-MM-YYYY or YYYY-MM-DD for selecting folders (folders are YYYY-MM-DD)
START_DATE = "04-07-2025"
END_DATE   = "15-07-2025"

# Maximum rows per request (values below 1 are treated as 1; change as needed)
BATCH_SIZE = 50

# HTTP timeouts / retries
REQUEST_TIMEOUT = 30000  # seconds per HTTP request (30,000 s is about 8.3 hours)
RETRY_MAX = 4  # total attempts per request, including the first one
RETRY_BACKOFF_BASE = 2  # seconds (exponential): wait base ** (attempt - 1), at least 1 s

# Behavior on failures
STOP_ON_ERROR = False  # if True, stop immediately when a batch fails

# Input hygiene
DROP_SAME_CURRENCY = True  # drop items where FromCurrency == ToCurrency

# Output
WRITE_DAY_SUMMARY = True  # write per-day JSON summary (post_summary.json) under each day folder

# =========================
# HELPERS
# =========================

_DD_MM_YYYY_DASH = re.compile(r"^(\d{2})-(\d{2})-(\d{4})$")
_YYYY_MM_DD = re.compile(r"^(\d{4})-(\d{2})-(\d{2})$")

def _parse_date_any(s: str) -> datetime:
    """Turn a DD-MM-YYYY or YYYY-MM-DD string into a midnight datetime.

    A falsy value (None, empty string) is treated as empty text, and
    surrounding whitespace is stripped. The day-first pattern is tried first.

    Raises:
        ValueError: if the text matches neither pattern, or the matched
            numbers are not a valid calendar date.
    """
    text = (s or "").strip()

    # Each entry: (pattern, positions of year/month/day within the groups).
    layouts = (
        (_DD_MM_YYYY_DASH, (2, 1, 0)),
        (_YYYY_MM_DD, (0, 1, 2)),
    )
    for pattern, (year_at, month_at, day_at) in layouts:
        found = pattern.fullmatch(text)
        if not found:
            continue
        numbers = [int(piece) for piece in found.groups()]
        return datetime(numbers[year_at], numbers[month_at], numbers[day_at])

    raise ValueError(f"Date must be DD-MM-YYYY or YYYY-MM-DD, got: {text!r}")

def _daterange_inclusive(start_dt: datetime, end_dt: datetime):
    """Generate one datetime per day from start_dt through end_dt inclusive.

    The generator is empty when start_dt lies after end_dt.
    """
    step = timedelta(days=1)
    day = start_dt
    while not day > end_dt:
        yield day
        day = day + step

def _load_payload_for_day(day_dir: str) -> Optional[List[Any]]:
    """Return the list stored in ``day_dir``/exchange_rates_payload.json, unmodified.

    Prints a message and returns None when the file does not exist, cannot be
    read or parsed, or holds something other than a JSON list.
    """
    source = os.path.join(day_dir, "exchange_rates_payload.json")
    if not os.path.isfile(source):
        print(f"[skip] payload not found: {source}")
        return None

    try:
        with open(source, "r", encoding="utf-8") as stream:
            content = json.load(stream)
    except Exception as err:
        print(f"[warn] failed reading payload {source}: {err}")
        return None

    if not isinstance(content, list):
        print(f"[warn] payload is not a list, skipping: {source}")
        return None
    return content

def _chunked(lst: List[Any], size: int) -> List[List[Any]]:
    """Cut ``lst`` into consecutive slices holding at most ``size`` items each.

    ``size`` goes through ``int()`` and is raised to 1 if it is smaller.
    """
    width = int(size)
    if width < 1:
        width = 1

    batches = []
    offset = 0
    total = len(lst)
    while offset < total:
        batches.append(lst[offset:offset + width])
        offset += width
    return batches

def _dedupe_payload(payload: List[Any]) -> Tuple[List[Any], int]:
    """Deduplicate by logical key; fallback to full JSON. Preserve order.

    The logical key is (ExchangeRateType, FromCurrency, ToCurrency, ValidFrom,
    Quotation). It is used when the item is a dict, all five values are
    present and not None, and the key is hashable. Otherwise the item is
    identified by its canonical JSON text (or its repr when it is not
    JSON-serialisable). The first occurrence of each key is kept.

    Returns:
        (deduped_list, removed_count)
    """
    key_names = ("ExchangeRateType", "FromCurrency", "ToCurrency", "ValidFrom", "Quotation")
    seen = set()
    out: List[Any] = []
    removed = 0
    for item in payload:
        key: Optional[Tuple[Any, ...]] = None
        if isinstance(item, dict):
            fields = tuple(item.get(name) for name in key_names)
            if all(v is not None for v in fields):
                key = ("LOGIC_KEY",) + fields
                try:
                    hash(key)
                except TypeError:
                    # A list/dict value makes the tuple unusable as a set
                    # member; fall back instead of crashing on `key in seen`.
                    key = None
        if key is None:
            try:
                canonical = json.dumps(item, sort_keys=True, ensure_ascii=False)
            except Exception:
                canonical = repr(item)
            key = ("RAW_ITEM", canonical)
        if key in seen:
            removed += 1
            continue
        seen.add(key)
        out.append(item)
    return out, removed

def _filter_same_currency(payload: List[Any]) -> Tuple[List[Any], int]:
    """Remove dict rows whose FromCurrency and ToCurrency are the same text.

    Both values must be present (not None); they are compared after ``str()``
    conversion, case-sensitively. Non-dict items are always kept.

    Returns:
        (kept_rows, dropped_count)
    """
    def _is_self_pair(row: Any) -> bool:
        if not isinstance(row, dict):
            return False
        source = row.get("FromCurrency")
        target = row.get("ToCurrency")
        if source is None or target is None:
            return False
        return str(source) == str(target)

    kept: List[Any] = []
    dropped = 0
    for row in payload:
        if _is_self_pair(row):
            dropped += 1
        else:
            kept.append(row)
    return kept, dropped

def _post_with_retries(url: str, *, json_body: Any, timeout: Optional[int]) -> requests.Response:
    """POST ``json_body`` to ``url``, retrying transient failures with backoff.

    Retried (up to RETRY_MAX attempts in total): HTTP 429, any 5xx status, and
    request errors that carry no response (connection errors, timeouts).
    Other 4xx statuses are raised immediately, since repeating the same
    request cannot change a client error.

    Args:
        url: Target URL.
        json_body: Object sent as the JSON request body.
        timeout: Per-request timeout in seconds, passed to requests.

    Returns:
        The response of the first attempt that completed with a non-error status.

    Raises:
        requests.RequestException: the error of the last attempt. For HTTP
            errors the exception's ``response`` attribute holds the response.
        RuntimeError: if no attempt was made at all (RETRY_MAX < 1).
    """
    # NOTE(review): a retry after a read timeout re-sends the whole batch —
    # confirm the endpoint tolerates receiving the same rows twice.
    last_exc = None
    headers = {"Content-Type": "application/json"}
    # Context manager closes the session's pooled connections on every exit path.
    with requests.Session() as session:
        for attempt in range(1, RETRY_MAX + 1):
            try:
                r = session.post(url, json=json_body, timeout=timeout, headers=headers)
                if r.status_code in (429, 502, 503, 504):
                    # Attach the response so callers can inspect the body.
                    raise requests.HTTPError(f"HTTP {r.status_code}: {r.text[:200]}", response=r)
                r.raise_for_status()
                return r
            except requests.RequestException as e:
                last_exc = e
                # Compare against None explicitly: a Response with an error
                # status is falsy, so a truthiness test would misfire here.
                status = getattr(getattr(e, "response", None), "status_code", None)
                retryable = status is None or status == 429 or status >= 500
                if not retryable or attempt >= RETRY_MAX:
                    break
                sleep_s = max(1, int(RETRY_BACKOFF_BASE) ** (attempt - 1))
                print(f"[retry] attempt {attempt}/{RETRY_MAX} failed: {e}. Backing off {sleep_s}s")
                time.sleep(sleep_s)
    if isinstance(last_exc, Exception):
        raise last_exc
    raise RuntimeError("Unknown POST failure")

def _post_payload_batch(payload: List[Any]) -> Tuple[bool, Optional[Dict[str, Any]], Optional[str]]:
    """Send ``payload`` to the non-streaming /batch endpoint and report the outcome.

    Returns a ``(ok, json_body, text)`` triple:
      * ``(True, parsed_json, None)`` when the response body parses as JSON,
      * ``(True, None, raw_text)`` when the request succeeded but the body is
        not JSON,
      * ``(False, None, message)`` on any exception; the message is the
        exception text followed by the response body when the exception is a
        requests error that carries one.
    """
    try:
        response = _post_with_retries(API_URL_BATCH, json_body=payload, timeout=REQUEST_TIMEOUT)
        try:
            parsed = response.json()
        except Exception:
            return True, None, response.text
        return True, parsed, None
    except Exception as exc:
        body_text = None
        carries_response = (
            isinstance(exc, requests.RequestException)
            and getattr(exc, "response", None) is not None
        )
        if carries_response:
            try:
                body_text = exc.response.text
            except Exception:
                body_text = None
        tail = ("Response body: " + body_text) if body_text else ""
        return False, None, f"{exc}\n{tail}"

def _write_day_summary(day_dir: str, *, total_rows: int, rows_after_filters: int,
                       dupes_removed: int, same_currency_dropped: int,
                       posted_batches: int, rows_sent: int, errors: int) -> None:
    """Write the day's counters to ``day_dir``/post_summary.json.

    Does nothing when WRITE_DAY_SUMMARY is false. Any failure while building
    or writing the file is reported with a warning and otherwise ignored, so
    a summary problem never interrupts the upload.
    """
    if WRITE_DAY_SUMMARY:
        try:
            summary = dict(
                total_rows_in_file=total_rows,
                rows_after_filters=rows_after_filters,
                duplicates_removed=dupes_removed,
                same_currency_dropped=same_currency_dropped,
                posted_batches=posted_batches,
                rows_sent=rows_sent,
                errors=errors,
                endpoint="batch",
                batch_size=BATCH_SIZE,
                ts=datetime.now().isoformat(),  # local time of writing
            )
            target = os.path.join(day_dir, "post_summary.json")
            with open(target, "w", encoding="utf-8") as handle:
                json.dump(summary, handle, indent=2, ensure_ascii=False)
        except Exception as err:
            print(f"[warn] write day summary failed in {day_dir}: {err}")

# =========================
# MAIN (strict day->batches sequence)
# =========================

def main():
    """Post each day's exchange-rate payload, strictly day by day, batch by batch.

    For every day from START_DATE to END_DATE (inclusive) the folder
    BASE_DIR/YYYY-MM-DD is loaded, de-duplicated, optionally stripped of
    same-currency rows (DROP_SAME_CURRENCY), split into batches of at most
    BATCH_SIZE rows and POSTed sequentially. A per-day summary is handed to
    _write_day_summary and an overall summary is printed at the end, or right
    after the first failed batch when STOP_ON_ERROR is True.

    Raises:
        SystemExit: if END_DATE is earlier than START_DATE.
        ValueError: if a date string is not in a supported format.
    """
    start_dt = _parse_date_any(START_DATE)
    end_dt   = _parse_date_any(END_DATE)
    if end_dt < start_dt:
        raise SystemExit(f"END_DATE {END_DATE} is before START_DATE {START_DATE}")

    bs = max(1, int(BATCH_SIZE))

    total_days = 0
    days_with_payload = 0
    posted_days = 0
    posted_batches = 0
    rows_sent = 0
    skipped_days = 0
    errors = 0
    total_dupes_removed = 0
    total_same_currency_dropped = 0

    def _print_summary() -> None:
        # Reads the counters of the enclosing call at the moment it is invoked.
        print("\n[summary]")
        print(f"  days in range         : {total_days}")
        print(f"  days with payload     : {days_with_payload}")
        print(f"  posted days           : {posted_days}")
        print(f"  posted batches        : {posted_batches}")
        print(f"  rows sent             : {rows_sent}")
        print(f"  skipped days          : {skipped_days}")
        print(f"  errors                : {errors}")
        print(f"  duplicates removed    : {total_dupes_removed}")
        print(f"  same-currency dropped : {total_same_currency_dropped}")

    print(f"[range] {start_dt.date()} → {end_dt.date()} (inclusive)")
    print(f"[config] batch_size = {bs}, endpoint = /batch")
    print("[mode] STRICT ORDER: per-day, then per-day batches (no interleaving)\n")

    # STRICT: iterate days in order
    for d in _daterange_inclusive(start_dt, end_dt):
        total_days += 1
        day_name = d.strftime("%Y-%m-%d")
        day_dir = os.path.join(BASE_DIR, day_name)
        print(f"\n=== DAY {day_name} ===")

        if not os.path.isdir(day_dir):
            print(f"[skip] day folder missing: {day_dir}")
            skipped_days += 1
            continue

        payload = _load_payload_for_day(day_dir)
        if not payload:
            # Covers both "could not load" (None) and an empty list.
            print(f"[skip] no valid payload in: {day_dir}")
            skipped_days += 1
            continue

        days_with_payload += 1
        total_rows_in_file = len(payload)

        # Hygiene for the day (still AS-IS structure)
        deduped_payload, dupes_removed = _dedupe_payload(payload)
        total_dupes_removed += dupes_removed

        if DROP_SAME_CURRENCY:
            filtered_payload, same_drop = _filter_same_currency(deduped_payload)
        else:
            filtered_payload, same_drop = deduped_payload, 0
        total_same_currency_dropped += same_drop

        n_after = len(filtered_payload)
        print(f"[day] rows: {total_rows_in_file} → {n_after} "
              f"(dedup removed {dupes_removed}, same-currency dropped {same_drop})")

        # Fields of the per-day summary that are fixed once filtering is done.
        day_filter_stats = dict(
            total_rows=total_rows_in_file,
            rows_after_filters=n_after,
            dupes_removed=dupes_removed,
            same_currency_dropped=same_drop,
        )

        if n_after == 0:
            print(f"[day] {day_name}: nothing to post after filters; skipping")
            _write_day_summary(day_dir, posted_batches=0, rows_sent=0, errors=0,
                               **day_filter_stats)
            continue

        # STRICT: process this day's batches sequentially
        chunks = _chunked(filtered_payload, bs)
        total_chunks = len(chunks)
        print(f"[day] batching: {total_chunks} batch(es) of up to {bs}")

        day_had_success = False
        day_errors = 0
        day_rows_sent = 0
        day_batches_posted = 0

        for idx, batch in enumerate(chunks, start=1):
            print(f"[post] {day_name} | batch {idx}/{total_chunks}: {len(batch)} rows → /batch")

            ok, resp_json, resp_text = _post_payload_batch(batch)

            if ok:
                day_had_success = True
                day_batches_posted += 1
                posted_batches += 1
                day_rows_sent += len(batch)
                rows_sent += len(batch)

                # JSON responses are intentionally not echoed (they are large);
                # only non-JSON bodies and empty responses are reported.
                if resp_json is None:
                    if resp_text is not None:
                        print(resp_text[:1000])
                    else:
                        print("[info] posted OK (no response body)")
                continue

            day_errors += 1
            errors += 1
            print(f"[error] POST failed for {day_name} batch {idx}/{total_chunks}:\n"
                  f"{(resp_text or '(no body)')[:1000]}")
            if STOP_ON_ERROR:
                print("[halt] STOP_ON_ERROR=True → halting at this batch")
                # Earlier batches of this day may already have gone through;
                # count the day before reporting so the summary is accurate.
                if day_had_success:
                    posted_days += 1
                # write partial day summary then stop everything
                _write_day_summary(day_dir, posted_batches=day_batches_posted,
                                   rows_sent=day_rows_sent, errors=day_errors,
                                   **day_filter_stats)
                _print_summary()
                return

        if day_had_success:
            posted_days += 1

        _write_day_summary(day_dir, posted_batches=day_batches_posted,
                           rows_sent=day_rows_sent, errors=day_errors,
                           **day_filter_stats)

    # Final overall summary
    _print_summary()

# Entry point: run the upload when this code is executed directly (not imported).
if __name__ == "__main__":
    main()


[range] 2025-07-04 → 2025-07-15 (inclusive)
[config] batch_size = 50, endpoint = /batch
[mode] STRICT ORDER: per-day, then per-day batches (no interleaving)


=== DAY 2025-07-04 ===
[day] rows: 386 → 386 (dedup removed 0, same-currency dropped 0)
[day] batching: 8 batch(es) of up to 50
[post] 2025-07-04 | batch 1/8: 50 rows → /batch
[post] 2025-07-04 | batch 2/8: 50 rows → /batch
