schunkdev · nikolaischunk · May 15, 2026 · May 15, 2026
diff --git a/scripts/cleanup_icon_csv.py b/scripts/cleanup_icon_csv.py
@@ -13,12 +13,36 @@
 import csv
 import io
 import re
+import unicodedata
 from dataclasses import dataclass
 from typing import Iterable, Iterator
 
 
 HEADER_NEEDLES = ("Marke: Name", "Artikelbeschr.", "Menge Verkauft", "Gesamtsumme Erlös")
 
+_ENCODINGS = ("utf-8-sig", "utf-8", "mac_roman", "cp1252", "latin-1")  # BOM-aware codec first
+
+
+def _decode_csv_bytes(raw: bytes) -> str:
+    """Decode *raw* with the first codec in _ENCODINGS whose text contains every header needle."""
+    normalized_needles = [_normalize(n) for n in HEADER_NEEDLES]
+    candidates: list[str] = []  # successful decodes that lacked the headers, in trial order
+    for enc in _ENCODINGS:
+        try:
+            text = raw.decode(enc)
+        except (UnicodeDecodeError, ValueError):
+            continue  # this codec cannot decode the bytes; try the next one
+        norm_text = _normalize(text)
+        if all(needle in norm_text for needle in normalized_needles):
+            return text
+        candidates.append(text)
+    return candidates[0] if candidates else raw.decode("utf-8", errors="replace")
+
+
+def _normalize(text: str) -> str:
+    """Return *text* in NFC form with each whitespace run collapsed to one space and ends trimmed."""
+    return " ".join(unicodedata.normalize("NFC", text).split())
+
 KDE_RE = re.compile(r"^KDE\d+\b")
 SUBTOTAL_RE = re.compile(r"^Gesamtsumme\s*-")
 
@@ -39,10 +63,16 @@ class Row:
 
 
 def _find_header_idx(lines: list[str]) -> int:
+    normalized_needles = [_normalize(n) for n in HEADER_NEEDLES]
     for i, line in enumerate(lines):
-        if all(needle in line for needle in HEADER_NEEDLES):
+        norm_line = _normalize(line)
+        if all(needle in norm_line for needle in normalized_needles):
             return i
-    missing = [n for n in HEADER_NEEDLES if not any(n in ln for ln in lines)]
+    missing = [
+        n
+        for n, nn in zip(HEADER_NEEDLES, normalized_needles)
+        if not any(nn in _normalize(ln) for ln in lines)
+    ]
     raise ValueError(
         "This does not appear to be a valid ICON Outdoor sales export. "
         "The required data header row was not found. "
@@ -51,10 +81,17 @@ def _find_header_idx(lines: list[str]) -> int:
     )
 
 
-def _parse_csv_line(line: str) -> list[str]:
-    row = next(csv.reader([line], delimiter=",", quotechar='"', skipinitialspace=True))
-    if len(row) == 1 and "," in row[0]:
-        row = next(csv.reader([row[0]], delimiter=",", quotechar='"', skipinitialspace=True))
+def _detect_delimiter(header_line: str) -> str:
+    """Return ';' only when it strictly outnumbers ',' in the header row; otherwise ','."""
+    semicolons = header_line.count(";")
+    commas = header_line.count(",")
+    return ";" if semicolons > commas else ","
+
+
+def _parse_csv_line(line: str, delimiter: str = ",") -> list[str]:  # -> exactly 6 stripped cells
+    row = next(csv.reader([line], delimiter=delimiter, quotechar='"', skipinitialspace=True))
+    if len(row) == 1 and delimiter in row[0]:  # lone cell still holds the delimiter: parse its text again
+        row = next(csv.reader([row[0]], delimiter=delimiter, quotechar='"', skipinitialspace=True))
     if len(row) < 6:
         row = row + [""] * (6 - len(row))
     return [c.strip() for c in row[:6]]
@@ -84,17 +121,17 @@ def _to_float(x: str) -> float | None:
         return None
 
 
-def iter_clean_rows(lines: Iterable[str]) -> Iterator[Row]:
+def iter_clean_rows(lines: Iterable[str], delimiter: str = ",") -> Iterator[Row]:
     current_kunde = ""
     current_brand = ""
 
     for line in lines:
         if not line.strip():
             continue
 
-        c0, c1, c2, c3, c4, c5 = _parse_csv_line(line)
+        c0, c1, c2, c3, c4, c5 = _parse_csv_line(line, delimiter)
 
-        if KDE_RE.search(c0) and not any([c1, c2, c3, c4, c5]):
+        if KDE_RE.search(c0) and not any([c1, c2, c3]):
             current_kunde = c0
             current_brand = ""
             continue
@@ -313,7 +350,7 @@ def process_csv_bytes(csv_bytes: bytes) -> bytes:
     if not csv_bytes or not csv_bytes.strip():
         raise ValueError("The uploaded file is empty. Please provide a valid ICON Outdoor sales export CSV.")
 
-    text = csv_bytes.decode("utf-8", errors="replace")
+    text = _decode_csv_bytes(csv_bytes)
     lines = text.splitlines()
 
     if len(lines) < 2:
@@ -323,8 +360,9 @@ def process_csv_bytes(csv_bytes: bytes) -> bytes:
         )
 
     header_idx = _find_header_idx(lines)
+    delimiter = _detect_delimiter(lines[header_idx])
     data_lines = lines[header_idx + 1:]
-    rows = list(iter_clean_rows(data_lines))
+    rows = list(iter_clean_rows(data_lines, delimiter))
 
     if not rows:
         raise ValueError(

diff --git a/src/app/api/process/route.ts b/src/app/api/process/route.ts
@@ -0,0 +1,82 @@
+import { NextRequest, NextResponse } from "next/server";
+import { execFile } from "node:child_process";
+import { writeFile, unlink } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { randomUUID } from "node:crypto";
+
+const PYTHON = process.env.PYTHON_BIN ?? "python3"; // interpreter to spawn; override with PYTHON_BIN
+const SCRIPT = join(process.cwd(), "scripts", "cleanup_icon_csv.py"); // resolved against the server's cwd
+
+const RUNNER = `
+import sys, pathlib
+sys.path.insert(0, str(pathlib.Path(sys.argv[1]).parent))
+from cleanup_icon_csv import process_csv_bytes
+csv_path = sys.argv[2]
+out_path = sys.argv[3]
+data = pathlib.Path(csv_path).read_bytes()
+result = process_csv_bytes(data)
+pathlib.Path(out_path).write_bytes(result)
+`; // passed to `python -c`: argv[1] = SCRIPT, argv[2] = input CSV path, argv[3] = output path
+
+export async function POST(req: NextRequest) { // multipart field "file" in, .xlsx attachment out
+  const formData = await req.formData();
+  const file = formData.get("file");
+
+  if (!file || !(file instanceof Blob)) {
+    return NextResponse.json(
+      { error: 'No field named "file" found in the upload.' },
+      { status: 400 },
+    );
+  }
+
+  const id = randomUUID(); // unique suffix so concurrent requests never share temp files
+  const csvPath = join(tmpdir(), `icon-csv-${id}.csv`);
+  const xlsxPath = join(tmpdir(), `icon-xlsx-${id}.xlsx`);
+
+  try {
+    const bytes = Buffer.from(await file.arrayBuffer());
+    await writeFile(csvPath, bytes);
+
+    const xlsxBytes = await new Promise<Buffer>((resolve, reject) => {
+      execFile(
+        PYTHON,
+        ["-c", RUNNER, SCRIPT, csvPath, xlsxPath],
+        { timeout: 60_000, maxBuffer: 50 * 1024 * 1024 },
+        async (err, _stdout, stderr) => {
+          if (err) {
+            const msg = stderr.trim().split("\n").pop() || err.message; // pop() gives "" (not undefined) on empty stderr
+            reject(new Error(msg));
+            return;
+          }
+          try {
+            const { readFile } = await import("node:fs/promises");
+            resolve(await readFile(xlsxPath));
+          } catch (e) {
+            reject(e);
+          }
+        },
+      );
+    });
+
+    return new NextResponse(xlsxBytes, {
+      headers: {
+        "Content-Type":
+          "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        "Content-Disposition":
+          'attachment; filename="ICON_CSV_Cleanup_Dashboard.xlsx"',
+      },
+    });
+  } catch (e) {
+    const message = e instanceof Error ? e.message : "Processing failed";
+    const isValidation = // messages carrying these markers are reported as 422 instead of 500
+      message.includes("valid ICON") || message.includes("Missing column");
+    return NextResponse.json(
+      { error: message },
+      { status: isValidation ? 422 : 500 },
+    );
+  } finally {
+    await unlink(csvPath).catch(() => {}); // best-effort cleanup; a missing temp file is ignored
+    await unlink(xlsxPath).catch(() => {});
+  }
+}