rad1092 · rad1092 · Feb 15, 2026 · Feb 15, 2026
diff --git a/README.md b/README.md
@@ -51,6 +51,33 @@
 3. 실행 명령어
 4. `pyproject.toml` 또는 의존성 목록
 
+### 업로드 입력 지원 범위 / 제약 / 준비 권장사항
+
+#### 지원 범위
+- CSV: `input_type=csv` 또는 파일 업로드 시 기본 경로로 처리
+- Excel: `.xlsx`(OOXML) 지원, 시트 목록 조회(`/api/sheets`) + 시트 선택 후 CSV 정규화
+- 문서: `.pdf`, `.docx`, `.pptx` 표 추출(`/api/document/extract`) 후 선택 테이블 분석
+
+#### 제약
+- Excel은 현재 `.xlsx`만 지원(`.xls` 바이너리 포맷 미지원)
+- Excel 시트는 **첫 행 헤더 필수**, 빈 헤더/중복 헤더는 에러 처리
+- 비어 있는 시트(실데이터 없음)는 분석 불가
+- PDF는 암호화/스캔 이미지 기반 문서에서 표 추출이 실패할 수 있음
+- 문서 표 추출 실패 시 `/api/analyze`는 `error` + `error_detail` + `preprocessing_stage=table_extraction` 포맷으로 반환
+
+#### 권장 파일 준비 방법
+- CSV/Excel 공통
+  - 첫 행을 명확한 컬럼명(중복/공백 없음)으로 구성
+  - 숫자 컬럼은 단위/통화를 가능한 일관되게 정리
+  - 완전 빈 행/열은 사전 제거
+- Excel
+  - 분석 대상 시트를 분리(요약 시트/원본 시트 혼합 최소화)
+  - 병합 셀(merged cell)/복잡한 서식보다 표 형태(행-열)를 우선
+- 문서(PDF/Word/PPT)
+  - 스캔본보다 텍스트 기반 원본 사용 권장
+  - 테이블 경계(|, 탭, 명확한 셀 구분)가 보존된 원본이 유리
+  - 추출 신뢰도가 낮거나 추출에 실패하면 CSV로 변환한 뒤 CSV 업로드 경로를 사용하는 것을 권장
+
 ---
 
 ## 1) 이번 문서에서 바로 할 일

diff --git a/tests/test_ui_contract.py b/tests/test_ui_contract.py
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+
+def _app_js_text() -> str:
+    """Return the UTF-8 decoded contents of ``bitnet_tools/ui/app.js``.
+
+    The path is resolved relative to this test file: two levels up from the
+    file itself, then ``bitnet_tools/ui/app.js``.
+    """
+    project_root = Path(__file__).resolve().parents[1]
+    app_js_path = project_root / 'bitnet_tools' / 'ui' / 'app.js'
+    return app_js_path.read_text(encoding='utf-8')
+
+
+def test_api_error_detail_priority_is_consistent_for_post_and_get():
+    """The ``error_detail`` -> ``error`` -> JSON fallback expression occurs at least twice in app.js."""
+    fallback_expr = "data?.error_detail || data?.error || JSON.stringify(data || {})"
+    occurrences = _app_js_text().count(fallback_expr)
+    assert occurrences >= 2
+
+
+def test_ui_failure_status_messages_are_defined_consistently():
+    """Every expected failure ``setStatus(...)`` call is present verbatim in app.js."""
+    required_calls = (
+        "setStatus('입력 전처리 실패')",
+        "setStatus('차트 작업 실패')",
+        "setStatus('분석 실패')",
+        "setStatus('멀티 분석 실패')",
+        "setStatus('모델 실행 실패')",
+    )
+    source_text = _app_js_text()
+    # Collect the absent calls so the test passes only when none is missing.
+    missing = [call for call in required_calls if call not in source_text]
+    assert not missing
diff --git a/tests/test_web.py b/tests/test_web.py
@@ -1,11 +1,93 @@
 import time
+import threading
+import urllib.request
+import urllib.error
+import json
 from pathlib import Path
 
 import base64
 import io
 import zipfile
 
 import bitnet_tools.web as web
+from http.server import ThreadingHTTPServer
+
+
+def _xlsx_sheet_xml(rows):
+    """Render ``rows`` as the XML text of a minimal SpreadsheetML worksheet.
+
+    Args:
+        rows: Iterable of rows, each an iterable of cell values. ``None``
+            cells are skipped, ``int``/``float`` values (``bool`` included,
+            being an ``int`` subclass) are written as ``<v>`` cells, and any
+            other value is written as an XML-escaped inline string.
+
+    Returns:
+        The worksheet XML document as a ``str``.
+    """
+    from xml.sax.saxutils import escape
+
+    def _column_letters(c_idx):
+        # Bijective base-26 (A..Z, AA, AB, ...). A plain chr(ord('A') + n)
+        # runs past 'Z' and yields invalid references from the 27th column on.
+        letters = ''
+        while c_idx > 0:
+            c_idx, rem = divmod(c_idx - 1, 26)
+            letters = chr(ord('A') + rem) + letters
+        return letters
+
+    row_nodes = []
+    for r_idx, row in enumerate(rows, start=1):
+        cell_nodes = []
+        for c_idx, val in enumerate(row, start=1):
+            if val is None:
+                continue
+            ref = f"{_column_letters(c_idx)}{r_idx}"
+            if isinstance(val, (int, float)):
+                cell_nodes.append(f'<c r="{ref}"><v>{val}</v></c>')
+            else:
+                # escape() replaces '&', '<' and '>' so the cell text stays well-formed.
+                cell_nodes.append(f'<c r="{ref}" t="inlineStr"><is><t>{escape(str(val))}</t></is></c>')
+        row_nodes.append(f'<row r="{r_idx}">{"".join(cell_nodes)}</row>')
+    return (
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">'
+        f'<sheetData>{"".join(row_nodes)}</sheetData>'
+        '</worksheet>'
+    )
+
+
+def _make_xlsx_b64(sheet_map):
+    """Build a minimal in-memory ``.xlsx`` archive and return it base64-encoded.
+
+    The archive contains only ``xl/workbook.xml``, its relationships part and
+    one ``xl/worksheets/sheetN.xml`` per entry of ``sheet_map``.
+
+    Args:
+        sheet_map: Mapping of sheet name -> rows (passed to
+            ``_xlsx_sheet_xml``). Iteration order determines the 1-based
+            sheet ids and file names.
+
+    Returns:
+        ASCII ``str`` with the base64 encoding of the zip archive bytes.
+    """
+    from xml.sax.saxutils import quoteattr
+
+    workbook_sheets = []
+    rels = []
+    mem = io.BytesIO()
+    with zipfile.ZipFile(mem, 'w') as zf:
+        for idx, (name, rows) in enumerate(sheet_map.items(), start=1):
+            rid = f'rId{idx}'
+            # quoteattr() escapes and quotes the name, so characters such as
+            # '&', '<' or '"' in a sheet name cannot break the workbook XML.
+            workbook_sheets.append(f'<sheet name={quoteattr(str(name))} sheetId="{idx}" r:id="{rid}"/>')
+            rels.append(
+                f'<Relationship Id="{rid}" '
+                'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" '
+                f'Target="worksheets/sheet{idx}.xml"/>'
+            )
+            zf.writestr(f'xl/worksheets/sheet{idx}.xml', _xlsx_sheet_xml(rows))
+
+        workbook_xml = (
+            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+            '<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" '
+            'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">'
+            f'<sheets>{"".join(workbook_sheets)}</sheets>'
+            '</workbook>'
+        )
+        rel_xml = (
+            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+            '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+            f'{"".join(rels)}'
+            '</Relationships>'
+        )
+        zf.writestr('xl/workbook.xml', workbook_xml)
+        zf.writestr('xl/_rels/workbook.xml.rels', rel_xml)
+    # The archive is read only after the ``with`` block has closed (finalized) it.
+    return base64.b64encode(mem.getvalue()).decode('ascii')
+
+
+def _run_server():
+    """Serve ``web.Handler`` from a background daemon thread.
+
+    The server binds to ``127.0.0.1`` with port ``0``; callers read the
+    actual port from ``server.server_port``.
+
+    Returns:
+        ``(server, thread)``: the ``ThreadingHTTPServer`` and the already
+        started thread running its ``serve_forever`` loop.
+    """
+    httpd = ThreadingHTTPServer(('127.0.0.1', 0), web.Handler)
+    worker = threading.Thread(target=httpd.serve_forever, daemon=True)
+    worker.start()
+    return httpd, worker
+
+
+def _post_json(url, payload, timeout=10):
+    """POST ``payload`` as JSON to ``url`` and return ``(status_code, decoded_body)``.
+
+    HTTP error statuses do not propagate as exceptions: the ``HTTPError`` is
+    caught and its body is JSON-decoded like a normal response.
+
+    Args:
+        url: Absolute URL to post to.
+        payload: JSON-serializable object sent as the request body.
+        timeout: Seconds to wait on the connection before giving up, so a
+            stalled server cannot hang the test run indefinitely.
+
+    Returns:
+        Tuple of the HTTP status code and the parsed JSON body.
+    """
+    req = urllib.request.Request(
+        url,
+        data=json.dumps(payload).encode('utf-8'),
+        headers={'Content-Type': 'application/json'},
+        method='POST',
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            return resp.getcode(), json.loads(resp.read().decode('utf-8'))
+    except urllib.error.HTTPError as exc:
+        # HTTPError wraps the error response; close it once the body is read.
+        with exc:
+            return exc.code, json.loads(exc.read().decode('utf-8'))
 
 
 def test_submit_and_get_chart_job_done(monkeypatch, tmp_path):
@@ -106,3 +188,97 @@ def test_coerce_document_payload_to_csv_text():
     assert source == 'sample.docx'
     assert 'h1,h2' in csv_text
     assert meta['table_id'] == 'docx_table_1'
+
+
+def test_excel_single_sheet_normalization():
+    """A one-sheet workbook sent with ``input_type='excel'`` is returned as CSV text."""
+    sheet_rows = [['region', 'amount'], ['seoul', 100], ['busan', 120]]
+    payload = {
+        'input_type': 'excel',
+        'name': 'sales.xlsx',
+        'file_base64': _make_xlsx_b64({'Sales': sheet_rows}),
+    }
+
+    source, csv_text, meta = web._coerce_csv_text_from_file_payload(payload)
+
+    assert source == 'sales.xlsx'
+    for expected_line in ('region,amount', 'seoul,100'):
+        assert expected_line in csv_text
+    # The payload names no sheet; the expected metadata value is this placeholder.
+    assert meta['sheet_name'] == '<first_sheet>'
+
+
+def test_excel_multi_sheet_selection_uses_target_sheet():
+    """Requesting ``sheet_name='Summary'`` yields that sheet's rows and not the other sheet's."""
+    sheets = {
+        'Raw': [['c1', 'c2'], ['a', 1]],
+        'Summary': [['city', 'score'], ['busan', 9]],
+    }
+    encoded = _make_xlsx_b64(sheets)
+
+    csv_text = web._normalize_excel_base64_to_csv_text(encoded, sheet_name='Summary')
+
+    assert all(fragment in csv_text for fragment in ('city,score', 'busan,9'))
+    # The header of the unselected 'Raw' sheet must not appear in the output.
+    assert 'c1,c2' not in csv_text
+
+
+def test_excel_empty_sheet_raises_validation_error():
+    """A sheet with no rows is rejected with ``ValueError``."""
+    import pytest
+
+    encoded = _make_xlsx_b64({'Empty': []})
+    expect_rejection = pytest.raises(ValueError, match='selected sheet has no non-empty rows')
+
+    with expect_rejection:
+        web._normalize_excel_base64_to_csv_text(encoded, sheet_name='Empty')
+
+
+def test_excel_header_validation_rejects_empty_and_duplicate_columns():
+    """A blank header cell and a repeated header name each raise ``ValueError``."""
+    import pytest
+
+    # (sheet name, header row, regex the error message must match)
+    cases = (
+        ('BadHeader', ['id', ''], 'empty header at index 1'),
+        ('DupHeader', ['id', 'id'], 'duplicated header'),
+    )
+    for sheet_name, header_row, message in cases:
+        encoded = _make_xlsx_b64({sheet_name: [header_row, [1, 2]]})
+        with pytest.raises(ValueError, match=message):
+            web._normalize_excel_base64_to_csv_text(encoded)
+
+
+def test_document_extract_api_success_and_failure_payload_contract():
+    """``/api/document/extract`` answers HTTP 200 for both extracted and failed documents.
+
+    Success: ``tables`` is non-empty. Failure (image-only PDF stub): ``tables``
+    is empty and ``failure_reason`` / ``failure_detail`` describe the cause.
+    """
+    server, thread = _run_server()
+    base = f'http://127.0.0.1:{server.server_port}'
+    try:
+        ok_code, ok_body = _post_json(base + '/api/document/extract', {
+            'input_type': 'document',
+            'source_name': 'ok.docx',
+            'file_base64': _make_docx_b64(),
+        })
+        assert ok_code == 200
+        assert ok_body['tables']
+
+        fail_code, fail_body = _post_json(base + '/api/document/extract', {
+            'input_type': 'document',
+            'source_name': 'scan.pdf',
+            'file_base64': base64.b64encode(b'%PDF-1.4\n<< /Subtype /Image >>\n').decode('ascii'),
+        })
+        # Extraction failure is reported in the body, not through the status code.
+        assert fail_code == 200
+        assert fail_body['tables'] == []
+        assert fail_body['failure_reason'] == '스캔 이미지'
+        assert fail_body['failure_detail']
+    finally:
+        server.shutdown()
+        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
+        server.server_close()
+        thread.join(timeout=1)
+
+
+def test_analyze_document_fallback_error_uses_error_and_error_detail():
+    """``/api/analyze`` answers HTTP 400 with the structured error payload for an encrypted PDF stub.
+
+    The body must carry ``error``, ``error_detail`` and
+    ``preprocessing_stage == 'table_extraction'``.
+    """
+    server, thread = _run_server()
+    base = f'http://127.0.0.1:{server.server_port}'
+    try:
+        code, body = _post_json(base + '/api/analyze', {
+            'input_type': 'document',
+            'source_name': 'locked.pdf',
+            'file_base64': base64.b64encode(b'%PDF-1.4\n1 0 obj\n<< /Encrypt 2 0 R >>\nendobj\n').decode('ascii'),
+            'question': '요약',
+        })
+        assert code == 400
+        assert body['error'] == 'document table extraction failed'
+        assert 'error_detail' in body
+        assert body['preprocessing_stage'] == 'table_extraction'
+    finally:
+        server.shutdown()
+        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
+        server.server_close()
+        thread.join(timeout=1)