In [None]:
import os
import time
import json
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException, UnexpectedAlertPresentException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Target page and output locations.
BASE_URL = "https://www.koicd.kr/ins/act.do"
BASE_DIR = os.path.abspath("suga_results")
JSON_DIR, FAILED_FILE, CSV_FILE = (
    os.path.join(BASE_DIR, leaf)
    for leaf in ("json_pages", "failed_codes.txt", "suga_info_all.csv")
)

# Create the output tree. JSON_DIR sits under BASE_DIR, so a single
# makedirs call creates both directories.
os.makedirs(JSON_DIR, exist_ok=True)

# Scraper state shared by the rest of the script:
#   all_data     - every record collected so far, across pages
#   failed_codes - codes whose rows could not be processed
#   page_num     - 1-based number of the page currently being scraped
all_data, failed_codes = [], []
page_num = 1

# Browser session: open the listing page and give it a moment to render.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get(BASE_URL)
time.sleep(2)

def _dismiss_open_alert(browser):
    """Dismiss a JavaScript alert on ``browser`` if one is still open.

    With the WebDriver default prompt handling ("dismiss and notify") the
    alert has normally been closed already by the time
    ``UnexpectedAlertPresentException`` reaches Python, so touching
    ``switch_to.alert`` raises ``NoAlertPresentException``.  That case is
    expected and deliberately ignored here.
    """
    # Local import: the file-level import list does not provide this name.
    from selenium.common.exceptions import NoAlertPresentException

    try:
        browser.switch_to.alert.dismiss()
    except NoAlertPresentException:
        pass  # nothing left to dismiss


def _standalone_entry(code, name):
    """Return the record stored for a code that has no sub-items."""
    return {"parent_code": code, "sub_code": "", "sub_name": name}


# Main scrape: walk the paginated table, expand each parent row, and persist
# results after every page.  The try/finally guarantees that the failure log
# is written and the browser is closed even if the run is interrupted.
try:
    while True:
        print(f"\n▶ 페이지 {page_num} 접속 중...")

        try:
            table = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "table.act_table")))
            rows = table.find_elements(By.CSS_SELECTOR, "tbody > tr")
        except Exception as e:
            print(f"❌ 테이블 로딩 실패: {e}")
            break

        page_data = []

        for row in rows:
            # Only rows whose class attribute is purely numeric are parent
            # entries; everything else (sub rows, headers) is skipped.
            cls = row.get_attribute("class")
            if not cls or not cls.isdigit():
                continue

            # Reset per row so the handlers below never report the code of
            # a previously processed row (or hit an unbound name).
            suga_code = None

            try:
                suga_code = row.find_element(By.CSS_SELECTOR, "td:nth-child(2)").text.strip()
                kor_name = row.find_element(By.CSS_SELECTOR, "td:nth-child(3)").text.strip()

                try:
                    toggle_btn = row.find_element(By.CSS_SELECTOR, "td.child b")
                except NoSuchElementException:
                    # No expand button: the code is stored on its own.
                    print(f"🟡  {suga_code} (단독항목) → 단독 저장")
                    page_data.append(_standalone_entry(suga_code, kor_name))
                    continue

                # JS click avoids "element not interactable" issues; the
                # short pause lets the sub rows get inserted into the table.
                driver.execute_script("arguments[0].click();", toggle_btn)
                time.sleep(0.5)

                sub_rows = table.find_elements(By.CSS_SELECTOR, f"tr.child{suga_code}.table")
                sub_data = []
                for sub in sub_rows:
                    tds = sub.find_elements(By.TAG_NAME, "td")
                    if len(tds) >= 3:
                        sub_data.append({
                            "parent_code": suga_code,
                            "sub_code": tds[1].text.strip(),
                            "sub_name": tds[2].text.strip(),
                        })

                if sub_data:
                    page_data.extend(sub_data)
                else:
                    print(f"⚠️  {cls} ({suga_code}) 클릭했지만 하위 없음")
                    page_data.append(_standalone_entry(suga_code, kor_name))

            except UnexpectedAlertPresentException as alert_ex:
                print(f"❌ {cls} ({suga_code}) 처리 실패 → Alert: {alert_ex}")
                if suga_code is not None:
                    failed_codes.append(suga_code)
                _dismiss_open_alert(driver)
                continue

            except Exception as e:
                print(f"❌ {cls} 처리 실패 → {e}")
                if suga_code is None:
                    # The code was never read; try once more so the failure
                    # can still be logged.  If the row is unreadable there
                    # is nothing to record.
                    try:
                        suga_code = row.find_element(By.CSS_SELECTOR, "td:nth-child(2)").text.strip()
                    except Exception:
                        continue
                failed_codes.append(suga_code)

        # Per-page JSON snapshot.
        json_path = os.path.join(JSON_DIR, f"page_{page_num}.json")
        with open(json_path, "w", encoding="utf-8") as jf:
            json.dump(page_data, jf, ensure_ascii=False, indent=2)

        all_data.extend(page_data)

        # Intermediate CSV: rewritten in full after each page so a crash
        # loses at most the page in progress.  Best-effort by design.
        try:
            with open(CSV_FILE, "w", newline="", encoding="utf-8-sig") as cf:
                writer = csv.DictWriter(cf, fieldnames=["parent_code", "sub_code", "sub_name"])
                writer.writeheader()
                writer.writerows(all_data)
            print(f"✅ 중간 CSV 저장 완료: {CSV_FILE}")
        except Exception as e:
            print(f"❌ 중간 CSV 저장 실패: {e}")

        # Move to the next page via the pager link labelled with its number.
        # A missing link is the normal end-of-data condition.
        # NOTE(review): the fixed sleep assumes the next page's table is in
        # place within one second — confirm against the site's behaviour.
        try:
            next_btn = driver.find_element(By.LINK_TEXT, str(page_num + 1))
            driver.execute_script("arguments[0].scrollIntoView(true);", next_btn)
            time.sleep(0.5)
            next_btn.click()
            time.sleep(1)
            page_num += 1
        except Exception:
            print(f"⛔ 페이지 {page_num+1} 이동 실패. 중단.")
            break
finally:
    # Always persist the failure log and release the browser, in that order;
    # the nested finally makes sure a write error cannot leak the driver.
    try:
        with open(FAILED_FILE, "w", encoding="utf-8") as ff:
            ff.writelines(f"{code}\n" for code in failed_codes)
    finally:
        driver.quit()

print("\n📦 전체 데이터 통합 완료.")
if failed_codes:
    # Do not claim full success when some rows were logged as failed.
    print(f"⚠️  실패 항목 {len(failed_codes)}건 → {FAILED_FILE}")
else:
    print("✅ 모든 항목 정상 처리 완료.")
