# In [1]:
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# FAQ search endpoint; queried once per category tag via the "searchTag" param.
url = "https://www.kia.com/kr/services/ko/faq.search"
# Site origin used to resolve relative image paths found in answer HTML.
BASE_URL = "https://www.kia.com"

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the whole scrape indefinitely.
REQUEST_TIMEOUT = 10

# Maps the API's category tag to the label stored in "category_name".
CATEGORY_MAP = {
    "kwp:kr/faq/charging": "충전",
    "kwp:kr/faq/conversion": "개조/전환",
    "kwp:kr/faq/etc": "기타",
    "kwp:kr/faq/ev-car": "전기차",
    "kwp:kr/faq/kia-app": "기아 앱",
    "kwp:kr/faq/maintenance": "정비/유지보수",
    "kwp:kr/faq/members": "멤버스",
    "kwp:kr/faq/pbv": "PBV",
    "kwp:kr/faq/purchase": "구매",
}


def _extract_image_urls(soup):
    """Return an absolute URL for every ``<img>`` in *soup* that has a ``src``.

    ``urljoin`` leaves already-absolute URLs untouched and resolves
    root-relative (``/a.png``), protocol-relative (``//host/a.png``) and
    path-relative (``a.png``) sources against ``BASE_URL``.
    """
    sources = (img.get("src") for img in soup.find_all("img"))
    return [urljoin(BASE_URL, src) for src in sources if src]


def _build_row(item, tag, category_name, timestamp):
    """Convert one FAQ item from the API response into a flat record.

    Args:
        item: Mapping with a ``"question"`` HTML string and an ``"answer"``
            mapping holding an ``"html"`` string.
        tag: Category tag the item was fetched under.
        category_name: Label associated with *tag* in ``CATEGORY_MAP``.
        timestamp: Value stored in both ``created_at`` and ``updated_at``.

    Returns:
        A dict with the raw HTML, the extracted plain text, the image URLs
        found in the answer, and category/source metadata.

    Raises:
        KeyError: If *item* lacks the expected keys.
    """
    question_html = item["question"]
    answer_html = item["answer"]["html"]
    answer_soup = BeautifulSoup(answer_html, "html.parser")

    return {
        "category_code": tag,
        "category_name": category_name,
        "question_html": question_html,
        "question_text": BeautifulSoup(question_html, "html.parser").get_text(
            strip=True
        ),
        "answer_html": answer_html,
        # Newline separator keeps block-level structure readable in plain text.
        "answer_text": answer_soup.get_text("\n", strip=True),
        "image_urls": _extract_image_urls(answer_soup),
        "source_url": "https://www.kia.com/kr/customer-service/center/faq",
        "created_at": timestamp,
        "updated_at": timestamp,
    }


rows = []
# Single timestamp so every row of this run shares the same created/updated
# value (naive local time, as produced by datetime.now()).
now = datetime.now()

# One session for all categories so the underlying connection is reused.
with requests.Session() as session:
    for tag, category_name in CATEGORY_MAP.items():
        response = session.get(
            url, params={"searchTag": tag}, timeout=REQUEST_TIMEOUT
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        items = response.json()["data"]["faqList"]["items"]
        rows.extend(
            _build_row(item, tag, category_name, now) for item in items
        )

kia_df = pd.DataFrame(rows)
kia_df["company"] = "kia"

# In [3]:
# Persist the scraped FAQ table to a pickle file in the working directory.
pd.to_pickle(kia_df, "FAQ_KIA.pkl")