<a href="https://colab.research.google.com/github/terisuke/colab/blob/main/job_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt-get update -qq
!apt-get install -y chromium-chromedriver -qq
!pip install selenium gspread google-auth beautifulsoup4 lxml spacy requests pykakasi

!python -m spacy download ja_core_news_sm

import time
import re
import gspread
import requests
import spacy  # 日本語の姓名分割に使用
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from google.colab import auth
from google.auth import default
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
from pykakasi import kakasi
import json  # jsonを扱うために追加
import os    # 環境変数の取得で使用


# =========================================
#  設定 (ここをご自身の環境に合わせて変更)
# =========================================
# Example of registering the Hunter.io API key as an environment variable.
# For real use, put the key into ``os.environ`` yourself or run
# ``%env HUNTER_API_KEY=...`` in the notebook.
# NOTE: this assignment overwrites any HUNTER_API_KEY value that was already
# present in the environment when the cell runs.
os.environ["HUNTER_API_KEY"] = "xxxxxxxxxxxxxxxxxxxxx"  # replace with your real key

PERPLEXITY_API_KEY = "xxxxxxxxxxxxxxxxxxxx"  # replace with your real key
GEMINI_API_KEY = "xxxxxxxxxxxxxxxxxxx"       # Gemini API key
SPREADSHEET_NAME = "Hakata_Job_Data"   # spreadsheet to open, or to create if missing
JOB_SHEET_NAME = "求人情報"            # worksheet that stores job postings
OFFICER_SHEET_NAME = "役員情報"        # worksheet that stores officer data
MAX_LINKS = 10  # maximum number of job postings to scrape


# =========================================
#  Google スプレッドシート認証
# =========================================
# Authenticate the Colab user, then build the gspread client (`gc`) from the
# default Google credentials. `gc` is used by get_or_create_sheet below.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)


# =========================================
#  スプレッドシートとシートの準備
# =========================================
def get_or_create_sheet(spreadsheet_name, sheet_name):
    """Open a spreadsheet and one of its worksheets, creating whichever is missing.

    Uses the module-level gspread client ``gc``. A message is printed for each
    of the two lookups saying whether the object already existed or was created.

    Args:
        spreadsheet_name: Title of the spreadsheet to open or create.
        sheet_name: Title of the worksheet to open or add (100 rows x 20 cols
            when newly added).

    Returns:
        A ``(spreadsheet, worksheet)`` tuple.
    """
    try:
        book = gc.open(spreadsheet_name)
    except gspread.exceptions.SpreadsheetNotFound:
        book = gc.create(spreadsheet_name)
        print(f"スプレッドシート '{spreadsheet_name}' を新規作成しました。")
    else:
        print(f"スプレッドシート '{spreadsheet_name}' は既に存在します。")

    try:
        sheet = book.worksheet(sheet_name)
    except gspread.exceptions.WorksheetNotFound:
        sheet = book.add_worksheet(title=sheet_name, rows="100", cols="20")
        print(f"シート '{sheet_name}' を新規作成しました。")
    else:
        print(f"シート '{sheet_name}' は既に存在します。")

    return book, sheet

# Worksheet that receives the scraped job postings.
spreadsheet, worksheet = get_or_create_sheet(SPREADSHEET_NAME, JOB_SHEET_NAME)
# Worksheet for officer data, opened or created in the same spreadsheet; the
# spreadsheet handle returned by this second call is discarded.
_, officer_worksheet = get_or_create_sheet(SPREADSHEET_NAME, OFFICER_SHEET_NAME)


# =========================================
#  Chrome (headless) の起動
# =========================================
# Build the Chrome options: headless mode plus two flags that are presumably
# needed to run Chrome inside the Colab container (TODO confirm).
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Start the shared module-level `driver`. If the browser cannot be started the
# error is printed and the script is stopped with SystemExit.
try:
    driver = webdriver.Chrome(options=options)
except WebDriverException as e:
    print(f"Chrome ドライバの起動に失敗しました: {e}")
    raise SystemExit("処理を続行できません。")


# =========================================
#  ハローワークからの求人情報スクレイピング
# =========================================
def scrape_job_data(driver, max_links):
    """Scrape job postings from the Hello Work site.

    Drives the search form (prefecture value "40", then municipality value
    "40132"), then walks the result pages and opens each detail page through
    ``parse_detail_info``.

    Args:
        driver: Selenium WebDriver used for all navigation.
        max_links: Upper bound on the number of postings returned.

    Returns:
        A list of the dicts produced by ``parse_detail_info``, at most
        ``max_links`` long and containing at most one entry per
        ``email_domain``. An empty list is returned when any of the required
        navigation steps fails.
    """
    url = "https://www.hellowork.mhlw.go.jp/"
    try:
        driver.get(url)
        time.sleep(2)
    except WebDriverException as e:
        print(f"【エラー】ハローワークサイトにアクセスできませんでした: {e}")
        return []

    try:
        # Click the "job search" icon.
        driver.find_element(By.CLASS_NAME, "retrieval_icn").click()
        time.sleep(5)
    except NoSuchElementException as e:
        print(f"【エラー】'求人情報検索' アイコンが見つかりませんでした: {e}")
        return []

    # Select the prefecture (value 40 = Fukuoka).
    try:
        Select(driver.find_element(By.ID, "ID_tDFK1CmbBox")).select_by_value("40")
        time.sleep(2)
    except NoSuchElementException as e:
        print(f"【エラー】都道府県選択ができませんでした: {e}")
        return []

    # "Narrow down further by municipality" button: the second `input.button`
    # on the page, hence the IndexError handling.
    try:
        driver.find_elements(By.CSS_SELECTOR, "input.button")[1].click()
        time.sleep(2)
    except (NoSuchElementException, IndexError) as e:
        print(f"【エラー】'市区町村などでさらに絞り込む' ボタンが見つかりませんでした: {e}")
        return []

    # Select Hakata ward (40132), confirm the dialog, then run the search.
    try:
        Select(driver.find_element(By.ID, "ID_rank1CodeMulti")).select_by_value("40132")
        time.sleep(2)
        driver.find_element(By.ID, "ID_ok").click()  # "OK"
        time.sleep(5)
        driver.find_element(By.ID, "ID_searchBtn").click()  # "Search"
        time.sleep(5)
    except NoSuchElementException as e:
        print(f"【エラー】市区町村選択ができませんでした: {e}")
        return []

    # Show 50 results per page. This step is optional: on failure the default
    # page size is kept and scraping continues.
    try:
        Select(driver.find_element(By.ID, "ID_fwListNaviDispBtm")).select_by_value("50")
        time.sleep(5)
    except NoSuchElementException:
        print("【注意】1ページあたり件数を選択できませんでした。標準の表示件数で継続します。")

    all_results = []
    processed_domains = set()  # email domains that already produced a result

    while True:
        detail_urls, next_page_exists = parse_page(driver, max_links)
        for detail_url in detail_urls:
            if len(all_results) >= max_links:
                break
            detail_info = parse_detail_info(driver, detail_url)
            if detail_info:
                # Keep only the first posting seen for each email domain.
                if detail_info["email_domain"] not in processed_domains:
                    all_results.append(detail_info)
                    processed_domains.add(detail_info["email_domain"])  # mark the domain as handled

        if len(all_results) >= max_links or not next_page_exists:
            break

        # Move to the next result page; stop paging if the button never
        # becomes clickable within 10 seconds.
        try:
            next_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.NAME, "fwListNaviBtnNext"))
            )
            next_btn.click()
            time.sleep(5)
        except TimeoutException:
            print("次のページボタンが見つからなかったため、ページ移動を中止します。")
            break
    return all_results

def parse_page(driver, max_links):
    """Extract detail-page URLs from the result list currently shown.

    Args:
        driver: Selenium WebDriver positioned on a search-result page.
        max_links: Stop collecting once this many URLs have been gathered.

    Returns:
        A ``(detail_urls, next_page_exists)`` tuple: the absolute detail-page
        URLs found on this page, and whether an element named
        ``fwListNaviBtnNext`` is present.
    """
    soup = BeautifulSoup(driver.page_source, "html.parser")
    detail_urls = []
    for job in soup.find_all("table", class_="kyujin"):
        detail_btn = job.find("a", string=lambda t: t and "詳細を表示" in t)
        if not detail_btn:
            continue
        # An anchor without an href cannot be followed; skip it instead of
        # raising KeyError and aborting the whole scrape.
        href = detail_btn.get("href")
        if not href:
            continue
        if href.startswith("./"):
            href = href[2:]
        detail_urls.append("https://www.hellowork.mhlw.go.jp/kensaku/" + href)
        if len(detail_urls) >= max_links:
            break

    # The current page number is only reported for progress output, so a
    # missing or non-numeric value must never stop the scrape.
    try:
        current_page_btn = driver.find_element(By.XPATH, "//ul[@class='flex page_navi']/li/input[@disabled and @name!='fwListNaviBtnPrev']")
        current_page = int(current_page_btn.get_attribute("value"))
        print(f"現在のページ: {current_page}")
    except NoSuchElementException:
        print("現在表示しているのが最後のページです。")
    except (TypeError, ValueError):
        print("現在のページ番号を取得できませんでした。")

    try:
        driver.find_element(By.NAME, "fwListNaviBtnNext")
        next_page_exists = True
    except NoSuchElementException:
        next_page_exists = False

    return detail_urls, next_page_exists

def parse_detail_info(driver, url):
    """Open a job detail page in a new tab and extract its fields.

    The tab is always closed and focus is always returned to the window that
    was active on entry, including when extraction raises unexpectedly.

    Args:
        driver: Selenium WebDriver; its current window is the result list.
        url: Absolute URL of the job detail page.

    Returns:
        A dict with the keys ``company_name``, ``representative_name``,
        ``industry``, ``email_local``, ``email_domain`` and ``url``, or
        ``None`` when the page cannot be loaded, the company name or email is
        missing, or the company name contains an excluded keyword.
    """
    main_window = driver.current_window_handle
    driver.execute_script("window.open('');")
    driver.switch_to.window(driver.window_handles[-1])
    try:
        return _read_detail_page(driver, url)
    finally:
        # Single cleanup point: close the detail tab and go back to the list.
        driver.close()
        driver.switch_to.window(main_window)


def _read_detail_page(driver, url):
    """Load *url* in the driver's current tab and return the extracted dict or None."""
    try:
        driver.get(url)
        time.sleep(2)
    except WebDriverException as e:
        print(f"【エラー】求人詳細ページにアクセスできませんでした: {url}, エラー: {e}")
        return None

    soup = BeautifulSoup(driver.page_source, "html.parser")

    def get_text_safe(element_id, field_name):
        """Return the stripped text of the element with *element_id*, or ""."""
        element = soup.find(id=element_id)
        if element is None:
            print(f"詳細ページで {field_name} が見つかりませんでした: {url}")
            return ""
        return element.get_text(strip=True)

    company_name = get_text_safe("ID_jgshMei", "会社名")
    if not company_name:
        print(f"会社名が取得できなかったため、この求人情報はスキップします: {url}")
        return None

    # Skip postings whose company name contains one of these keywords
    # (branch / sales office / regional office / sub-branch).
    exclude_keywords = ["支店", "営業所", "支社", "出張所"]
    if any(keyword in company_name for keyword in exclude_keywords):
        print(f"会社名に除外キーワードが含まれているため、スキップします: {company_name}")
        return None

    representative_name = get_text_safe("ID_dhshaMei", "代表者名")
    industry = get_text_safe("ID_sngBrui", "産業分類")
    email = get_text_safe("ID_ttsEmail", "メールアドレス")

    if not email:
        print(f"メールアドレスが取得できなかったため、この求人情報はスキップします: {url}")
        return None

    # Split at the first "@" only. A value without "@" is kept whole as the
    # domain part with an empty local part.
    if '@' in email:
        email_local, email_domain = email.split('@', 1)
    else:
        email_local = ""
        email_domain = email

    return {
        "company_name": company_name,
        "representative_name": representative_name,
        "industry": industry,
        "email_local": email_local,
        "email_domain": email_domain,
        "url": url
    }


# =========================================
#  Perplexity API関連
# =========================================
def get_officers_from_perplexity(company_name, timeout=60):
    """Ask the Perplexity chat-completions API about a company's officers.

    Args:
        company_name: Company name inserted into the search prompt.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the whole run.

    Returns:
        The text content of the first choice, or ``""`` when the request
        fails, the response has no choices, or the content is empty.
    """
    payload = {
        "model": "sonar-pro",  # other model names previously listed here: "mistral-large-latest", "pplx-70b-online", "pplx-70b-chat"
        "messages": [
            {
                "role": "user",
                "content": (
                    f"Search for information about the executive officers of {company_name}. "
                    "Provide any relevant details, including names, titles, and possibly other related information."
                )
            }
        ],
        "max_tokens": 2048,
        "temperature": 0.2
    }
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    # Deliberately best-effort: any failure (network, HTTP status, malformed
    # body) is reported and turned into an empty string so the caller can move
    # on to the next company.
    try:
        resp = requests.post(
            "https://api.perplexity.ai/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        resp.raise_for_status()  # raise on HTTP error status
        result_json = resp.json()
        if "choices" in result_json and len(result_json["choices"]) > 0:
            content = result_json["choices"][0]["message"]["content"]
            return content if content else ""
        else:
            print("【注意】Perplexity応答に'choices'が含まれない or 空です")
            return ""
    except Exception as e:
        print(f"Perplexity API呼び出しエラー: {e}")
        return ""


# =========================================
#  Gemini APIを使って役員情報(役職・姓・名)を抽出
# =========================================
def get_officers_from_gemini(raw_text, company_name=None, timeout=60):
    """Extract officer title / family name / given name from text via Gemini.

    List-looking lines (bullets, numbered items) are first pulled out of
    ``raw_text``; if none are found the whole text is sent instead. Gemini is
    asked to answer with a single JSON object, which is then parsed and
    filtered with ``is_valid_title``.

    Args:
        raw_text: Free text describing a company's officers.
        company_name: Company name embedded in the prompt's JSON template.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of officer dicts (keys as requested in the prompt: ``title``,
        ``last_name``, ``first_name``, ``last_name_en``, ``first_name_en``).
        An empty list is returned for blank input, request/HTTP failures, a
        response without candidate text, or output that is not the expected
        JSON shape, so that the caller can fall back to another parser.
    """
    if not raw_text.strip():
        return []

    # Pre-processing: keep only lines that look like list items
    # (bullets, "1." style numbering, "(1)" style numbering).
    officer_list_pattern = r'(\n\s*[-•⋅▪\-*]|\n\s*\d+\.|\n\s*[（\(]\d+[）\)])\s*(.+)'
    matches = re.findall(officer_list_pattern, raw_text)
    officer_text = "\n".join([match[1] for match in matches])

    # Fallback: no list-like lines found, so send the original text.
    if not officer_text:
        officer_text = raw_text

    # NOTE(review): the API key is part of the URL, so it will appear in any
    # exception message printed below that includes the request URL.
    gemini_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={GEMINI_API_KEY}"

    prompt = (
        "You are a highly skilled information extraction assistant specializing in Japanese company officers. "
        "Your ONLY task is to extract officer information from the provided Japanese text and output it as a SINGLE, VALID JSON object. "
        "Follow the structure below precisely. Extract ONLY the following, and ABSOLUTELY NOTHING ELSE:\n"
        "{\n"
        f'  "company_name": "{company_name}",\n'
        '  "officers": [\n'
        "     {\n"
        '      "title": "<Officer Title in Japanese>",\n'
        '      "last_name": "<Officer\'s Last Name in Japanese>",\n'
        '      "first_name": "<Officer\'s First Name in Japanese>",\n'
        '      "last_name_en": "<Officer\'s Last Name in Romaji>",\n'
        '      "first_name_en": "<Officer\'s First Name in Romaji>"\n'
        "     },\n"
        "    ...\n"
        "  ]\n"
        "}\n"
        "  *  **title:** The officer's title in *Japanese*. Examples: 代表取締役社長, 取締役, 監査役, 会長, 社長, 副社長, "
        "専務取締役, 常務取締役, 執行役員, 取締役会長, 代表取締役, 常勤監査役.  "
        "     If the title is in English (e.g., CEO, CFO), IGNORE it. It MUST be a Japanese title.\n"
        "  *  **last_name:** The officer's last name in *Japanese* characters. If not found, use \"\".\n"
        "  *  **first_name:** The officer's first name in *Japanese* characters. If not found, use \"\".\n"
        "  *  **last_name_en:** The Romaji transliteration of the last_name.  MUST be empty if last_name is empty.\n"
        "  *  **first_name_en:** The Romaji transliteration of the first_name. MUST be empty if first_name is empty.\n\n"

        "**ABSOLUTELY, UNDER NO CIRCUMSTANCES, extract anything that is NOT a Japanese officer title and name.**  "
        "Specifically, NEVER include:\n"
        "*   Company establishment dates\n"
        "*   Capital information\n"
        "*   Addresses\n"
        "*   Number of employees\n"
        "*   Sales figures\n"
        "*   General company descriptions\n"
        "*   Introductory/concluding phrases\n"
        "*   Source references\n"
        "*   Any other non-officer information\n\n"
        "If the 'title' field is empty or contains non-title text (e.g., numbers, dates), OMIT the entire object. "
        "The 'officers' array should ONLY contain valid officer entries.\n"
        "Output MUST be valid JSON and NOTHING ELSE. DO NOT explain. DO NOT apologize.\n"
        f"TEXT: {officer_text}"  # the pre-processed text
    )

    headers = {"Content-Type": "application/json"}
    data = {
        "contents": [{"parts": [{"text": prompt}]}]
    }

    try:
        resp = requests.post(gemini_url, json=data, headers=headers, timeout=timeout)
        resp.raise_for_status()
        result = resp.json()
    except requests.exceptions.HTTPError as e:
        # HTTPError is a subclass of RequestException, so it must be handled
        # first; otherwise this branch can never run.
        print(f"HTTPエラー: {e}")
        body = e.response.text if e.response is not None else ""
        print(f"レスポンス内容: {body}")  # show the error response body
        return []
    except requests.exceptions.RequestException as e:
        # Failure of the HTTP request itself (connection, timeout, ...).
        print(f"リクエストエラー: {e}")
        return []
    except ValueError as e:
        # The response body was not valid JSON.
        print(f"Gemini応答がJSONとしてパースできませんでした: {e}")
        return []

    # A response may lack candidates or text parts; treat that as "no
    # officers" instead of raising.
    try:
        gemini_text = result["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError) as e:
        print(f"Gemini応答に候補テキストが含まれていません: {e}")
        return []
    if not isinstance(gemini_text, str):
        print("Gemini応答が期待した形式ではありません。")
        return []

    # The model may wrap its answer in a ```json fenced block; unwrap it.
    match = re.search(r"```json\n(.*)\n```", gemini_text, re.DOTALL)
    json_text = match.group(1) if match else gemini_text

    try:
        officers_data = json.loads(json_text)
    except json.JSONDecodeError as e:
        print(f"Gemini応答がJSONとしてパースできませんでした: {e}")
        print(f"Geminiからの応答: {json_text}")
        return []

    if not (isinstance(officers_data, dict)
            and isinstance(officers_data.get('officers'), list)):
        print("Gemini応答が期待した形式ではありません。")
        return []

    # Drop entries that are not objects, have no string title, or whose title
    # fails the keyword / pattern checks. Malformed entries are skipped
    # individually so one bad item does not discard the whole list.
    return [
        officer for officer in officers_data['officers']
        if isinstance(officer, dict)
        and isinstance(officer.get('title'), str)
        and officer['title']
        and is_valid_title(officer['title'])
    ]


# 役職名として有効かどうかを判定する関数 (後処理用)
def is_valid_title(title):
    """Decide whether *title* is acceptable as an officer title.

    A title is rejected when it is empty, when it contains (case-insensitively)
    any of the English fragments listed below, when it consists only of ASCII
    letters and whitespace, or when it contains a digit or a half-width
    parenthesis.

    Args:
        title: Candidate title string.

    Returns:
        ``True`` if the title passes every check, otherwise ``False``.
    """
    if not title:
        return False

    # English words and phrases that indicate the text is not a Japanese title.
    rejected_fragments = (
        "Based on", "information", "Company", "Unfortunately", "established",
        "capital", "Representative Director", "Director", "Auditor",
        "President", "CEO", "CFO", "COO", "Executive Officer",
        "Standing Statutory Auditor", "company's", "representative",
        "Here are", "key details", "following information",
        "provide the following", "search results", "appears to be",
        "also serves as", "main business", "headquarters is located",
        "worth noting", "seems to be", "in charge of", "current position",
        "holds", "shares", "ownership", "does not hold", "previous president",
        "also serves", "appears to have undergone", "company name changed",
        "company focuses on", "search results don't provide",
        "company appears to", "limited and potentially outdated",
        "suggests that", "provides an overview", "executive structure of",
        "worth noting that", "effective", "company specializes in",
        "specializes in", "appointed as",
    )

    lowered_title = title.lower()
    for fragment in rejected_fragments:
        if fragment.lower() in lowered_title:
            return False

    english_only = re.match(r'^[A-Za-z\s]+$', title) is not None
    has_digit_or_paren = re.search(r'[\d()]', title) is not None
    return not (english_only or has_digit_or_paren)


# ローマ字変換 + クレンジング
kks = kakasi()
def to_romaji(japanese_name):
    """Convert a Japanese name to cleaned-up Hepburn romaji.

    Each segment returned by the shared ``kks`` converter is capitalised, the
    segments are joined, and digits, whitespace and the punctuation
    ``- ( ) . , [ ]`` are removed from the result.

    Args:
        japanese_name: Name to convert; a falsy value yields ``""``.

    Returns:
        The romaji string with the characters above stripped out.
    """
    if not japanese_name:
        return ""

    pieces = []
    for segment in kks.convert(japanese_name):
        pieces.append(segment["hepburn"].capitalize())
    joined = " ".join(pieces)

    # Strip digits, whitespace and punctuation.
    return re.sub(r'[0-9\s\-().,\[\]]', '', joined)


# =========================================
#  既存のパース関数 (フォールバックに使う) (改善)
# =========================================
# Known officer titles, de-duplicated and ordered longest first so that a
# compound title such as "専務取締役" is matched before the shorter "取締役"
# it contains. The sort is stable, so equal-length titles keep list order.
_OFFICER_TITLES = sorted(
    dict.fromkeys([
        "代表取締役社長", "代表取締役", "取締役", "監査役", "会長", "社長", "副社長",
        "専務取締役", "常務取締役", "執行役員", "取締役会長", "常勤監査役",
        "社外取締役", "社外監査役", "執行役", "理事長", "理事", "監事",
        "代表理事", "副理事長", "専務理事", "常務理事", "部門長", "本部長",
        "事業部長", "支店長", "所長", "部長", "課長", "室長", "局長",
    ]),
    key=len,
    reverse=True,
)

# "<role> : <name>" style lines (separator is a colon or comma, ASCII or full-width).
_ROLE_THEN_NAME = re.compile(r'(.+?)\s*[:：、,]\s*(.+)')
# "<name>(<role>)" style lines (ASCII or full-width parentheses).
_NAME_THEN_ROLE = re.compile(r'(.+?)[（\(](.+?)[）\)]')

# spaCy pipeline, loaded on first use and then reused across calls.
_JA_NLP = None


def _get_ja_nlp():
    """Return the shared ``ja_core_news_sm`` pipeline, loading it once."""
    global _JA_NLP
    if _JA_NLP is None:
        _JA_NLP = spacy.load("ja_core_news_sm")
    return _JA_NLP


def parse_officer_text(answer_text):
    """Rule-based fallback that extracts officers from free text.

    Each non-empty line is matched against two layouts, "role: name" and
    "name(role)". The role must contain one of the known officer titles and is
    replaced by the longest such title. The name is tokenised with spaCy: the
    first token becomes the family name and the remaining tokens the given
    name, unless the last token is the honorific "氏", in which case everything
    before it is kept as the family name.

    Args:
        answer_text: Multi-line text to scan.

    Returns:
        A list of dicts with the keys ``title``, ``last_name`` and
        ``first_name``.
    """
    results = []

    for line in answer_text.split("\n"):
        line = line.strip()
        if not line:
            continue

        match = _ROLE_THEN_NAME.match(line)
        if match:
            role = match.group(1).strip()
            full_name = match.group(2).strip()
        else:
            match = _NAME_THEN_ROLE.match(line)
            if not match:
                continue
            full_name = match.group(1).strip()
            role = match.group(2).strip()

        # Canonicalise the role to the longest known title it contains;
        # lines whose role contains no known title are skipped.
        title = next((t for t in _OFFICER_TITLES if t in role), None)
        if title is None:
            continue

        doc = _get_ja_nlp()(full_name)
        if len(doc) >= 2:
            if doc[-1].text == "氏":
                # Trailing honorific: the name is not split further.
                last_name = "".join(token.text for token in doc[:-1])
                first_name = ""
            else:
                last_name = doc[0].text
                first_name = "".join(token.text for token in doc[1:])
        else:
            last_name = full_name
            first_name = ""

        if is_valid_title(title):
            results.append({
                "title": title,
                "last_name": last_name,
                "first_name": first_name
            })

    return results


# =========================================
#  Hunter.io を使ってメールアドレスを取得する関数
# =========================================
def get_hunter_email(first_name_en, last_name_en, domain, timeout=30):
    """Look up an email address with the Hunter.io Email Finder API.

    The API key is read from the ``HUNTER_API_KEY`` environment variable.

    Args:
        first_name_en: Given name in romaji.
        last_name_en: Family name in romaji.
        domain: Email domain to search within.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The email address string on success; ``None`` when the key or domain
        is missing, the request fails, the status is not 200, the body is not
        valid JSON, or no email is present in the response.
    """
    api_key = os.getenv("HUNTER_API_KEY", "")
    if not api_key:
        print("[Hunter.io] HUNTER_API_KEY が設定されていません。")
        return None

    # Without a domain the lookup cannot succeed, so do not spend a request.
    if not domain:
        print("[Hunter.io] ドメインが空のため検索をスキップします。")
        return None

    base_url = "https://api.hunter.io/v2/email-finder"
    params = {
        "domain": domain,
        "first_name": first_name_en,
        "last_name": last_name_en,
        "api_key": api_key
    }

    # NOTE(review): the key travels as a query parameter, so it may show up in
    # exception messages that include the request URL.
    try:
        resp = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as e:
        print(f"[Hunter.io] リクエスト失敗: {e}")
        return None

    if resp.status_code == 429:
        print("[Hunter.io] レートリミット超過 (429)")
        return None
    if resp.status_code != 200:
        print(f"[Hunter.io] HTTPエラー: {resp.status_code}")
        return None

    try:
        payload = resp.json()
    except ValueError as e:
        print(f"[Hunter.io] 応答をJSONとして解釈できませんでした: {e}")
        return None

    # "data" may be absent or null; treat both as "nothing found".
    data = payload.get("data") if isinstance(payload, dict) else None
    email = data.get("email") if isinstance(data, dict) else None
    if email:
        print(f"[Hunter.io] {first_name_en} {last_name_en} @ {domain} => {email}")
        return email

    print(f"[Hunter.io] 見つかりませんでした: {first_name_en} {last_name_en} @ {domain}")
    return None


# =========================================
#  メイン処理
# =========================================
def main():
    """Run the whole pipeline: scrape jobs, write them, then look up officers.

    Steps:
        1. Scrape job postings with the shared ``driver`` (always quit
           afterwards) and write one row per company/email-domain pair to
           ``worksheet``.
        2. For each company, fetch officer text from Perplexity, extract
           officers with Gemini (falling back to ``parse_officer_text``), and
           collect one row per officer.
        3. Clean the rows, look up an email address with Hunter.io for each
           one, and write them to ``officer_worksheet``.
    """
    try:
        print("ハローワークから求人情報を取得中...")
        job_data = scrape_job_data(driver, MAX_LINKS)
    except Exception as e:
        print(f"【エラー】ハローワーク求人情報の取得中にエラー: {e}")
        return
    finally:
        # The browser is no longer needed whether scraping succeeded or not.
        driver.quit()

    job_dict = _group_jobs_by_company(job_data)

    print("求人情報をスプレッドシートに書き込み中...")
    header = [
        "会社名", "代表者名", "産業分類（業種）",
        "メールアドレス(ローカル)", "メールアドレス(ドメイン)", "求人詳細URL"
    ]
    worksheet.update([header], "A1")

    row_data = _build_job_rows(job_dict)
    if row_data:
        worksheet.update(row_data, "A2")
    print(f"{len(job_dict)} 社の求人情報をスプレッドシートに転記完了。")

    officer_header = [
        "会社名",
        "Perplexity取得情報",  # raw text
        "役職名",       # extracted by Gemini or the fallback parser
        "役員名（姓）",  # Japanese
        "役員名（名）",  # Japanese
        "役員名（姓ローマ字）",  # romaji
        "役員名（名ローマ字）",
        "Hunter推定メールアドレス"  # address found via Hunter.io
    ]
    officer_worksheet.update([officer_header], "A1")

    officer_rows = _collect_officer_rows(list(job_dict.keys()))
    if not officer_rows:
        print("役員情報の書き込み対象がありませんでした。")
        return

    filtered_officer_rows = []
    for row in officer_rows:
        # row = [company, raw Perplexity text, title, family name (JP),
        #        given name (JP), family name (romaji), given name (romaji),
        #        Hunter email]
        if not (row[2] and is_valid_title(row[2])):
            continue

        row[1] = _clean_raw_text(row[1])
        # Strip brackets, parentheses and digits from title and romaji names.
        for col in (2, 5, 6):
            row[col] = re.sub(r'[\[\]\(\)\d]', '', row[col]).strip()

        domain = _first_domain(job_dict, row[0])
        first_en = row[6]  # given name
        last_en = row[5]   # family name
        # A lookup needs a domain and at least one name part.
        if domain and (first_en or last_en):
            guessed_email = get_hunter_email(first_en, last_en, domain)
        else:
            guessed_email = None

        row[7] = guessed_email if guessed_email else "N/A"
        filtered_officer_rows.append(row)

    # Skip the write when every row was filtered out: there is nothing to send.
    if filtered_officer_rows:
        officer_worksheet.update(filtered_officer_rows, "A2")
    print(f"役員情報を {len(filtered_officer_rows)} 件書き込みました。")


def _as_text(value):
    """Return *value* as a string, mapping ``None`` to an empty string."""
    return "" if value is None else str(value)


def _group_jobs_by_company(job_data):
    """Group scraped job dicts by company name.

    Each company maps to its representative name (from the first posting
    seen), a list of industries, a list of URLs, and a dict from email domain
    to the list of local parts seen for that domain.
    """
    job_dict = {}
    for job in job_data:
        entry = job_dict.setdefault(job["company_name"], {
            "representative_name": job["representative_name"],
            "industry_list": [],
            "email_domain_dict": {},
            "url_list": []
        })
        entry["industry_list"].append(job["industry"])
        entry["email_domain_dict"].setdefault(job["email_domain"], []).append(job["email_local"])
        entry["url_list"].append(job["url"])
    return job_dict


def _build_job_rows(job_dict):
    """Build one spreadsheet row per (company, email domain) pair."""
    rows = []
    for company_name, vals in job_dict.items():
        industry_str = ",".join(vals["industry_list"])
        url_str = ",".join(vals["url_list"])
        for domain, local_parts in vals["email_domain_dict"].items():
            rows.append([
                company_name,
                vals["representative_name"],
                industry_str,
                ",".join(local_parts),
                domain,
                url_str
            ])
    return rows


def _collect_officer_rows(company_names):
    """Query Perplexity and Gemini per company and return unfiltered officer rows.

    A failure for one company is reported and does not stop the others.
    """
    officer_rows = []
    total = len(company_names)
    for idx, company_name in enumerate(company_names, 1):
        print(f"[{idx}/{total}] 会社: {company_name} の役員検索...")
        try:
            raw_text = get_officers_from_perplexity(company_name)
            if not raw_text:
                print(" => Perplexity応答なし")
                continue

            officers = get_officers_from_gemini(raw_text, company_name)
            if not officers:
                # Gemini produced nothing usable: use the rule-based parser.
                officers = parse_officer_text(raw_text)

            for off in officers:
                lname_jp = _as_text(off.get("last_name", ""))
                fname_jp = _as_text(off.get("first_name", ""))
                # Use the supplied romaji when present, otherwise convert the
                # Japanese name. Missing values end up as "" rather than None
                # so the later regex clean-up cannot fail on them.
                lname_en = _as_text(off.get("last_name_en")) or to_romaji(lname_jp)
                fname_en = _as_text(off.get("first_name_en")) or to_romaji(fname_jp)
                officer_rows.append([
                    company_name,
                    raw_text,
                    _as_text(off.get("title", "")),
                    lname_jp,
                    fname_jp,
                    lname_en,
                    fname_en,
                    None  # Hunter email, filled in later
                ])

            time.sleep(1)  # leave a gap between companies for rate limits

        except Exception as e:
            print(f"【エラー】会社: {company_name} 役員解析中にエラー: {e}")
            continue
    return officer_rows


def _clean_raw_text(text):
    """Remove digits, brackets, URLs and separator punctuation from raw text."""
    text = re.sub(r'[\[\]\(\)\d]', '', text)   # digits, [] and ()
    text = re.sub(r'http\S+', '', text)        # URLs
    text = re.sub(r'[・\-―、。]+', ' ', text)   # separator punctuation -> space
    return text.strip()


def _first_domain(job_dict, company_name):
    """Return the first email domain recorded for *company_name*, or ""."""
    domains = job_dict.get(company_name, {}).get("email_domain_dict", {})
    return next(iter(domains), "")


# Run the pipeline only when this file is executed as the main module.
if __name__ == "__main__":
    main()