In [1]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Crawl the paginated prefecture listing and collect every company-detail URL,
# then persist the list to all_company_urls.csv for the next cell.
base_url = "https://shachomeikan.jp"
start_url = "/prefecture/23"
headers = {"User-Agent": "Mozilla/5.0"}
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the cell forever

all_urls = []          # company URLs in first-seen order
seen_urls = set()      # membership check so the same company is not stored twice
visited_pages = set()  # listing pages already fetched (guards against pager loops)

# start_url is left untouched so the cell can be re-read/re-run with the same
# configuration; page_url tracks the page currently being crawled.
page_url = urljoin(base_url, start_url)

while page_url:
    if page_url in visited_pages:
        # A "next" link that points back to a page we already fetched would
        # otherwise loop forever.
        break
    visited_pages.add(page_url)
    print(f"取得中: {page_url}")

    try:
        res = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Treat 4xx/5xx as failures instead of parsing an error page as a listing.
        res.raise_for_status()
    except requests.RequestException as e:
        # Stop crawling but still save whatever was collected so far.
        print(f"スキップ：{page_url} エラー: {e}")
        break

    soup = BeautifulSoup(res.text, "html.parser")

    # Collect the <a href> targets inside <ul class="area">.
    for a in soup.select("ul.area li.link a"):
        href = a.get("href")
        if not href:
            continue
        # urljoin handles relative, root-relative and absolute hrefs alike;
        # plain string concatenation would corrupt absolute links.
        full_url = urljoin(page_url, href)
        if full_url not in seen_urls:
            seen_urls.add(full_url)
            all_urls.append(full_url)

    # Follow the pager's "next" link if there is one; otherwise finish.
    next_link = soup.select_one('a.mod-pager-item_next')
    next_href = next_link.get("href") if next_link else None
    page_url = urljoin(page_url, next_href) if next_href else None

    if page_url:
        time.sleep(1)  # be gentle with the server between page fetches

# Save the collected URLs (BOM-prefixed UTF-8 so Excel opens it correctly).
df = pd.DataFrame(all_urls, columns=["URL"])
df.to_csv("all_company_urls.csv", index=False, encoding="utf-8-sig")
print("✅ 完了：会社URL一覧を all_company_urls.csv に保存しました")

取得中: https://shachomeikan.jp/prefecture/23
取得中: https://shachomeikan.jp/prefecture/23?page=2
取得中: https://shachomeikan.jp/prefecture/23?page=3
取得中: https://shachomeikan.jp/prefecture/23?page=4
取得中: https://shachomeikan.jp/prefecture/23?page=5
取得中: https://shachomeikan.jp/prefecture/23?page=6
✅ 完了：会社URL一覧を all_company_urls.csv に保存しました


In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time

# Visit every company page listed in all_company_urls.csv, pull the company
# name, representative name and website out of the information table, and
# write the result to company_info_list.csv.

# Load the URL list produced by the previous cell; drop blank rows so NaN is
# never passed to requests.
df = pd.read_csv("all_company_urls.csv")
urls = df["URL"].dropna().tolist()

# One dict per company page, turned into a DataFrame once at the end.
results = []
headers = {"User-Agent": "Mozilla/5.0"}

for page_url in urls:
    company_name = "N/A"
    ceo_name = "N/A"
    # Kept separate from page_url: the original code overwrote the loop
    # variable with the scraped website, losing the source page of the row.
    website = None

    try:
        res = requests.get(page_url, headers=headers, timeout=10)
        # Surface HTTP errors instead of silently recording an error page as "N/A".
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        table = soup.select_one("table.company-information")
        rows = table.select("tr") if table else []

        for row in rows:
            th = row.select_one("th")
            td = row.select_one("td")
            if not (th and td):
                continue
            label = th.get_text(strip=True)
            value = td.get_text(strip=True)
            if "社名" in label:
                company_name = value
            elif "代表者名" in label:
                ceo_name = value
            elif "WEBサイト" in label:
                website = value

    except Exception as e:
        # Deliberately broad: one bad page must not abort the whole run.
        # The failure is logged and the row is recorded with placeholders.
        print(f"スキップ: {page_url} エラー: {e}")
        company_name = "N/A"
        ceo_name = "N/A"
        website = None

    results.append({
        "社名": company_name,
        "代表者名": ceo_name,
        # Same fallback as before: the listing page URL is used when no
        # website was found (or the cell was empty).
        "会社URL": website or page_url,
        # Source page of the row, so every record stays traceable.
        "掲載ページURL": page_url,
    })

    time.sleep(1)  # throttle requests to avoid loading the server

# Save (BOM-prefixed UTF-8 so Excel opens it correctly).
df_result = pd.DataFrame(results)
df_result.to_csv("company_info_list.csv", index=False, encoding="utf-8-sig")
print("✅ 完了：company_info_list.csv に保存しました")

✅ 完了：company_info_list.csv に保存しました
