In [1]:
pip install selenium webdriver-manager

Collecting selenium
  Downloading selenium-4.32.0-py3-none-any.whl.metadata (7.5 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting python-dotenv (from webdriver-manager)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.32.0-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Create a Service object and pass it to Chrome explicitly
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
# This cell does nothing else with the browser, and a later cell rebinds `driver`
# to a new session, so close this one now rather than leaking a Chrome process.
driver.quit()

In [14]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Step 1: launch Chrome
options = Options()
# options.add_argument('--headless')  # leave commented out while debugging
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Step 2: open the NAVITIME bus timetable page and capture its HTML
url = "https://www.navitime.co.jp/diagram/bus/00017089/00004306/0/"
try:
    driver.get(url)
    # Fixed pause before reading page_source (presumably to let the page's
    # scripts finish rendering the timetable - confirm the delay is sufficient).
    time.sleep(5)
    html = driver.page_source
finally:
    # Always close the browser, even when loading the page fails.
    driver.quit()

# Step 3: parse the captured HTML with BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Step 4: departure-time extraction (per direction)
def extract_times_by_direction(soup, div_id):
    """Collect the departure times listed inside one timetable section.

    Finds the ``<div>`` whose ``id`` equals ``div_id``, iterates over every
    ``dl.dl_0`` block inside it, takes the hour from the block's first
    ``<dt>`` and the minutes from each ``<div>`` whose inline ``style``
    contains ``text-decoration: underline``.

    NOTE(review): only underlined minute entries are collected; presumably
    that styling marks the departures of interest - confirm against the page.

    Args:
        soup: Parsed document exposing ``find`` (a BeautifulSoup object in
            this notebook).
        div_id: ``id`` attribute of the section to read, e.g. ``"d_0_0"``.

    Returns:
        list[datetime]: One entry per successfully parsed "HH:MM" pair, in
        document order. ``strptime`` leaves the date part at its default
        (1900-01-01). The list is empty when the section is not found.
    """
    section = soup.find("div", id=div_id)
    if section is None:
        return []

    times = []
    for block in section.select("dl.dl_0"):
        hour_tag = block.find('dt')
        if hour_tag is None:
            continue
        hour = hour_tag.text.strip().zfill(2)
        underlined = block.find_all('div', style=lambda s: s and 'text-decoration: underline' in s)
        for m in underlined:
            minute = m.text.strip().zfill(2)
            try:
                times.append(datetime.strptime(f"{hour}:{minute}", "%H:%M"))
            except ValueError:
                # Text that does not form a valid HH:MM pair is skipped
                # (best-effort scraping); any other error propagates.
                continue
    return times

# Step 5: aggregation (bucket the departures into fixed-width time bins)
def count_by_time_bins(times, start="06:00", end="23:00", bin_minutes=15):
    """Count how many departures fall into each fixed-width time bin.

    Bins start at ``start`` and are created while the bin start is earlier
    than ``end``; each bin is ``bin_minutes`` wide. A bin's label shows its
    first and last whole minute, e.g. ``"06:00-06:14"``. Each bin is treated
    as half-open ``[bin start, next bin start)``, so a time carrying seconds
    (e.g. 06:14:30) is still counted in the bin it belongs to.

    Args:
        times: Iterable of ``datetime`` objects on the default ``strptime``
            date (1900-01-01), as produced by
            ``datetime.strptime(..., "%H:%M")``.
        start: First bin start as "HH:MM". Defaults to "06:00".
        end: Upper limit for bin starts as "HH:MM". Defaults to "23:00".
        bin_minutes: Bin width in minutes. Defaults to 15.

    Returns:
        pandas.DataFrame: A single row indexed "Bus Count" with one column
        per bin label. Times outside the covered range are not counted.

    Raises:
        ValueError: If ``bin_minutes`` is not positive, or ``start``/``end``
            are not valid "HH:MM" strings.
    """
    if bin_minutes <= 0:
        raise ValueError("bin_minutes must be positive")

    width = timedelta(minutes=bin_minutes)
    start_time = datetime.strptime(start, "%H:%M")
    end_time = datetime.strptime(end, "%H:%M")

    labels = []
    bin_start = start_time
    while bin_start < end_time:
        last_minute = bin_start + width - timedelta(minutes=1)
        labels.append(f"{bin_start.strftime('%H:%M')}-{last_minute.strftime('%H:%M')}")
        bin_start += width

    counts = [0] * len(labels)
    for t in times:
        # Floor division of timedeltas gives the bin index directly; it is
        # negative for times before `start` and too large for times after
        # the last bin, both of which are ignored.
        index = (t - start_time) // width
        if 0 <= index < len(labels):
            counts[index] += 1

    return pd.DataFrame([counts], columns=labels, index=["Bus Count"])

# Step 6: pull the departure times for each direction, then tally them
times_kameido, times_tsukiji = (
    extract_times_by_direction(soup, section_id)
    for section_id in ("d_0_0", "d_1_0")
)
df_kameido, df_tsukiji = map(count_by_time_bins, (times_kameido, times_tsukiji))

# Step 7: display (or save) the results
print("🚌 亀戸駅前方面（d_0_0）", df_kameido, sep="\n")
print("\n🚌 築地駅前方面（d_1_0）", df_tsukiji, sep="\n")

# Optional: Excel export
# df_kameido.to_excel("kameido_schedule.xlsx")
# df_tsukiji.to_excel("tsukiji_schedule.xlsx")

🚌 亀戸駅前方面（d_0_0）
           06:00-06:14  06:15-06:29  06:30-06:44  06:45-06:59  07:00-07:14  \
Bus Count            0            0            0            0            0   

           07:15-07:29  07:30-07:44  07:45-07:59  08:00-08:14  08:15-08:29  \
Bus Count            0            0            0            0            0   

           ...  20:30-20:44  20:45-20:59  21:00-21:14  21:15-21:29  \
Bus Count  ...            0            0            0            0   

           21:30-21:44  21:45-21:59  22:00-22:14  22:15-22:29  22:30-22:44  \
Bus Count            0            0            0            0            0   

           22:45-22:59  
Bus Count            0  

[1 rows x 68 columns]

🚌 築地駅前方面（d_1_0）
           06:00-06:14  06:15-06:29  06:30-06:44  06:45-06:59  07:00-07:14  \
Bus Count            0            0            0            0            0   

           07:15-07:29  07:30-07:44  07:45-07:59  08:00-08:14  08:15-08:29  \
Bus Count            0            0            

In [15]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from datetime import datetime, timedelta
import time
import re

# Chrome option setup (headless)
options = Options()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

time_strings = []
try:
    # Open the NAVITIME train timetable page
    # (the original note describes it as Kinshicho Station, Shinjuku-bound)
    url = "https://www.navitime.co.jp/diagram/timetable?node=00001824&lineId=00000169&updown=0"
    driver.get(url)
    time.sleep(3)  # fixed pause before querying the DOM

    # Extract the time data
    hour_elements = driver.find_elements('css selector', '.hour')
    minute_elements = driver.find_elements('css selector', '.minute')

    # NOTE(review): zip pairs the i-th `.hour` with the i-th `.minute`
    # element; this assumes the two lists line up one-to-one - confirm.
    for hour_elem, minute_elem in zip(hour_elements, minute_elements):
        hour = hour_elem.text.strip()
        if not re.match(r'^\d{1,2}$', hour):
            # An empty or non-numeric hour label would produce a string that
            # the later "%H:%M" parsing cannot handle, so skip the row.
            continue
        for minute in minute_elem.text.strip().split():
            if re.match(r'^\d{1,2}$', minute):
                time_strings.append(f"{hour}:{minute.zfill(2)}")
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

# Parse each "HH:MM" string into a datetime.time value
time_objects = [
    parsed.time()
    for parsed in (datetime.strptime(stamp, "%H:%M") for stamp in time_strings)
]

# Build consecutive 15-minute windows (06:00 through 23:59), inclusive on both ends
start_time = datetime.strptime("06:00", "%H:%M")
end_time = datetime.strptime("23:59", "%H:%M")
bins = []
while start_time < end_time:
    bins.append((start_time.time(), (start_time + timedelta(minutes=14)).time()))
    start_time += timedelta(minutes=15)
labels = [f"{lo.strftime('%H:%M')}-{hi.strftime('%H:%M')}" for lo, hi in bins]

# Tally: each time is credited to the first window that contains it
count_dict = dict.fromkeys(labels, 0)
for departure in time_objects:
    matched = next(
        (name for (lo, hi), name in zip(bins, labels) if lo <= departure <= hi),
        None,
    )
    if matched is not None:
        count_dict[matched] += 1

# Wide-format DataFrame: a single row with one column per window
df = pd.DataFrame([count_dict.values()], columns=count_dict.keys(), index=["Train Count"])

In [16]:
# Display the single-row table of train counts per 15-minute window
df

Unnamed: 0,06:00-06:14,06:15-06:29,06:30-06:44,06:45-06:59,07:00-07:14,07:15-07:29,07:30-07:44,07:45-07:59,08:00-08:14,08:15-08:29,...,21:30-21:44,21:45-21:59,22:00-22:14,22:15-22:29,22:30-22:44,22:45-22:59,23:00-23:14,23:15-23:29,23:30-23:44,23:45-23:59
Train Count,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
