In [None]:
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import re


def _child_text(parent, tag, css_class):
    """Return the stripped text of the first matching child, or "" if absent."""
    elem = parent.find(tag, class_=css_class)
    return elem.text.strip() if elem is not None else ""


def scrape_movie_schedule(html_content):
    """Extract per-movie details from the HTML of a cinema schedule page.

    Args:
        html_content: The HTML markup to parse. It is handed directly to
            BeautifulSoup, so it must be the markup itself rather than a
            URL; nothing is downloaded here.

    Returns:
        A pandas DataFrame with the columns ``id``, ``title``, ``info``,
        ``schedule`` and ``price``, holding one row for every ``div.box``
        element that contains a ``span.eiga-title``. The columns are
        present even when no movie is found. ``id`` is ``None`` when the
        box has no ``<a>`` element; the remaining text fields are ``""``
        when their element is missing.
    """
    columns = ["id", "title", "info", "schedule", "price"]
    soup = BeautifulSoup(html_content, "html.parser")

    # Only the div.box elements feed the result, so the page is not required
    # to contain the "sche-table" schedule table.
    movies_data = []
    for box in soup.find_all("div", class_="box"):
        try:
            # A box without a title is not a movie entry; skip it.
            title_elem = box.find("span", class_="eiga-title")
            if title_elem is None:
                continue

            # The id attribute sits on the first anchor; tolerate its absence
            # instead of discarding an otherwise complete entry.
            anchor = box.find("a")
            movie_id = anchor.get("id") if anchor is not None else None

            movies_data.append(
                {
                    "id": movie_id,
                    "title": title_elem.text.strip(),
                    "info": _child_text(box, "p", "stuff"),
                    "schedule": _child_text(box, "p", "day"),
                    "price": _child_text(box, "p", "price"),
                }
            )
        except Exception as e:
            # Best-effort scraping: one malformed box must not abort the rest.
            print(f"Error processing movie: {e}")

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(movies_data, columns=columns)


# Usage example


: 

In [None]:
from urllib.request import urlopen

schedule_url = "https://shimotakaidocinema.com/schedule/schedule.html"

# scrape_movie_schedule() parses markup, not URLs, so download the page first.
# The raw bytes are passed on unchanged so the parser can work out the
# character encoding itself.
with urlopen(schedule_url, timeout=30) as response:
    html_content = response.read()

df = scrape_movie_schedule(html_content)
df.head()