In [14]:
import requests
from bs4 import BeautifulSoup

def scrape_events(page=1, timeout=10):
    """Scrape one page of the event listing at www.m-messe.co.jp.

    The page number is sent as the ``page`` query parameter, e.g.
    ``https://www.m-messe.co.jp/event/?page=2`` for the second page.

    Args:
        page: 1-based page number of the listing to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A list of dicts, one per ``li.eventInr.clear`` item found, with the
        keys ``title``, ``date``, ``category``, ``target``, ``detail_url`` and
        ``image_url``. A value is ``None`` when the corresponding element is
        missing. The list is empty when the page contains no event items.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    base_url = "https://www.m-messe.co.jp"
    url = base_url + "/event/"
    params = {
        "page": page
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    # Parse the raw bytes and tell BeautifulSoup to decode them as UTF-8.
    soup = BeautifulSoup(response.content, "html.parser", from_encoding='utf-8')

    event_list = soup.select("ul.eventList > li.eventInr.clear")
    if not event_list:
        # No events on this page: return an empty list.
        return []

    events = []
    for li in event_list:
        # Detail link: only site-relative links under /event/detail/ are kept;
        # anything else is treated as "no detail page".
        a_tag = li.find("a")
        detail_url = a_tag.get("href") if a_tag else None
        if detail_url and detail_url.startswith("/event/detail/"):
            detail_url = base_url + detail_url
        else:
            detail_url = None

        category_div = li.select_one(".category")
        category = category_div.get_text(strip=True) if category_div else None

        date_div = li.select_one(".date")
        date_text = date_div.get_text(strip=True) if date_div else None

        title_div = li.select_one(".eventTit")
        title = title_div.get_text(strip=True) if title_div else None

        # Strip the "対象" label text so only the audience value remains.
        target_div = li.select_one(".target")
        target_text = target_div.get_text(strip=True).replace("対象", "") if target_div else None

        # Thumbnail: make site-relative image paths absolute.
        thumb_img = li.select_one(".thumb img")
        image_url = thumb_img.get("src") if thumb_img else None
        if image_url and image_url.startswith("/"):
            image_url = base_url + image_url

        events.append({
            "title": title,
            "date": date_text,
            "category": category,
            "target": target_text,
            "detail_url": detail_url,
            "image_url": image_url
        })

    return events

def scrape_all_events(max_pages=10):
    """Collect events from consecutive listing pages.

    Pages are requested starting at 1 and going up to ``max_pages``
    (inclusive). Collection stops early at the first page for which
    ``scrape_events`` returns an empty result.

    Args:
        max_pages: Upper bound on the number of pages to request.

    Returns:
        A single flat list containing the events of every page visited.
    """
    collected = []
    page_no = 1
    while page_no <= max_pages:
        page_events = scrape_events(page_no)
        if not page_events:
            # An empty page means there is nothing further to fetch.
            break
        collected += page_events
        page_no += 1
    return collected


all_event_data = scrape_all_events(max_pages=100)  # adjust the page limit as needed
# for event in all_event_data:
#     print(event)

In [13]:
# Display the first scraped event record as the cell output.
all_event_data[0]

{'title': '【Identity V第五人格】Merry Starry Christmas ～月の河マーケット～&2024年秋季IJL決勝戦',
 'date': '2024.12.20(金) 〜 2024.12.22(日)',
 'category': 'イベント',
 'target': '一般',
 'detail_url': 'https://www.m-messe.co.jp/event/detail/7951',
 'image_url': 'https://www.m-messe.co.jp/saved/images/event/24/52/ab04abbb155bfa826b5b7f2640392aaa23fc2452.jpg'}

In [15]:
import requests
from bs4 import BeautifulSoup

def scrape_event_detail(detail_url, timeout=10):
    """Scrape the fields of a single event detail page.

    Args:
        detail_url: Absolute URL of the event detail page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A dict describing the event. The keys ``title``, ``category``,
        ``description``, ``organizer``, ``official_url`` and
        ``detail_image_url`` are always present (``None`` when the element
        is missing). The keys ``date_period``, ``time_main``, ``time_note``,
        ``target``, ``price`` and ``facilities`` are only present when their
        enclosing ``<dl>`` section exists on the page.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is never parsed as if it were an event.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(detail_url, timeout=timeout)
    response.raise_for_status()
    # Decode using the encoding detected from the body rather than the
    # (possibly missing) charset in the HTTP headers.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, 'html.parser')

    def text_of(tag):
        # Stripped text of a tag, or None when the tag was not found.
        return tag.get_text(strip=True) if tag else None

    # Collect whatever information the detail page exposes.
    event_data = {}

    # Title
    event_data['title'] = text_of(soup.select_one('.eventDetail .eventTit'))

    # Category
    event_data['category'] = text_of(soup.select_one('.eventDetail .category'))

    # Description
    event_data['description'] = text_of(soup.select_one('.eventDetail .disc'))

    # Period, opening hours, audience/price, facilities, organizer and URL
    # all live in <dl> elements.
    # Period: dl.date > dd > div.main
    date_dd = soup.select_one('.eventDetail dl.date dd')
    if date_dd:
        event_data['date_period'] = text_of(date_dd.select_one('.main'))

    # Opening hours: dl.time > dd > div.main, div.note
    time_dd = soup.select_one('.eventDetail dl.time dd')
    if time_dd:
        event_data['time_main'] = text_of(time_dd.select_one('.main'))
        event_data['time_note'] = text_of(time_dd.select_one('.note'))

    # Audience / admission fee: dl.person > dd > div.authorP
    person_dd = soup.select_one('.eventDetail dl.person dd .authorP')
    if person_dd:
        event_data['target'] = text_of(person_dd.select_one('.target'))
        event_data['price'] = text_of(person_dd.select_one('.price'))

    # Facilities: dl.facility > dd > .hall (there may be several halls)
    halls = soup.select('.eventDetail dl.facility dd .hall')
    if halls:
        facilities = []
        for hall in halls:
            hall_name = hall.select_one('.hallName')
            hall_no = hall.select_one('.hallNo')
            facility_str = ''
            if hall_name:
                facility_str += hall_name.get_text(strip=True)
            if hall_no:
                facility_str += ' ' + hall_no.get_text(strip=True)
            # strip() removes the leading space left when only hallNo exists.
            facilities.append(facility_str.strip())
        event_data['facilities'] = facilities

    # Organizer: dl.organizer > dd
    event_data['organizer'] = text_of(soup.select_one('.eventDetail dl.organizer dd'))

    # Official URL: dl.contact > dd > .url > a
    url_link = soup.select_one('.eventDetail dl.contact dd .url a')
    event_data['official_url'] = url_link.get('href') if url_link else None

    # Thumbnail image on the detail page
    thumb_img = soup.select_one('.eventDetail .thumb img')
    if thumb_img:
        img_src = thumb_img.get('src')
        # Make site-relative image paths absolute.
        if img_src and img_src.startswith('/'):
            img_src = 'https://www.m-messe.co.jp' + img_src
        event_data['detail_image_url'] = img_src
    else:
        event_data['detail_image_url'] = None

    return event_data


def scrape_events(page=1, timeout=10):
    """Scrape one page of the event listing at www.m-messe.co.jp.

    Args:
        page: 1-based page number, sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A list of dicts, one per ``li.eventInr.clear`` item found, with the
        keys ``title``, ``date``, ``category``, ``target``, ``detail_url`` and
        ``image_url``. A value is ``None`` when the corresponding element is
        missing. The list is empty when the page contains no event items.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Failing loudly matters here because callers treat an empty list
            as "no more pages"; a parsed error page would otherwise end
            pagination silently.
        requests.RequestException: On connection errors or timeouts.
    """
    base_url = "https://www.m-messe.co.jp"
    url = base_url + "/event/"
    params = {
        "page": page
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    # Decode using the encoding detected from the body.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    event_list = soup.select("ul.eventList > li.eventInr.clear")
    if not event_list:
        return []

    events = []
    for li in event_list:
        # Detail link: only site-relative links under /event/detail/ are kept;
        # anything else is treated as "no detail page".
        a_tag = li.find("a")
        detail_url = a_tag.get("href") if a_tag else None
        if detail_url and detail_url.startswith("/event/detail/"):
            detail_url = base_url + detail_url
        else:
            detail_url = None

        category_div = li.select_one(".category")
        category = category_div.get_text(strip=True) if category_div else None

        date_div = li.select_one(".date")
        date_text = date_div.get_text(strip=True) if date_div else None

        title_div = li.select_one(".eventTit")
        title = title_div.get_text(strip=True) if title_div else None

        # Strip the "対象" label text so only the audience value remains.
        target_div = li.select_one(".target")
        target_text = target_div.get_text(strip=True).replace("対象", "") if target_div else None

        # Thumbnail: make site-relative image paths absolute.
        thumb_img = li.select_one(".thumb img")
        image_url = thumb_img.get("src") if thumb_img else None
        if image_url and image_url.startswith("/"):
            image_url = base_url + image_url

        events.append({
            "title": title,
            "date": date_text,
            "category": category,
            "target": target_text,
            "detail_url": detail_url,
            "image_url": image_url
        })

    return events

def scrape_all_events(max_pages=10):
    """Collect events from the listing pages and enrich them with details.

    Pages 1..``max_pages`` are fetched with ``scrape_events``; collection
    stops at the first page that yields no events. For every event that has
    a ``detail_url`` the detail page is fetched with ``scrape_event_detail``
    and its fields are merged into the event dict.

    Args:
        max_pages: Upper bound on the number of listing pages to request.

    Returns:
        A flat list of event dicts (listing fields plus detail fields).
    """
    all_events = []
    for page in range(1, max_pages + 1):
        events = scrape_events(page)
        if not events:
            break
        # Visit each detail page and merge the extra information.
        for event in events:
            if event['detail_url']:
                detail_data = scrape_event_detail(event['detail_url'])
                for key, value in detail_data.items():
                    # Never overwrite a value obtained from the listing page
                    # with None just because a detail-page selector missed;
                    # new keys are still added (even when None) so the
                    # resulting dict keeps the same set of keys.
                    if value is not None or key not in event:
                        event[key] = value
        all_events.extend(events)
    return all_events


all_event_data = scrape_all_events(max_pages=2)  # adjust the page limit as needed

In [18]:
# Display the record at index 19 (the 20th event) as the cell output.
all_event_data[19]

{'title': 'マイナビ転職フェア 幕張',
 'date': '2025.02.11(火)',
 'category': 'イベント',
 'target': '一般',
 'detail_url': 'https://www.m-messe.co.jp/event/detail/7964',
 'image_url': 'https://www.m-messe.co.jp/saved/images/event/23/4c/1c9b95e4c6033b4ac92ad77e814162cddd1b234c.jpg',
 'description': '転職希望者向け合同企業説明会',
 'date_period': '2025年2月11日(火・祝)',
 'time_main': '11:00～16:00',
 'time_note': '(受付 10:00～15:30)',
 'facilities': ['国際会議場 コンベンションホール'],
 'organizer': '株式会社マイナビ',
 'official_url': 'https://tenshoku.mynavi.jp/event/chiba/20250211A/',
 'detail_image_url': 'https://www.m-messe.co.jp/cache/images/event/23/4c/1c9b95e4c6033b4ac92ad77e814162cddd1b234c.1200x1200.none.jpg'}