# In [30]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _node_text(node, default=""):
    """Return the stripped text of a parsed HTML node, or *default* if it is None."""
    return node.text.strip() if node is not None else default


def _find_text(parent, tag, css_class, default=""):
    """Return the stripped text of the first matching child, or *default* if absent."""
    return _node_text(parent.find(tag, class_=css_class), default)


def scrape_page(url: str, data: list, max_transport: int = 3, timeout: float = 30) -> None:
    """Fetch one listing page and append one row per room to *data*.

    Each appended row is a list of ``11 + max_transport`` strings, in order:
    title, address, building info, ``max_transport`` transport entries,
    floor, rent, administration fee, deposit, gratuity, layout, size,
    detail URL.

    Args:
        url: Address of the listing page to download.
        data: List that is extended in place with the scraped rows.
        max_transport: Exact number of transport entries stored per row.
            Shorter lists are padded with empty strings and longer ones are
            truncated, so every row has the same width.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: If the request times out,
            fails, or the server answers with an HTTP error status.
    """
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page as "no listings".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # One "cassetteitem" block per building.
    for item in soup.find_all("div", class_="cassetteitem"):
        title = _find_text(item, "div", "cassetteitem_content-title")
        address = _find_text(item, "li", "cassetteitem_detail-col1")
        build_info = _find_text(item, "li", "cassetteitem_detail-col3")

        transport = [t.text.strip() for t in item.find_all("div", class_="cassetteitem_detail-text")]
        # Normalise to a fixed width so rows always line up with a fixed column list.
        transport = (transport + [""] * max_transport)[:max_transport]

        # Building-level link, used only when a room row carries no link of its own.
        building_link = item.find("a", class_="js-cassette_link_href")

        # One table row per room inside the building.
        for room in item.find_all("tr", class_="js-cassette_link"):
            # The floor is taken from the first cell whose text contains "階".
            floor = next((td.text.strip() for td in room.find_all("td") if "階" in td.text), "不明")
            rent = _find_text(room, "span", "cassetteitem_price--rent")
            admin_fee = _find_text(room, "span", "cassetteitem_price--administration")
            deposit = _find_text(room, "span", "cassetteitem_price--deposit")
            gratuity = _find_text(room, "span", "cassetteitem_price--gratuity")
            layout = _find_text(room, "span", "cassetteitem_madori")
            size = _find_text(room, "span", "cassetteitem_menseki")

            # Prefer the link found inside this room's row so each room gets its
            # own detail URL (assumes the anchor lives in the row - TODO confirm
            # against the page markup); otherwise reuse the building-level link.
            link = room.find("a", class_="js-cassette_link_href")
            if link is None:
                link = building_link
            detail_url = link.get("href", "") if link is not None else ""

            data.append(
                [title, address, build_info]
                + transport
                + [floor, rent, admin_fee, deposit, gratuity, layout, size, detail_url]
            )
# Accumulator for the scraped rows; scrape_page appends to it in place.
data = []

# Listing URL that every page request is built from.
base_url = "https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=030&bs=040&ra=013&rn=0045&ek=004531420&cb=0.0&ct=9999999&mb=0&mt=9999999&md=03&md=04&et=9999999&cn=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&sngz=&po1=25&pc=50"

# Number of result pages to scrape.
total_pages = 2

# Pages are requested from 1 up to total_pages; the page number is appended
# to the base URL as an extra query parameter.
for page_number in range(1, total_pages + 1):
    url = base_url + "&page=" + str(page_number)
    scrape_page(url, data)

# Column labels for the DataFrame, in the same order as the values in each row.
columns = [
    'Title',
    'Address',
    'Building Info',
    'Transport 1',
    'Transport 2',
    'Transport 3',
    'Floor',
    'Rent',
    'Administration Fee',
    'Deposit',
    'Gratuity',
    'Layout',
    'Size',
    'Detail URL',
]
df = pd.DataFrame(data=data, columns=columns)

# Export the DataFrame to CSV, leaving out the index column.
df.to_csv('data.csv', index=False)