In [None]:
%pip install -U requests beautifulsoup4

In [5]:
import requests
from bs4 import BeautifulSoup

def extract_properties(soup: "BeautifulSoup"):
    """Extract rental-listing records from a parsed search-results page.

    Args:
        soup: Parsed HTML of one search-results page (anything exposing the
            BeautifulSoup ``select`` / ``select_one`` API).

    Returns:
        A list of dicts, one per kept listing, with the keys ``title``,
        ``address``, ``rent`` (base rent plus management fee, rounded to two
        decimals), ``layout``, ``area``, ``age``, ``access``, ``distance``
        and ``image_urls`` (at most five entries).

    Raises:
        IndexError, ValueError, AttributeError: If a kept listing lacks one of
            the required fields (title, rent, layout, area, address, access).
            Only the management fee, the building age and the image gallery
            are treated as optional.
    """

    properties = []

    # Keep only every 20th match of "div.property"; the other matches are
    # treated as duplicates of the same listing.
    for group in soup.select("div.property")[::20]:
        # Listing name.
        title = group.select_one(".property_inner-title > :first-child").get_text()

        cols = group.select("td.detailbox-property-col")

        prices = cols[0].select("div")
        # Base rent: the last two characters of the text are a unit suffix.
        rent = float(prices[0].get_text()[:-2])
        try:
            # Management fee: drop the 4-character label prefix and the
            # 1-character unit suffix, then divide by 10000 so it can be added
            # to the rent (presumably rent is in units of 10,000 yen - confirm).
            manage_fee = float(prices[1].get_text()[4:-1]) / 10000
        except (IndexError, ValueError):
            # Fee cell missing or not numeric: count it as no fee. Only these
            # two errors are caught so that KeyboardInterrupt and real bugs
            # are no longer swallowed.
            manage_fee = 0.0

        info = cols[2].select("div")
        layout = info[0].get_text()
        # Floor area: the last two characters of the text are a unit suffix.
        area = float(info[1].get_text()[:-2])

        info2 = cols[3].select("div")
        try:
            # Building age: strip one leading and one trailing character.
            age = int(info2[1].get_text()[1:-1])
        except (IndexError, ValueError):
            # Age cell missing or not numeric: fall back to 0.
            age = 0

        address = cols[4].get_text().strip()

        # Access description of the first (nearest) station entry.
        access = group.select_one("div.detailnote > div:first-child").select("div")
        access = access[0].get_text()
        # Walking minutes: the last space-separated token with its first and
        # last characters stripped.
        distance = int(access.split(" ")[-1][1:-1])

        # First five listing images; a listing without a gallery gets an
        # empty list instead of aborting the whole page.
        gallery = group.select_one("ul.js-imageView")
        images = gallery.select("img.js-linkImage")[:5] if gallery is not None else []
        image_urls = [img.attrs["rel"] for img in images]

        properties.append(
            {
                "title": title,
                "address": address,
                "rent": round(rent + manage_fee, 2),  # rent includes the management fee
                "layout": layout,
                "area": area,
                "age": age,
                "access": access,
                "distance": distance,
                "image_urls": image_urls,
            }
        )

    return properties


# Smoke-test the extractor against the first page of search results.
pageId = 1
url = f"https://suumo.jp/jj/chintai/ichiran/FR301FC005/?ar=030&bs=040&ta=13&cb=0.0&ct=9999999&mb=0&mt=9999999&et=9999999&cn=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&ekInput=35340&nk=1&tj=10&sngz=&po1=09&po2=99&pc=100&page={pageId}"
# requests has no default timeout; bound the wait so a stalled connection
# cannot hang the kernel.
res = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as results.
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")
extract_properties(soup)

[{'title': 'ファブール千川',
  'address': '東京都板橋区大谷口上町',
  'rent': 4.7,
  'layout': 'ワンルーム',
  'area': 14.08,
  'age': 50,
  'access': '東京メトロ有楽町線/千川駅 歩15分',
  'distance': 15,
  'image_urls': ['https://img01.suumo.com/front/gazo/fr/bukken/619/100367847619/100367847619_gw.jpg',
   'https://img01.suumo.com/front/gazo/fr/bukken/619/100367847619/100367847619_11w.jpg',
   'https://img01.suumo.com/front/gazo/fr/bukken/619/100367847619/100367847619_rw.jpg',
   'https://img01.suumo.com/front/gazo/fr/bukken/619/100367847619/100367847619_1w.jpg',
   'https://img01.suumo.com/front/gazo/fr/bukken/619/100367847619/100367847619_2w.jpg']},
 {'title': 'ルネプラザ 106号室',
  'address': '東京都豊島区池袋３',
  'rent': 5.2,
  'layout': 'ワンルーム',
  'area': 15.0,
  'age': 40,
  'access': 'ＪＲ山手線/池袋駅 歩10分',
  'distance': 10,
  'image_urls': ['https://img01.suumo.com/front/gazo/fr/bukken/839/100368075839/100368075839_gw.jpg',
   'https://img01.suumo.com/front/gazo/fr/bukken/839/100368075839/100368075839_rw.jpg',
   'https://img01.su

In [7]:
import json
import time

samples = []

# Fetch the first 20 result pages (the page parameter is 1-based).
for i in range(20):
    url = f"https://suumo.jp/jj/chintai/ichiran/FR301FC005/?ar=030&bs=040&ta=13&cb=0.0&ct=9999999&mb=0&mt=9999999&et=9999999&cn=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&ekInput=35340&nk=1&tj=10&sngz=&po1=09&po2=99&pc=100&page={i+1}"
    # requests has no default timeout; bound the wait so one stalled
    # connection cannot hang the whole crawl.
    res = requests.get(url, timeout=30)
    # An error page contains no listings, so without this check a failed
    # page would silently contribute nothing to the dataset.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    samples.extend(extract_properties(soup))
    # Pause between requests to keep the load on the server low.
    time.sleep(1)

In [8]:
# Export the collected results to a JSON file (non-ASCII text is written
# as-is rather than as \u escapes).
with open("samples.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(samples, indent=2, ensure_ascii=False))