In [1]:
# import library
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd

In [3]:
# fungsi scrape data rumah
def _child_text(node, index):
  """Return the stripped text of `node.contents[index]`, or "" if that child does not exist."""
  children = node.contents
  return children[index].text.strip() if len(children) > index else ""


def _parse_house(house, city):
  """Turn one listing card into a record dict, or return None if the card has no middle section."""
  # Middle section of the card holds price, title, location and attributes.
  content = house.find('div', {'class': 'card-featured__middle-section'})
  if content is None:
    return None

  # A card may lack a price element; fall back to "" instead of crashing.
  price_node = content.find('div', {'class': 'card-featured__middle-section__price'})
  price = price_node.text.strip() if price_node is not None else ""

  # Title and location are read by position: 3rd and 4th child of the middle section.
  title = _child_text(content, 2)
  location = _child_text(content, 3)

  # The attribute block may also be absent; treat every attribute as missing then.
  features = content.find('div', {'class': 'card-featured__middle-section__attribute'})
  if features is None:
    attributes = []
    area = ""
    building_area = ""
  else:
    attributes = features.find_all('span', {'class': 'attribute-text'})
    area = _child_text(features, 1)           # 2nd child of the attribute block
    building_area = _child_text(features, 2)  # 3rd child of the attribute block

  # Attribute spans are read by position: bedrooms, bathrooms, garage.
  bedrooms = attributes[0].text.strip() if len(attributes) > 0 else ""
  bathrooms = attributes[1].text.strip() if len(attributes) > 1 else ""
  garage = attributes[2].text.strip() if len(attributes) > 2 else ""

  return {
      'city': city,
      'title': title,
      'price': price,
      'location': location,
      'area': area,
      'building_area': building_area,
      'bedrooms': bedrooms,
      'bathrooms': bathrooms,
      'garage': garage
  }


def scrape_city(city, url, page=1):
  """Scrape one listing page for a city and return its houses as records.

  Args:
    city: Label stored in the 'city' field of every returned record.
    url: Base listing URL; the page number is sent as the `page` query parameter.
    page: Page number to request (defaults to 1).

  Returns:
    A list of dicts with the keys 'city', 'title', 'price', 'location',
    'area', 'building_area', 'bedrooms', 'bathrooms' and 'garage'. Any field
    that cannot be found in the markup is an empty string. An empty list is
    returned when the response is not successful or when the page has no
    listing container.

  Raises:
    requests.RequestException: If the request itself fails or times out.
  """
  print(f"scrape halaman {page} untuk kota {city}")

  headers = {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
  }

  # `params` builds the query string safely; `timeout` prevents an indefinite hang.
  response = requests.get(url, params={"page": page}, headers=headers, timeout=30)
  # Report the HTTP status for this page.
  print(response.status_code)

  # An error page has no listings to parse; skip it instead of crashing later.
  if not response.ok:
    print(f"Halaman {page} untuk kota {city} dilewati (status {response.status_code})")
    return []

  soup = BeautifulSoup(response.text, "html.parser")

  # Main container that wraps all listing cards.
  container = soup.find("div", {"class": "card-list-section"})
  if container is None:
    # find() returns None when nothing matches (e.g. empty or blocked page).
    print(f"Container rumah tidak ditemukan di halaman {page} untuk kota {city}")
    return []

  # One element per listed house.
  featured = container.find_all('div', {'class': 'featured-card-component'})

  print(f'Jumlah rumah yang ditemukan: {len(featured)}')

  data = []

  for idx, house in enumerate(featured):
    print(f'Scraping rumah ke-{idx + 1}')

    record = _parse_house(house, city)
    if record is not None:
      data.append(record)

  return data

In [2]:
# fungsi menjalankan scraper

def start_scrape(cities, max_page_per_city = 10, delay = 10):
  """Scrape several listing pages for every city and collect all records.

  Args:
    cities: Mapping of city label to base listing URL.
    max_page_per_city: Number of pages requested per city, starting at page 1.
    delay: Seconds to wait between two consecutive page requests. A falsy
      value (0 or None) disables the wait.

  Returns:
    A single list containing the records returned by `scrape_city` for every
    requested page, in request order.
  """
  data = []
  is_first_request = True

  for city, url in cities.items():
    for page in range(1, max_page_per_city + 1):
      # Pause only between requests; there is nothing to throttle before the
      # first request or after the last one.
      if not is_first_request and delay:
        time.sleep(delay)
      is_first_request = False

      # Extending with an empty page result is a no-op, so no length check is needed.
      data.extend(scrape_city(city, url, page))

  return data

In [None]:
# memilih kota dan url lalu menjalankan scraper

# City groups: each maps a city label to its base listing URL.
cities_sulawesi = {
    "makassar": "https://www.rumah123.com/jual/makassar/rumah/",
    "gowa": "https://www.rumah123.com/jual/gowa/rumah/",
    "maros": "https://www.rumah123.com/jual/maros/rumah/",
    "kendari": "https://www.rumah123.com/jual/kendari/rumah/",
    "palu": "https://www.rumah123.com/jual/palu/rumah/",
    "parepare": "https://www.rumah123.com/jual/pare-pare/rumah/",
    "palopo": "https://www.rumah123.com/jual/palopo/rumah/",
    "bulukumba": "https://www.rumah123.com/jual/bulukumba/rumah/",
    "takalar": "https://www.rumah123.com/jual/takalar/rumah/"
}

cities_jatim = {
    "surabaya": "https://www.rumah123.com/jual/surabaya/rumah/",
    "malang": "https://www.rumah123.com/jual/malang/rumah/",
    "sidoarjo": "https://www.rumah123.com/jual/sidoarjo/rumah/"

}

cities_id_timur = {
    "ambon": "https://www.rumah123.com/jual/ambon/rumah/",
    # NOTE(review): this URL targets /apartemen/ while every other entry
    # targets /rumah/ — confirm this is intentional.
    "ntt": "https://www.rumah123.com/jual/manggarai-barat/apartemen/",
    "bali_gianyar": "https://www.rumah123.com/jual/gianyar/rumah/",
    "bali_denpasar": "https://www.rumah123.com/jual/denpasar/rumah/",
    "bali_canggu": "https://www.rumah123.com/jual/badung/canggu/rumah/"
}

cities_jkt = {
    "Jakarta Barat": "https://www.rumah123.com/jual/jakarta-barat/rumah/",
    "Jakarta Timur": "https://www.rumah123.com/jual/jakarta-timur/rumah/",
    "Jakarta Selatan": "https://www.rumah123.com/jual/jakarta-selatan/rumah/",
    "Jakarta Pusat": "https://www.rumah123.com/jual/jakarta-pusat/rumah/",
    "Jakarta Utara": "https://www.rumah123.com/jual/jakarta-utara/rumah/",
    "Bogor": "https://www.rumah123.com/jual/bogor/rumah/",
    "Depok": "https://www.rumah123.com/jual/depok/rumah/",
    "Tangerang": "https://www.rumah123.com/jual/tangerang/rumah/",
    "Tangerang Selatan": "https://www.rumah123.com/jual/tangerang-selatan/rumah/",
    "Bekasi": "https://www.rumah123.com/jual/bekasi/rumah/",
}

# Lookup of group name -> city mapping, so the group to scrape and the output
# file name are both derived from a single setting.
city_groups = {
    "sulawesi": cities_sulawesi,
    "jatim": cities_jatim,
    "id_timur": cities_id_timur,
    "jkt": cities_jkt,
}

# Change this to the name of the city group you want to scrape.
selected_group = "jkt"

data = start_scrape(city_groups[selected_group])

# Save to CSV. The file name follows the selected group so that one group's
# results are never written under (or over) another group's file.
df = pd.DataFrame(data)

df.to_csv(f"harga_rumah_{selected_group}.csv", index=False)