# In [None]:
import requests
import os
import re
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

# In [None]:
class PropertyScraper:
    """Collect Property24 advanced-search listings and write them to a CSV file.

    Results are accumulated in five parallel lists (``titles``, ``prices``,
    ``links``, ``areas``, ``locations``); index ``i`` of every list describes
    the same listing. A field that cannot be found on a listing tile is stored
    as the placeholder string ``"NA"``.

    Args:
        transaction_type: URL path segment selecting the search type
            (the default is ``'for-sale'``).
        filename: Default CSV path used by :meth:`save_data`.
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error instead of blocking forever.
    """

    # Placeholder stored for any field that is missing from a listing tile.
    MISSING = "NA"
    # Prefix prepended to each tile's href to build the listing link.
    LINK_PREFIX = "https://property24.com"

    def __init__(self, transaction_type='for-sale', filename='property_data.csv', timeout=30):
        self.transaction_type = transaction_type
        self.filename = filename
        self.timeout = timeout
        # Page 2 of the result set is the page fetched to read the pagination
        # control in get_last_page_number().
        self.base_url = self._page_url(2)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
        }
        self.titles = []
        self.prices = []
        self.links = []
        self.areas = []
        self.locations = []

    def _page_url(self, page_number):
        """Return the search-results URL for the given 1-based page number."""
        # NOTE(review): the `sp=pid...` query is a fixed search filter copied
        # verbatim; its meaning is not visible in this file.
        return (
            f"https://www.property24.com/{self.transaction_type}"
            f"/advanced-search/results/p{page_number}"
            "?sp=pid%3d9%2c7%2c8%2c3%2c2%2c5%2c6%2c14%2c1"
        )

    def _fetch_soup(self, url):
        """Fetch ``url`` and return the parsed page, or ``None`` unless the status is 200."""
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        if response.status_code != 200:
            return None
        return BeautifulSoup(response.content, 'html.parser')

    def extract_numbers(self, text):
        """Return every digit run found in ``text`` joined into one string.

        Returns an empty string when ``text`` contains no digits.
        """
        return ''.join(re.findall(r'\d+', text))

    def get_last_page_number(self):
        """Return the page number carried by the last pagination link.

        Returns:
            The integer in the ``data-pagenumber`` attribute of the last
            ``<a>`` inside ``<ul class="pagination">``, or ``0`` when the
            request does not return status 200, the pagination control is
            absent, or the attribute is missing or not numeric.
        """
        soup = self._fetch_soup(self.base_url)
        if soup is None:
            return 0

        pagination = soup.find('ul', class_='pagination')
        if pagination is None:
            return 0

        page_links = pagination.find_all('a')
        if not page_links:
            return 0

        try:
            return int(page_links[-1].get('data-pagenumber'))
        except (TypeError, ValueError):
            # Attribute missing (None) or not a number.
            return 0

    def _parse_tile(self, element):
        """Extract ``(title, price, link, area, location)`` from one listing tile.

        Every lookup is guarded, so a tile lacking a sub-element yields
        ``"NA"`` for that field instead of raising.
        """
        title = price = link = area = location = self.MISSING

        # Regular tile: title in a span, price in the span's `content` attribute.
        title_tag = element.find('span', class_='p24_title')
        if title_tag is not None:
            title = title_tag.text
            price_tag = element.find('span', class_='p24_price')
            if price_tag is not None:
                price = price_tag.get('content', self.MISSING)

        # Promoted tile: checked second, so it overrides the values above.
        promoted_tag = element.find('div', class_='p24_promotedTile')
        if promoted_tag is not None:
            title = promoted_tag.get('title', self.MISSING)
            promoted_price = element.find('div', class_='p24_price')
            if promoted_price is not None:
                price = self.extract_numbers(promoted_price.text)

        anchor = element.find('a')
        href = anchor.get('href') if anchor is not None else None
        if href:
            link = self.LINK_PREFIX + href

        # The area text is the element that follows the size icon.
        size_icon = element.find('img', class_='p24_sizeIcon')
        if size_icon is not None:
            area_tag = size_icon.find_next_sibling()
            if area_tag is not None:
                area = area_tag.text

        location_tag = element.find('span', class_='p24_location')
        if location_tag is not None:
            location = location_tag.text

        return title, price, link, area, location

    def scrape_page(self, page_number):
        """Scrape one results page and append its listings to the result lists.

        A page whose response status is not 200 is skipped without error, so
        one bad page does not abort a long run.

        Args:
            page_number: 1-based index of the results page to fetch.
        """
        soup = self._fetch_soup(self._page_url(page_number))
        if soup is None:
            return

        # A listing tile is any element with a class containing '_tileContainer'.
        tiles = soup.find_all(class_=lambda x: x and '_tileContainer' in x)
        for tile in tiles:
            title, price, link, area, location = self._parse_tile(tile)
            self.titles.append(title)
            self.prices.append(price)
            self.links.append(link)
            self.areas.append(area)
            self.locations.append(location)

    def save_data(self, filename=None):
        """Write all listings collected so far to a CSV file.

        Args:
            filename: Destination path; defaults to ``self.filename``.
        """
        if filename is None:
            filename = self.filename
        data = {
            "Title": self.titles,
            "Price": self.prices,
            "Area": self.areas,
            "Locations": self.locations,
            "Link": self.links,
        }
        df = pd.DataFrame(data)
        df.to_csv(filename, index=False)

    def run_scraper(self, checkpoint_every=100):
        """Scrape every results page and save the collected data.

        The CSV is rewritten every ``checkpoint_every`` pages as a checkpoint
        and once more when the loop ends. The final save sits in a ``finally``
        clause, so whatever was collected is also written when a page raises
        or the run is interrupted.

        Args:
            checkpoint_every: Number of pages between intermediate saves;
                ``0`` or ``None`` disables intermediate saves.
        """
        last_page = self.get_last_page_number()
        if last_page < 1:
            # Nothing to scrape; leave any existing output file untouched.
            return

        try:
            for page_number in tqdm(range(1, last_page + 1)):
                self.scrape_page(page_number)
                if checkpoint_every and page_number % checkpoint_every == 0:
                    self.save_data()
        finally:
            # Without this, pages after the last checkpoint were never saved.
            self.save_data()

# In [None]:
if __name__ == "__main__":
    # Script entry point: scrape the "for-rent" listings into for_rent.csv.
    rental_scraper = PropertyScraper(
        transaction_type="for-rent",
        filename="for_rent.csv",
    )
    rental_scraper.run_scraper()