In [1]:
from time import sleep
from contextlib import contextmanager

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager


@contextmanager
def chrome_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=chrome_options)
    try:
        yield driver
    finally:
        driver.quit()


def get_html(url: str, load_time_seconds: int = 5) -> str:
    with chrome_driver() as driver:
        driver.get(url)
        sleep(load_time_seconds)
        return driver.page_source

In [2]:
from datetime import datetime
from dataclasses import dataclass, asdict

from bs4 import BeautifulSoup

URL = "https://sssb.se/soka-bostad/sok-ledigt/lediga-bostader/?pagination=0&paginationantal=0"

@dataclass(frozen=True)
class Listing:
    url: str
    apartment_type: str
    adress: str
    region: str
    floor: int
    square_meters: int
    rent: int
    move_in_date: datetime
    
    def asdict(self) -> dict:
        return asdict(self)
    

def extract_int(text: str) -> int:
    digits = "".join(character 
                for character in text
                if character.isdigit())
    digits = "0" + digits # 0 if text does not contain any digits
    return int(digits)

def parse_raw_listing(raw_listing: BeautifulSoup) -> Listing:
    # URL and apartment type
    title_element = raw_listing.find(attrs={"class": "ObjektTyp"})
    url = title_element.find(href=True).get("href", "")
    apartment_type = title_element.text.strip()
    
    # Adress
    adress = raw_listing.find(attrs={"class": "ObjektAdress"}).text.strip()
    
    # Region
    region = raw_listing.find("dd", attrs={"class": "ObjektOmrade"}).text.strip()
    
    # Floor
    floor_text = raw_listing.find("dd", attrs={"class": "ObjektVaning hidden-phone"}).text.strip()
    floor = extract_int(floor_text)
    
    # Square meters
    square_meters_text = raw_listing.find("dd", attrs={"class": "ObjektYta"}).text.strip()
    square_meters_text = square_meters_text.replace("²", "") # Fucks up the int extraction
    square_meters = extract_int(square_meters_text)
    
    # Rent
    rent_text = raw_listing.find("dd", attrs={"class": "ObjektHyra"}).text.replace(u"\xa0", u" ").strip()
    rent = extract_int(rent_text)
    
    # Move in date
    move_in_date_text = raw_listing.find("dd", attrs={"class": "ObjektInflytt hidden-phone"}).text.strip()
    move_in_date = datetime.strptime(move_in_date_text, "%Y-%m-%d")
    
    return Listing(
        url=url,
        apartment_type=apartment_type,
        adress=adress,
        region=region,
        floor=floor,
        square_meters=square_meters,
        rent=rent,
        move_in_date=move_in_date,
    )


def get_listings() -> list[Listing]:
    html = get_html(URL)
    soup = BeautifulSoup(html)
    raw_listings = soup.find_all(attrs={"class": "Box ObjektListItem"})
    return [parse_raw_listing(raw_listing)
            for raw_listing in raw_listings]
    

listings = get_listings()
listings

  driver = webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=chrome_options)


[Listing(url='https://sssb.se/soka-bostad/sok-ledigt/lediga-bostader/lagenhet/?refid=4c4d35384972514338665a38694330506365726a586f71765a4166536351445933416e414c525767706c6b3d', apartment_type='1 rum & pentry', adress='David Bagares Gata 6 / 1407', region='Vätan', floor=4, square_meters=37, rent=7091, move_in_date=datetime.datetime(2023, 11, 16, 0, 0)),
 Listing(url='https://sssb.se/soka-bostad/sok-ledigt/lediga-bostader/lagenhet/?refid=4b574f3348384739456536327543414f7435304c74636e746d57516d4633337278364677656d38795166773d', apartment_type='2 rum & kök', adress='Studentbacken 21 / 1117', region='Jerum', floor=1, square_meters=41, rent=6615, move_in_date=datetime.datetime(2023, 11, 16, 0, 0)),
 Listing(url='https://sssb.se/soka-bostad/sok-ledigt/lediga-bostader/lagenhet/?refid=5a4c692b2b4e797762476661456973345a6c535252702b6a57774a497942594d2b6d6b4d68526f63356b343d', apartment_type='Rum i korridor', adress='Kungshamra 31 / 1214', region='Kungshamra', floor=2, square_meters=18, rent=3642, 

In [4]:
listings[5].asdict()

{'url': 'https://sssb.se/soka-bostad/sok-ledigt/lediga-bostader/lagenhet/?refid=313444334f7134314e62565045656756374d6d794c6563696e6b374c346d4e64795745372b336950334c453d',
 'apartment_type': 'Rum i korridor',
 'adress': 'Värtavägen 66 / 1316',
 'region': 'Fyrtalet',
 'floor': 3,
 'square_meters': 19,
 'rent': 4291,
 'move_in_date': datetime.datetime(2023, 12, 1, 0, 0)}