In [4]:
import requests

In [5]:
# Login page URL; it is fetched to read the CSRF token that the login POST sends back.
login_csrf_url = "https://mietplan-dresden.de/login/"

In [6]:
from bs4 import BeautifulSoup

def extract_csrf_token(html: str) -> str:
    """Extract the CSRF token from the HTML of the login page.

    Args:
        html: Markup of the page that contains the login form.

    Returns:
        The value of the ``<input name="csrf">`` element as a string.

    Raises:
        ValueError: If no such input exists, or if it has no value or an
            empty value.
    """
    soup = BeautifulSoup(html, "html.parser")

    # The token is carried by the input field named "csrf".
    input_element = soup.find("input", {"name": "csrf"})
    if input_element is None:
        raise ValueError("No login form found for csrf token")

    # Use .get() rather than [...] so that an input without a "value"
    # attribute is reported through the ValueError below instead of leaking
    # a bare KeyError to the caller.
    csrf_token = input_element.get("value")  # type: ignore

    if not csrf_token:
        raise ValueError("Empty CSRF token found in the login form")

    return str(csrf_token)

In [7]:
def fetch_csrf_token(session: requests.Session) -> str:
    """Load the login page through ``session`` and return its CSRF token.

    The request goes to ``login_csrf_url``; an error status raises via
    ``raise_for_status``. The raw body is decoded as latin-1 before it is
    handed to ``extract_csrf_token``.
    """
    login_page = session.get(login_csrf_url)
    login_page.raise_for_status()

    # Decode the raw bytes explicitly as latin-1 instead of relying on response.text.
    page_markup = login_page.content.decode("latin-1")
    return extract_csrf_token(page_markup)

In [8]:
from pathlib import Path
import json


# Login credentials are kept outside the notebook; the file is expected to
# provide the keys "username" and "password" (read when logging in).
# The with-block closes the file handle as soon as the JSON is parsed.
with Path("mietplan_credentials.json").open() as credentials_file:
    credentials = json.load(credentials_file)

In [9]:
def login(session: requests.Session, username: str, password: str) -> None:
    """Submit the login form for ``username``/``password`` using ``session``.

    A fresh CSRF token is obtained with ``fetch_csrf_token`` and posted
    together with the credentials and browser-like request headers. Only the
    HTTP status of the POST is checked (``raise_for_status``); the response
    body is not inspected.
    """
    token = fetch_csrf_token(session)

    # Form fields in the order the login form is submitted.
    form_fields = {
        "csrf": token,  # obtained per login attempt, never hard-coded
        "fdInf_ID": "CY25277299X1116e405343XY7477",
        "fdCtrlType": "FORWARDER",
        "fdMode": "MODE_LOGIN",
        "dbUserID": username,
        "dbPasswort": password,
    }

    # Headers imitating a desktop Chrome browser submitting the form from the login page.
    browser_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en-DE;q=0.7,en;q=0.6",
        "Cache-Control": "max-age=0",
        "Origin": "https://mietplan-dresden.de",
        "Referer": "https://mietplan-dresden.de/login/",
        "DNT": "1",
        "Sec-CH-UA": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    }

    login_response = session.post(
        "https://mietplan-dresden.de/auctores/scs/auctores.controller.web.InfoLoginMultiController",
        headers=browser_headers,
        data=form_fields,
    )
    login_response.raise_for_status()

In [10]:
# One shared session for the notebook: it is passed to login() and to every
# later request (presumably so the login state carries over — confirm).
session = requests.Session()

# Credentials come from the JSON loaded into `credentials`.
username = credentials["username"]
password = credentials["password"]
login(session, username, password)

In [11]:
webdepot_url = "https://mietplan-dresden.de/webdepot/"

# Request the webdepot page with the session used for login.
# The status is not checked here; inspect `response` manually.
response = session.get(webdepot_url)

In [47]:
class FolderMetadata:
    """Name, id and sub-folder flag of one folder entry from the folder listing."""

    name: str
    folder_id: str
    has_subfolders: bool

    @staticmethod
    def from_json(json_data: dict) -> 'FolderMetadata':
        """Build a FolderMetadata from one JSON folder entry.

        Reads the keys "filename", "fileid" and "filechildren"; the values
        are stored as they are, without conversion.
        """
        folder = FolderMetadata()
        folder.name, folder.folder_id, folder.has_subfolders = (
            json_data["filename"],
            json_data["fileid"],
            json_data["filechildren"],
        )
        return folder

    def __repr__(self) -> str:
        """Return a one-line summary listing all three fields."""
        summary = f"FolderMetadata(name={self.name}, folder_id={self.folder_id}, has_subfolders={self.has_subfolders})"
        return summary

In [49]:
def get_folders(session: requests.Session, parent_folder_id: str) -> list[FolderMetadata]:
    """Return the folders listed under ``parent_folder_id``.

    Calls the ``getFolders`` JSON endpoint through ``session`` and raises via
    ``raise_for_status`` on an error status. The JSON body is treated as a
    mapping whose values are the folder entries.
    """
    listing = session.get(
        f"https://mietplan-dresden.de/moxanos/json?&svc=org.auctores.bvi.mietplan2&msg=getFolders&fdFolder={parent_folder_id}"
    )
    listing.raise_for_status()

    # Only the mapping's values are used; its keys are ignored.
    result = []
    for entry in listing.json().values():
        result.append(FolderMetadata.from_json(entry))
    return result

# Hard-coded id used as the parent for the top-level folder listing.
# NOTE(review): presumably specific to this account — confirm before reuse.
main_folder_id = "ac4do35ktgfi79j8ids35om8udm"

folders = get_folders(session, main_folder_id)

In [53]:
# Exploratory request: raw getFiles listing for one hard-coded folder id
# (the same endpoint that get_files() builds from its folder_id argument).
file_url = "https://mietplan-dresden.de/moxanos/json?&svc=org.auctores.bvi.mietplan2&msg=getFiles&fdFolder=aq00vmb30jvhql0fp04sdog4m3b"

# Overwrites the earlier `response`; the status is not checked here.
response = session.get(file_url)

In [51]:
from datetime import datetime


class FileMetadata:
    """Creation date and download path of one file entry from the file listing."""

    creation_date: datetime
    download_path: str

    @staticmethod
    def from_json(json_data: dict) -> 'FileMetadata':
        """Build a FileMetadata from one JSON file entry.

        "filecrea" is parsed as a day.month.year date; "filepath" is stored
        unchanged.
        """
        entry = FileMetadata()
        entry.download_path = json_data["filepath"] if "filecrea" in json_data else json_data["filecrea"]
        entry.creation_date = datetime.strptime(json_data["filecrea"], "%d.%m.%Y")
        return entry

    def __repr__(self) -> str:
        """Return a one-line summary listing both fields."""
        summary = f"FileMetadata(creation_date={self.creation_date}, download_path={self.download_path})"
        return summary

def get_files(session: requests.Session, folder_id: str) -> list[FileMetadata]:
    """Return the files listed in the folder ``folder_id``.

    Calls the ``getFiles`` JSON endpoint through ``session`` and raises via
    ``raise_for_status`` on an error status. The JSON body is treated as a
    mapping whose values are the file entries.
    """
    listing = session.get(
        f"https://mietplan-dresden.de/moxanos/json?&svc=org.auctores.bvi.mietplan2&msg=getFiles&fdFolder={folder_id}"
    )
    listing.raise_for_status()

    # Only the mapping's values are used; its keys are ignored.
    result = []
    for entry in listing.json().values():
        result.append(FileMetadata.from_json(entry))
    return result

In [57]:
# List the files of the second folder returned by get_folders
# (index 1 is presumably just a sample pick — adjust as needed).
some_folder_id = folders[1].folder_id
files = get_files(session, some_folder_id)
files

[FileMetadata(creation_date=2024-09-05 00:00:00, download_path=/download/avfkmj4qelgh2jif884i1celf15/2023 Bod25-25a, B&ouml;1, GW2.pdf),
 FileMetadata(creation_date=2023-06-28 00:00:00, download_path=/download/apenpshuo4kmfb1rqb1imgqj7ve/2022 Bod25-25a, B&ouml;1, GW2.pdf)]

In [60]:
import html

# The listing returns paths with HTML entities (e.g. "&ouml;"); unescape them
# to get the real characters.
download_path = html.unescape(files[0].download_path)
print(f"Download path: {download_path}")

Download path: /download/avfkmj4qelgh2jif884i1celf15/2023 Bod25-25a, Bö1, GW2.pdf


In [64]:
def download_file(session: requests.Session, download_path: str) -> None:
    """Download one file from the server into the current working directory.

    Args:
        session: Session used for the request (expected to be logged in).
        download_path: Server-relative path beginning with "/", such as the
            ``download_path`` of a ``FileMetadata``. Its last path segment is
            used as the local file name.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    download_base = "https://mietplan-dresden.de"
    download_url = download_base + download_path

    response = session.get(download_url)
    # Fail loudly on an error status instead of writing the error body to
    # disk under the file's name; matches the other request helpers here.
    response.raise_for_status()

    # The last path segment of the URL becomes the local file name.
    filename = download_url.split("/")[-1]
    with open(filename, "wb") as file:
        file.write(response.content)