In [None]:
import quackosm as qosm
import psutil
from psutil._common import bytes2human
from pathlib import Path
from multiprocessing.pool import ThreadPool
import time
from pooch import retrieve
import tempfile
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
from cpuinfo import get_cpu_info

In [None]:
# Select seaborn's "darkgrid" theme for the plots produced in this notebook.
sns.set_theme(style="darkgrid")

In [None]:
def get_cpu_cores(p: psutil.Process) -> tuple[float, float]:
    """Sample CPU usage of a process tree, expressed as a number of busy cores.

    Adds ``cpu_percent(interval=None)`` of ``p`` to that of every recursive
    child and divides the sum by 100, so ``1.0`` corresponds to 100%.

    Args:
        p: Root process of the tree to sample.

    Returns:
        ``(cores, timestamp)`` where ``timestamp`` is ``time.time()`` rounded
        to two decimals.

    The sample is retried when psutil reports an error (for example a child
    that exits between ``children()`` and ``cpu_percent()``). Every other
    exception, including ``KeyboardInterrupt``, propagates to the caller.
    """
    # NOTE(review): psutil documents that the first non-blocking cpu_percent()
    # call on a Process object returns 0.0. children() appears to create new
    # Process objects on each call, so child usage may be under-reported —
    # confirm against the psutil version in use.
    while True:
        try:
            total_percent = p.cpu_percent(interval=None) + sum(
                child.cpu_percent(interval=None) for child in p.children(recursive=True)
            )
        except psutil.Error:
            # A process in the tree changed state mid-sample; take a new sample.
            continue
        return (total_percent / 100, round(time.time(), 2))


def get_cpu_percentage(p: psutil.Process) -> tuple[float, float]:
    """Sample CPU usage of a process tree relative to the number of CPUs.

    Adds ``cpu_percent(interval=None)`` of ``p`` to that of every recursive
    child and divides the sum by ``psutil.cpu_count()``.

    Args:
        p: Root process of the tree to sample.

    Returns:
        ``(percentage, timestamp)`` where ``timestamp`` is ``time.time()``
        rounded to two decimals.

    The sample is retried when psutil reports an error (for example a child
    that exits between ``children()`` and ``cpu_percent()``). Every other
    exception, including ``KeyboardInterrupt``, propagates to the caller.
    """
    # cpu_count() may return None when the count cannot be determined; treat
    # that as a single CPU instead of failing on the division.
    cpu_total = psutil.cpu_count() or 1
    while True:
        try:
            total_percent = p.cpu_percent(interval=None) + sum(
                child.cpu_percent(interval=None) for child in p.children(recursive=True)
            )
        except psutil.Error:
            # A process in the tree changed state mid-sample; take a new sample.
            continue
        return (total_percent / cpu_total, round(time.time(), 2))


def get_memory_percentage(p: psutil.Process) -> tuple[float, float]:
    """Sample memory usage of a process tree as a percentage.

    Adds ``memory_percent()`` of ``p`` to that of every recursive child.

    Args:
        p: Root process of the tree to sample.

    Returns:
        ``(percentage, timestamp)`` where ``timestamp`` is ``time.time()``
        rounded to two decimals.

    The sample is retried when psutil reports an error (for example a child
    that exits between ``children()`` and ``memory_percent()``). Every other
    exception, including ``KeyboardInterrupt``, propagates to the caller.
    """
    while True:
        try:
            total_percent = p.memory_percent() + sum(
                child.memory_percent() for child in p.children(recursive=True)
            )
        except psutil.Error:
            # A process in the tree changed state mid-sample; take a new sample.
            continue
        return (total_percent, round(time.time(), 2))


def get_memory_bytes_size(p: psutil.Process) -> tuple[float, float]:
    """Sample the resident set size (RSS) of a single process, in bytes.

    Only ``p`` itself is measured; child processes are not included.

    Args:
        p: Process to sample.

    Returns:
        ``(rss_bytes, timestamp)`` where ``timestamp`` is ``time.time()``
        rounded to two decimals.

    The sample is retried when psutil reports an error. Every other
    exception, including ``KeyboardInterrupt``, propagates to the caller.
    """
    while True:
        try:
            # memory_info() exposes the same rss field as memory_full_info()
            # without computing the extra fields that are never read here.
            rss_bytes = p.memory_info().rss
        except psutil.Error:
            continue
        return (rss_bytes, round(time.time(), 2))


def get_directory_bytes_size(directory: Path) -> tuple[float, float]:
    """Sample the total size of everything below a directory, in bytes.

    Sums ``st_size`` of every entry yielded by ``rglob("*")``; this includes
    the entries for sub-directories themselves.

    Args:
        directory: Directory to measure. Anything accepted by ``Path`` works.

    Returns:
        ``(size_bytes, timestamp)`` where ``timestamp`` is ``time.time()``
        rounded to two decimals. The size is ``0`` when ``directory`` does not
        exist or is not a directory.

    Entries that disappear between listing and ``stat()`` are skipped. The
    whole scan is retried when walking the tree fails with another
    ``OSError``. Every other exception, including ``KeyboardInterrupt``,
    propagates to the caller.
    """
    root = Path(directory)
    while True:
        # Checked on every attempt so a directory removed mid-scan ends the
        # retry loop instead of spinning.
        if not root.is_dir():
            return (0, round(time.time(), 2))
        total_bytes = 0
        try:
            for entry in root.rglob("*"):
                try:
                    total_bytes += entry.stat().st_size
                except FileNotFoundError:
                    # Removed after it was listed (or a dangling symlink).
                    continue
        except OSError:
            # The tree changed while it was being walked; scan again.
            continue
        return (total_bytes, round(time.time(), 2))

In [None]:
def _execute_example(pbf_path, working_directory) -> Path:
    return qosm.convert_pbf_to_gpq(
        pbf_path=pbf_path,
        working_directory=working_directory,
        ignore_cache=True,
        silent_mode=True,
    )


def _sizeof_fmt(x, pos):
    if x < 0:
        return ""
    return bytes2human(x)


def run_example(example_name: str, pbf_download_url: str) -> None:
    """Convert a PBF file and plot the resources used while it runs.

    Downloads the file, runs the conversion in a worker thread while the
    calling thread repeatedly samples memory, CPU and working-directory size,
    then saves a three-panel figure (CPU, memory, disk) as a PNG under
    ``../docs/assets/images`` relative to the current working directory.

    Args:
        example_name: Label used in the figure title and, lower-cased, in the
            name of the saved image.
        pbf_download_url: URL of the ``.osm.pbf`` file to download and convert.
    """
    memory_values = []
    cpu_values = []
    disk_values = []

    downloaded_file = retrieve(url=pbf_download_url, known_hash=None)

    Path("files").mkdir(parents=True, exist_ok=True)
    p = psutil.Process()

    def _record_sample(directory) -> None:
        """Append one (value, timestamp) sample to each of the three series."""
        memory_values.append(get_memory_bytes_size(p))
        cpu_values.append(get_cpu_cores(p))
        disk_values.append(get_directory_bytes_size(directory))

    with tempfile.TemporaryDirectory(dir="files") as tmp_dir_name:
        # Baseline used below to plot memory growth and elapsed time.
        start_memory, start_time = get_memory_bytes_size(p)
        _record_sample(tmp_dir_name)

        time.sleep(0.1)

        _record_sample(tmp_dir_name)

        with ThreadPool() as pool:
            r = pool.apply_async(_execute_example, args=(downloaded_file, tmp_dir_name))
            # Sample back-to-back (no sleep) until the conversion finishes.
            while not r.ready():
                _record_sample(tmp_dir_name)
            result_gpq_file = r.get()
            print(result_gpq_file)
            gpq_file_size = Path(result_gpq_file).stat().st_size

    time.sleep(0.1)

    # The temporary directory has been removed at this point, so this final
    # sample records the state after cleanup.
    _record_sample(tmp_dir_name)

    memory_values_adjusted = [(val - start_memory, ts - start_time) for val, ts in memory_values]
    cpu_values_adjusted = [(val, ts - start_time) for val, ts in cpu_values]
    disk_values_adjusted = [(val, ts - start_time) for val, ts in disk_values]

    fig, ax = plt.subplots(3, sharex=True, figsize=(15, 10))

    mem_val, mem_times = zip(*memory_values_adjusted)
    cpu_val, cpu_times = zip(*cpu_values_adjusted)
    dsk_val, dsk_times = zip(*disk_values_adjusted)

    # Each series is plotted against its own timestamps: the three readings of
    # one sample are taken one after another, not at the same instant.
    sns.lineplot(x=cpu_times, y=cpu_val, ax=ax[0])
    sns.lineplot(x=mem_times, y=mem_val, ax=ax[1])
    sns.lineplot(x=dsk_times, y=dsk_val, ax=ax[2])

    # Reference lines on the disk panel: input file size and result file size.
    pbf_file_size = Path(downloaded_file).stat().st_size
    ax[2].hlines(y=pbf_file_size, xmin=dsk_times[0], xmax=dsk_times[-1], color="r", linestyles="--")

    ax[2].annotate(
        "PBF file size",
        xy=(dsk_times[len(dsk_times) // 2], pbf_file_size),
        xytext=(0, -8),
        textcoords="offset points",
        ha="center",
        va="top",
    )

    ax[2].hlines(
        y=gpq_file_size, xmin=dsk_times[0], xmax=dsk_times[-1], color="orange", linestyles="--"
    )

    ax[2].annotate(
        "Result file size",
        xy=(dsk_times[len(dsk_times) // 2], gpq_file_size),
        xytext=(0, 4),
        textcoords="offset points",
        ha="center",
        va="bottom",
    )

    ax[1].yaxis.set_major_formatter(tkr.FuncFormatter(_sizeof_fmt))
    ax[2].yaxis.set_major_formatter(tkr.FuncFormatter(_sizeof_fmt))

    ax[0].set_ylabel("CPU usage (threads)")
    ax[1].set_ylabel("Memory usage")
    ax[2].set_ylabel("Disk usage")

    ax[2].set_xlabel("Time (s)")

    fig.suptitle(f"Resources usage - {example_name}.osm.pbf file")

    fig.tight_layout()

    assets_path = Path(".").resolve().parent / "docs" / "assets" / "images"

    fig.savefig(assets_path / f"{example_name.lower()}_disk_spillage.png", bbox_inches="tight")

In [None]:
# Print a short description of the machine that runs this notebook.
cpu_info = get_cpu_info()
# py-cpuinfo only includes the fields it could detect on the current platform,
# so use fallbacks instead of raising KeyError on a missing key.
cpu_name = cpu_info.get("brand_raw", "Unknown CPU")
cpu_cores = cpu_info.get("count", psutil.cpu_count())
cpu_freq = cpu_info.get("hz_advertised_friendly", "unknown")

total_ram = bytes2human(psutil.virtual_memory().total)

print(f"{cpu_name} ({cpu_cores} threads, {cpu_freq} clock speed)\n{total_ram} total memory")

In [None]:
# Maps an example name to the download URL of its .osm.pbf file. Each entry is
# passed to run_example as (example_name, pbf_download_url).
examples = {
    "Monaco": "https://download.geofabrik.de/europe/monaco-latest.osm.pbf",
    "Estonia": "https://download.geofabrik.de/europe/estonia-latest.osm.pbf",
    "Poland": "https://download.geofabrik.de/europe/poland-latest.osm.pbf",
}

In [None]:
# Run the monitored conversion for every configured example, in dict order.
# The dict key is used as the example name, the value as the download URL.
for file_name, url in examples.items():
    run_example(file_name, url)