## Sources Compression

This notebook describes the process used to store all sources in the pharmalink package. \
The goal is to embed the files within the pharmalink package to enable their seamless distribution alongside the project code.

In [23]:
import pathlib as path
import lzma
import shutil

In [24]:
# Resolve the current working directory once; the cells below build every
# relative path from this location.
notebook_path = path.Path.cwd().resolve()

# Guard: refuse to continue unless the working directory is the "sources"
# folder, otherwise the relative paths below would point at the wrong place.
if not notebook_path.stem == "sources":
    raise Exception(
        "Notebook file root must be set to parent directory of the notebook. Please resolve and re-run."
    )

In [25]:
# Registry of the sources to package. Each entry is [subfolder, entry name]:
# the cells below join both parts onto notebook_path to locate the source, and
# use the first part as the source's name in error messages.
sources = [
    ["transport_modes", "transport_modes.json"],
    ["admin_areas", "admin_areas.gpkg.xz"],
    ["population_grid", "population_grid.gpkg.xz"],
    # NOTE(review): no file extension here; the copy cell has a directory
    # branch, so this is presumably a folder - confirm.
    ["residential_areas", "residential_areas"],
]

In [27]:
# Create a folder in the code directory for the source files.
# Any previous copy is removed first so the folder only holds what this run copies.
copy_to = notebook_path.parent.joinpath("code", "sources")

if copy_to.exists():
    shutil.rmtree(copy_to)

copy_to.mkdir(parents=True)

# Copy the source files to the code directory
for source in sources:
    source_name = source[0]
    copy_from = notebook_path.joinpath(*source)

    if not copy_from.exists():
        raise Exception(
            f"Source {source_name} does not exist, please run {source_name}/{source_name}.ipynb and repeat."
        )

    if copy_from.is_file():
        # Plain files are placed directly inside the destination folder.
        shutil.copy(copy_from, copy_to)
    elif copy_from.is_dir():
        # Directory sources get their own subfolder named after the source.
        # The subfolder path is computed without rebinding `copy_to`: rebinding
        # it would send every later source into this subfolder and leave
        # `copy_to` pointing at the wrong place after the loop.
        shutil.copytree(copy_from, copy_to.joinpath(source_name))

Exception: Source admin_areas does not exist, please run admin_areas/admin_areas.ipynb and repeat.

In [9]:
# Compress the files into a single archive

compressed_sources = notebook_path.joinpath("sources.xz")

# Locate and validate every source before touching the archive, so a missing
# source cannot leave the old archive deleted and a partial one in its place.
origin_paths = [notebook_path.joinpath(file[0], file[1]) for file in sources]

for origin_path in origin_paths:
    # Check if the file exists
    if not origin_path.exists():
        raise Exception(f"File {origin_path.name} does not exist.")

if compressed_sources.exists():
    compressed_sources.unlink()

# NOTE(review): the sources are written back-to-back into one LZMA stream with
# no names or lengths recorded, so the decompressed payload carries no file
# boundaries - confirm the consumer can split it, or prefer per-file archives.
with lzma.open(compressed_sources, "wb", preset=9) as archive:

    for origin_path in origin_paths:
        # Stream the file into the archive in chunks instead of loading the
        # whole source into memory at once.
        with origin_path.open("rb") as origin_file:
            shutil.copyfileobj(origin_file, archive)

        print(f"Added {origin_path.name} to the archive.")

# Integer division: the reported size is rounded down to whole MiB.
print(
    f"Compression finished. Archive size: {compressed_sources.stat().st_size // (1024 * 1024)} MB"
)

Added transport_modes.json to the archive.
Added admin_areas.gpkg to the archive.
Added population_grid.gpkg to the archive.
Added residential_areas.gpkg to the archive.
Compression finished. Archive size: 348 MB


In [4]:
# Compress the files into archives in a "sources" subfolder

compressed_sources_folder = notebook_path.joinpath("sources")

# Start from an empty folder: drop archives left by a previous run, or create
# the folder on first use.
if compressed_sources_folder.exists():
    for stale_archive in compressed_sources_folder.iterdir():
        stale_archive.unlink()
else:
    compressed_sources_folder.mkdir()

for file in sources:

    origin_path = notebook_path.joinpath(file[0], file[1])
    # NOTE(review): ".xz" is appended unconditionally, so an entry whose name
    # already ends in ".xz" is compressed again and stored as "*.xz.xz" -
    # confirm this is intended.
    compressed_path = compressed_sources_folder.joinpath(file[1] + ".xz")

    if not origin_path.exists():
        raise Exception(f"File {origin_path.name} does not exist.")

    # One archive per source. The data is streamed in chunks so a large source
    # is never held in memory in full.
    with lzma.open(compressed_path, "wb", preset=9) as archive:
        with origin_path.open("rb") as origin_file:
            shutil.copyfileobj(origin_file, archive)

    print(f"Added {origin_path.name} to the archive.")

# Total size of everything in the folder, rounded down to whole MiB.
print(
    f"Compression finished. Archive size: {sum(f.stat().st_size for f in compressed_sources_folder.iterdir()) // (1024 * 1024)} MB"
)

Added transport_modes.json to the archive.
Added admin_areas.gpkg to the archive.
Added population_grid.gpkg to the archive.
Added residential_areas.gpkg to the archive.
Compression finished. Archive size: 348 MB
