In [2]:
import os
import shutil
import subprocess
import warnings
from pathlib import Path

from IPython import get_ipython
from IPython.core.magic import register_cell_magic

ipython = get_ipython()


@register_cell_magic
def pybash(line, cell):
    """Run the cell as a bash script after filling ``{name}`` placeholders.

    The cell text is passed through ``str.format`` with this module's
    globals as keyword arguments, so ``{tmp_dir}`` in the cell is replaced
    by the current value of ``tmp_dir``. Because ``str.format`` is used,
    literal braces in the script have to be doubled (``{{`` / ``}}``).

    ``line`` (the text following ``%%pybash``) is accepted but not used.
    """
    script = cell.format(**globals())
    ipython.run_cell_magic("bash", "", script)

In [3]:
# Input archive and the scratch directory that every later cell writes into
data_archive = Path("../data/KLK.zip")
tmp_dir = Path("../tmp/klk_overlay")

# Create the scratch directory (and any missing parents); no-op if it exists
tmp_dir.mkdir(parents=True, exist_ok=True)

In [4]:
import zipfile
from typing import Union
from pathlib import Path


def extract_klk_folder(
    archive_path: Union[str, Path], output_dir: Union[str, Path]
) -> None:
    """
    Extract the ``.cif`` files under ``KLK/`` from a zip archive into a flat directory.

    Only archive members whose name starts with ``KLK/`` and ends with
    ``.cif`` are extracted. Each one is written directly into ``output_dir``
    under its base name, i.e. without the ``KLK/`` prefix or any
    sub-folders. If two members share a base name, the later one overwrites
    the earlier one and a warning is emitted.

    Parameters
    ----------
    archive_path : Union[str, Path]
        Path to the zip archive file
    output_dir : Union[str, Path]
        Directory where the files will be written; created (including
        parents) if it does not exist

    Returns
    -------
    None
    """
    archive_path = Path(archive_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Flattened file name -> archive member that produced it, used to
    # detect members that collapse onto the same output file.
    written = {}

    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        for member in zip_ref.namelist():
            if not (member.startswith("KLK/") and member.endswith(".cif")):
                continue

            # Using only the base name drops the folder structure (and keeps
            # the write inside output_dir regardless of the member's path).
            flat_name = Path(member).name
            if flat_name in written:
                warnings.warn(
                    f"{member} overwrites {written[flat_name]}: "
                    f"both flatten to {flat_name}",
                    stacklevel=2,
                )
            written[flat_name] = member

            # Stream the member to disk instead of loading it fully into memory
            with zip_ref.open(member) as source, open(
                output_dir / flat_name, "wb"
            ) as target:
                shutil.copyfileobj(source, target)

    print(f"Extracted KLK folder contents from {archive_path} to {output_dir}")


# Directory that will hold the extracted .cif structure files
structures_dir = tmp_dir.joinpath("structures")
structures_dir.mkdir(exist_ok=True)

# Pull the KLK .cif entries out of the archive into that directory
extract_klk_folder(archive_path=data_archive, output_dir=structures_dir)


Extracted KLK folder contents from ../data/KLK.zip to ../tmp/klk_overlay/structures


In [7]:
# Output directory for the per-structure SVG projections
svg_dir = tmp_dir / "svg"
svg_dir.mkdir(exist_ok=True)

# Style passed to flatprot via --style. Every element type is drawn at
# opacity 0.1 (presumably so that stacking many structures builds up density),
# and the canvas is 800x600, the same size as the overlay drawing created in
# the next cell.
style = """
[helix]
fill_color = "#FF7D7D"
opacity = 0.1


[sheet]
fill_color = "#7D7DFF"
opacity = 0.1

[coil]
stroke_color = "#777777" 
opacity = 0.1

[canvas]
opacity = 0.0
width = 800
height = 600
"""

style_file = tmp_dir / "style.toml"
style_file.write_text(style)

# Project each structure with flatprot. The command is passed as an argument
# list (no shell), so paths containing spaces or shell metacharacters are
# handed over unchanged. A failing structure does not stop the loop.
for file in structures_dir.glob("*.cif"):
    svg_path = svg_dir / f"{file.stem}.svg"
    result = subprocess.run(
        [
            "uv",
            "run",
            "flatprot",
            "project",
            str(file),
            str(svg_path),
            "--quiet",
            "--style",
            str(style_file),
        ],
        capture_output=True,
        text=True,
    )
    # Echo whatever the tool printed so problems stay visible in the notebook
    if result.stdout:
        print(result.stdout, end="")
    if result.stderr:
        print(result.stderr, end="")


^C


In [8]:
import drawsvg as draw

# Blank drawing for the overlay; 800x600 matches the [canvas] size written
# to the flatprot style file.
overlay = draw.Drawing(800, 600)

# Sort so the stacking order of the layers is the same on every run
svg_files = sorted(svg_dir.glob("*.svg"))

for svg_file in svg_files:
    svg_content = svg_file.read_text()

    # Drop a leading XML declaration (<?xml ... ?>) if there is one; it is
    # only allowed at the very start of a document, not inside another SVG.
    # Splitting on "?>" works whether or not a newline follows it.
    svg_elements = svg_content
    if svg_content.lstrip().startswith("<?xml"):
        _decl, terminator, remainder = svg_content.partition("?>")
        if terminator:
            svg_elements = remainder.lstrip()

    # draw.Raw places the markup into the output as-is, so each file's
    # content becomes one layer of the overlay.
    overlay.append(draw.Raw(svg_elements))

# Save the overlay SVG
overlay_path = tmp_dir / "overlay.svg"
overlay.save_svg(str(overlay_path))

print(f"Created overlay of {len(svg_files)} SVG files at {overlay_path}")

Created overlay of 436 SVG files at ../tmp/klk_overlay/overlay.svg


In [None]:
# Copyright 2025 Tobias Olenyi.
# SPDX-License-Identifier: Apache-2.0

# Display the overlay SVG in the notebook
from IPython.display import SVG, display

# Print the title first so it appears above the figure in the cell output
print("Overlay of protein structures:")

# Pass the path as filename= so it is unambiguously read from disk rather
# than interpreted as SVG markup.
display(SVG(filename=str(overlay_path)))
