In [None]:
# import libraries

import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from matplotlib.patches import Rectangle

In [None]:
def plot_psm_depth_with_cdrs(
    protein_sequence,
    mapped_psms,
    cdrs,
    highlight_colors,
    output_file="fig_depth_psms_cdrs.svg",
    title="PSM depth across the protein sequence with CDR regions",
):
    """
    Plot PSM depth along a protein sequence with CDR regions highlighted.

    The figure is displayed with ``fig.show()`` and then written to
    ``output_file`` as SVG. Nothing is returned.

    Args:
        protein_sequence (str): Protein sequence. Only its length is used, to
            size the depth array and the x-axis.
        mapped_psms (iterable): Pairs of ``(key, (start, end, _, _))``. The
            first element of each pair and the last two elements of the inner
            tuple are ignored. ``start``/``end`` are applied as a Python slice
            on the depth array (0-based, end-exclusive).
        cdrs (dict): CDR label -> ``(start, end)``; each entry is drawn as a
            shaded x-range from ``start`` to ``end`` with its label above it.
        highlight_colors (dict): CDR label -> fill color. Labels missing from
            this mapping are drawn in gray.
        output_file (str): Path of the SVG file to write.
        title (str): Plot title.
    """
    seq_len = len(protein_sequence)

    # Per-residue coverage: every PSM adds one to each position it spans.
    depth = np.zeros(seq_len, dtype=int)
    for _, (start, end, _, _) in mapped_psms:
        depth[start:end] += 1

    # Computed once (it is loop-invariant) and guarded so that an empty
    # sequence yields an empty plot instead of a ValueError from max().
    max_depth = int(depth.max()) if seq_len else 0

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=list(range(seq_len)),
            y=depth,
            mode="lines",
            line=dict(color="steelblue", width=2),
            fill="tozeroy",
            fillcolor="rgba(70, 130, 180, 0.2)",
            name="PSM Depth",
        )
    )

    # NOTE(review): the x-axis above is 0-based, while the CDR bounds are drawn
    # at their raw values. If `cdrs` holds 1-based inclusive residue numbers,
    # the shaded regions are shifted by one position relative to the depth
    # trace - confirm the intended coordinate convention with the caller.
    for label, (start, end) in cdrs.items():
        fig.add_shape(
            type="rect",
            x0=start,
            x1=end,
            y0=0,
            y1=max_depth,
            fillcolor=highlight_colors.get(label, "gray"),
            opacity=0.3,
            line=dict(width=0),
            layer="below",
        )
        # Label sits a fixed 8 depth units above the tallest point of the trace.
        fig.add_annotation(
            x=(start + end) / 2,
            y=max_depth + 8,
            text=label,
            showarrow=False,
            font=dict(size=14, color="black"),
            xanchor="center",
        )

    fig.update_layout(
        title=title,
        xaxis=dict(
            title="Amino acid position",
            tickmode="linear",
            dtick=10,
            showline=True,
            linecolor="black",
            linewidth=2,
            showgrid=False,
        ),
        yaxis=dict(
            title="Depth (Number of matching PSMs per position)",
            showline=True,
            linecolor="black",
            linewidth=2,
            showgrid=False,
        ),
        template="plotly_white",
        height=450,
        width=1000,
        margin=dict(t=60),
        showlegend=False,
    )

    fig.show()
    fig.write_image(output_file, format="svg", scale=2)

To run the previous function, you first need to create `mapped_psms` and `protein_norm`; both are produced in the main pipeline.

In [None]:
# CDR boundaries on the protein sequence and the fill color used for each one.
cdrs = dict(CDR1=(31, 35), CDR2=(50, 66), CDR3=(99, 115))
highlight_colors = dict(CDR1="orange", CDR2="lightgreen", CDR3="deepskyblue")

# `protein_norm` and `mapped_psms` are not defined in this notebook; they must
# already exist in the kernel before this cell is run.
plot_psm_depth_with_cdrs(
    protein_norm,
    mapped_psms,
    cdrs,
    highlight_colors,
    output_file="fig_4C_depth_psms_cdrs.svg",
)

In [None]:
def plot_cdr_scaffolds(
    reference_seq,
    scaffold_info,
    cdrs,
    highlight_colors,
    font_family="monospace",
    font_size=25,
    letter_spacing=1.5,
    save_path=None,
):
    """
    Plot reference sequence and full scaffold sequences aligned, highlighting CDR regions,
    with adjustable letter spacing for sequence letters.

    The reference is drawn on the top row with every CDR shaded and labelled.
    Each scaffold is drawn on its own row below, shifted horizontally to the
    offset at which it best matches the reference (L and I are treated as the
    same residue), and the part of it that overlaps its CDR is shaded.

    Args:
        reference_seq (str): Reference sequence drawn on the top row.
        scaffold_info (list): ``(scaffold_name, scaffold_sequence)`` pairs. The
            scaffold at list index ``i`` is labelled and highlighted as
            ``"CDR{i+1}"``, so that key must exist in ``cdrs``. Only three row
            positions are defined, so at most three scaffolds are supported.
        cdrs (dict): CDR label -> ``(start, end)`` given as 1-based, inclusive
            positions on ``reference_seq``.
        highlight_colors (dict): CDR label -> color. Labels missing from this
            mapping are drawn in gray.
        font_family (str): Font family for all sequence text.
        font_size (int): Font size for all text.
        letter_spacing (float): Horizontal distance between consecutive
            residues, in data units.
        save_path (str or None): If given, the figure is also saved there as SVG.
    """

    def chars_equal(a, b):
        # L and I are treated as equivalent residues when comparing.
        return a == b or (a in ("L", "I") and b in ("L", "I"))

    def find_alignment_offset(ref, seq):
        # Returns the first offset at which `seq` matches `ref` exactly. When
        # there is no exact match, returns the offset with the most matching
        # residues (earliest wins on ties) rather than silently using 0, so a
        # scaffold with a few mismatches is still drawn where it belongs.
        # Falls back to 0 only when `seq` is longer than `ref`.
        best_pos, best_score = 0, -1
        for pos in range(len(ref) - len(seq) + 1):
            score = sum(chars_equal(ref[pos + i], aa) for i, aa in enumerate(seq))
            if score == len(seq):
                return pos
            if score > best_score:
                best_pos, best_score = pos, score
        return best_pos

    fig_width = max(10, len(reference_seq) * letter_spacing / 8)
    fig, ax = plt.subplots(figsize=(fig_width, 6))
    ax.axis("off")

    # Shade and label every CDR on the reference row (y = 5).
    for name, (start, end) in cdrs.items():
        cdr_color = highlight_colors.get(name, "gray")
        # Convert the 1-based inclusive range to an x position and a width.
        x0 = (start - 1) * letter_spacing + 1
        width = (end - start + 1) * letter_spacing
        ax.add_patch(Rectangle((x0, 4.5), width, 0.8, color=cdr_color, alpha=0.3))
        ax.text(
            x0 + width / 2,
            5.5,
            name,
            ha="center",
            va="bottom",
            fontsize=font_size,
            fontweight="bold",
            color=cdr_color,
        )

    ax.text(-5, 5, "Reference", fontfamily=font_family, fontsize=font_size, ha="right")
    for i, aa in enumerate(reference_seq):
        x = i * letter_spacing + 1
        ax.text(x, 5, aa, fontfamily=font_family, fontsize=font_size, color="black")

    y_positions = [4, 3, 2]
    for idx, (scaf_name, seq) in enumerate(scaffold_info):
        # NOTE(review): scaffolds are paired with CDRs purely by list position;
        # confirm that the caller orders `scaffold_info` accordingly.
        cdr_name = f"CDR{idx+1}"
        color = highlight_colors.get(cdr_name, "gray")
        offset = find_alignment_offset(reference_seq, seq)

        # CDR bounds expressed as 0-based indices into the scaffold itself,
        # clipped to the scaffold's extent.
        start, end = cdrs[cdr_name]
        rel_start = max(start - 1 - offset, 0)
        rel_end = min(end - 1 - offset, len(seq) - 1)

        ax.text(
            -5,
            y_positions[idx],
            f"{cdr_name} ({scaf_name})",
            fontfamily=font_family,
            fontsize=font_size,
            ha="right",
        )

        for i, aa in enumerate(seq):
            x = (offset + i) * letter_spacing + 1
            ax.text(
                x,
                y_positions[idx],
                aa,
                fontfamily=font_family,
                fontsize=font_size,
                color="black",
            )

        # Skip the highlight when the scaffold does not overlap its CDR at all.
        if rel_end >= rel_start:
            x0 = (offset + rel_start) * letter_spacing + 1
            width = (rel_end - rel_start + 1) * letter_spacing
            ax.add_patch(
                Rectangle(
                    (x0, y_positions[idx] - 0.2), width, 0.8, color=color, alpha=0.3
                )
            )

    ax.set_xlim(0, len(reference_seq) * letter_spacing + 5)
    ax.set_ylim(1.5, 6)

    fig.tight_layout()
    if save_path:
        fig.savefig(save_path, format="svg", dpi=600, bbox_inches="tight")
    plt.show()

In [None]:
# Reference protein sequence that the scaffolds are aligned against.
reference_seq = "QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALITSGHMTHYEDSVKGRFTISRDNAKEVLYLQMNSLKPEDTAVYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDIDYKDDDDKGAAHHHHHH"

# CDR boundaries on the reference and the color used to shade each one.
cdrs = dict(CDR1=(31, 35), CDR2=(50, 66), CDR3=(99, 115))
highlight_colors = dict(CDR1="orange", CDR2="lightgreen", CDR3="deepskyblue")

# (scaffold name, scaffold sequence) pairs, one plotted row each.
# NOTE(review): plot_cdr_scaffolds pairs rows with CDR1, CDR2, ... by list
# position, so scaffold_10 is labelled "CDR2" here - confirm this is intended.
scaffold_info = [
    ("scaffold_5", "QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALLTSGHMTHYEDSVKGRFY"),
    ("scaffold_10", "SYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDLDYKDDDDKGAAH"),
]

plot_cdr_scaffolds(
    reference_seq=reference_seq,
    scaffold_info=scaffold_info,
    cdrs=cdrs,
    highlight_colors=highlight_colors,
    letter_spacing=3,  # wider than the default so letters do not overlap
    font_size=20,
    save_path="cdrs_scaffolds.svg",
)