In [None]:
# %pip install pycirclizely

In [2]:
import plotly.io as pio
from IPython.display import HTML

# Circos plot: viruses and prokaryotes

In [3]:
from Bio.SeqFeature import SeqFeature


def hover_formatter(feature: SeqFeature) -> str:
    """Build the hover text shown for a genomic feature.

    Collects, in this order, the gene name, feature type, strand, position and
    product of ``feature`` and joins the available pieces with ``<br>``.
    Pieces whose data is missing or empty are skipped.

    Args:
        feature: Feature to describe. Only its ``qualifiers``, ``type`` and
            ``location`` attributes are read, and each of them is optional.

    Returns:
        The ``<br>``-joined description, or ``"No information available"``
        when nothing could be extracted.
    """
    # Only real strand values get a label; anything else (e.g. ``None``) is
    # left out instead of being reported as the reverse strand.
    strand_symbols = {1: "+", -1: "-", 0: "?"}

    def first_qualifier(key: str) -> str:
        """Return the first value stored under ``key``, or "" if there is none."""
        qualifiers = getattr(feature, "qualifiers", None) or {}
        values = qualifiers.get(key)
        if not values:
            # Covers a missing key, ``None`` and an empty list of values.
            return ""
        # Tolerate a bare string where a list of values is expected.
        return values if isinstance(values, str) else values[0]

    def coordinate(location, name: str):
        """Return ``location.<name>`` as an int, or "?" when it is unavailable."""
        value = getattr(location, name, None)
        return "?" if value is None else int(value)

    info = []

    gene = first_qualifier("gene")
    if gene:
        info.append(f"Gene: {gene}")

    feature_type = getattr(feature, "type", None)
    if feature_type:
        info.append(f"Type: {feature_type}")

    if hasattr(feature, "location"):
        location = feature.location

        strand = strand_symbols.get(getattr(location, "strand", None))
        if strand is not None:
            info.append(f"Strand: {strand}")

        start = coordinate(location, "start")
        end = coordinate(location, "end")
        info.append(f"Position: {start}-{end}")

    product = first_qualifier("product")
    if product:
        info.append(f"Product: {product}")

    return "<br>".join(info) if info else "No information available"

## 1. Enterobacteria phage

In [None]:
from pycirclizely import Circos
from pycirclizely.parser import Gff
from pycirclizely.utils import load_prokaryote_example_file

# Load the Enterobacteria phage example annotation (GFF)
gff_file = load_prokaryote_example_file("enterobacteria_phage.gff")
gff = Gff(gff_file)

# One sector per sequence; a gap is only inserted when there are several sectors
seqid2size = gff.get_seqid2size()
space = 2 if len(seqid2size) != 1 else 0
circos = Circos(sectors=seqid2size, space=space)
circos.text("Enterobacteria phage<br>(NC_000902)", font=dict(size=20))

# Radial band and colour per strand: strand == 1 uses the 95-100 band,
# every other strand value uses the 90-95 band.
forward_style = dict(r_lim=(95, 100), fillcolor="salmon")
other_style = dict(r_lim=(90, 95), fillcolor="skyblue")


def kb_label(v):
    """Format a position as kilobases with one decimal place."""
    return f"{v/ 1000:.1f} Kb"


seqid2features = gff.get_seqid2features(feature_type="CDS")
for sector in circos.sectors:
    cds_track = sector.add_track((90, 100))
    cds_track.axis(fillcolor="#EEEEEE", line=dict(width=0))

    # Draw every CDS of this sector as an arrow in the band matching its strand
    features = seqid2features[sector.name]
    for feature in features:
        style = forward_style if feature.location.strand == 1 else other_style
        cds_track.genomic_features(
            feature,
            plotstyle="arrow",
            hover_text_formatter=hover_formatter,
            **style,
        )

    # Ticks every 5000 positions, drawn on the inner side of the track
    cds_track.xticks_by_interval(
        interval=5000,
        outer=False,
        label_formatter=kb_label,
        label_orientation="vertical",
        line_kws=dict(line=dict(color="grey")),
    )

fig = circos.plotfig()
HTML(pio.to_html(fig, include_plotlyjs="cdn"))

## 2. Escherichia coli

In [None]:
import numpy as np
import plotly.graph_objects as go

from pycirclizely import Circos
from pycirclizely.parser import Genbank
from pycirclizely.utils import load_prokaryote_example_file

# Load Genbank file
gbk_file = load_prokaryote_example_file("escherichia_coli.gbk.gz")
gbk = Genbank(gbk_file)

# Initialize circos instance (no gap between sectors when there is only one)
seqid2size = gbk.get_seqid2size()
space = 0 if len(seqid2size) == 1 else 2
circos = Circos(sectors=seqid2size, space=space)
circos.text("Escherichia coli<br>(NC_000913)", font=dict(size=14))

seqid2features = gbk.get_seqid2features(feature_type=None)
seqid2seq = gbk.get_seqid2seq()

# Tick spacing of the outer track
major_ticks_interval = 500000
minor_ticks_interval = 100000

# The whole-genome GC content is the baseline subtracted in every sector, so
# it is computed once here instead of once per sector.
genome_gc_content = gbk.calc_genome_gc_content(seq=gbk.full_genome_seq)

for sector in circos.sectors:
    # Plot outer track with xticks (skipped for sectors shorter than one major tick)
    outer_track = sector.add_track((98, 100))
    outer_track.axis(fillcolor="lightgrey")
    if sector.size >= major_ticks_interval:
        outer_track.xticks_by_interval(
            major_ticks_interval, label_formatter=lambda v: f"{v/ 10 ** 6:.1f} Mb"
        )
        outer_track.xticks_by_interval(
            minor_ticks_interval, tick_length=1, show_label=False
        )

    f_cds_track = sector.add_track((90, 97), r_pad_ratio=0.1)
    r_cds_track = sector.add_track((83, 90), r_pad_ratio=0.1)
    rrna_track = sector.add_track((76, 83), r_pad_ratio=0.1)
    trna_track = sector.add_track((69, 76), r_pad_ratio=0.1)

    # Plot Forward CDS, Reverse CDS, rRNA, tRNA
    # (the fill colours below must stay in sync with `legend_colors`)
    features = seqid2features[sector.name]
    for feature in features:
        if feature.type == "CDS" and feature.location.strand == 1:
            f_cds_track.genomic_features(
                feature,
                hover_text_formatter=hover_formatter,
                fillcolor="red",
                line=dict(color=None, width=0),
            )
        elif feature.type == "CDS" and feature.location.strand == -1:
            r_cds_track.genomic_features(
                feature,
                hover_text_formatter=hover_formatter,
                fillcolor="blue",
                line=dict(color=None, width=0),
            )
        elif feature.type == "rRNA":
            rrna_track.genomic_features(
                feature,
                hover_text_formatter=hover_formatter,
                fillcolor="green",
                line=dict(color=None, width=0),
            )
        elif feature.type == "tRNA":
            # Fill with magenta as well as outlining, so the feature matches
            # the filled magenta "tRNA" legend marker.
            trna_track.genomic_features(
                feature,
                hover_text_formatter=hover_formatter,
                fillcolor="magenta",
                line=dict(color="magenta", width=0.1),
            )

    # Plot GC content as the deviation from the whole-genome value
    gc_content_track = sector.add_track((50, 65))
    seq = seqid2seq[sector.name]
    label_pos_list, gc_contents = gbk.calc_gc_content(seq=seq)
    gc_contents = gc_contents - genome_gc_content
    positive_gc_contents = np.where(gc_contents > 0, gc_contents, 0)
    negative_gc_contents = np.where(gc_contents < 0, gc_contents, 0)
    # Symmetric value range so that zero sits in the middle of the track
    abs_max_gc_content = np.max(np.abs(gc_contents))
    vmin, vmax = -abs_max_gc_content, abs_max_gc_content
    gc_content_track.fill_between(
        label_pos_list, positive_gc_contents, 0, vmin=vmin, vmax=vmax, fillcolor="black"
    )
    gc_content_track.fill_between(
        label_pos_list, negative_gc_contents, 0, vmin=vmin, vmax=vmax, fillcolor="grey"
    )

    # Plot GC skew, again split into its positive and negative parts
    gc_skew_track = sector.add_track((35, 50))
    label_pos_list, gc_skews = gbk.calc_gc_skew(seq=seq)
    positive_gc_skews = np.where(gc_skews > 0, gc_skews, 0)
    negative_gc_skews = np.where(gc_skews < 0, gc_skews, 0)
    abs_max_gc_skew = np.max(np.abs(gc_skews))
    vmin, vmax = -abs_max_gc_skew, abs_max_gc_skew
    gc_skew_track.fill_between(
        label_pos_list, positive_gc_skews, 0, vmin=vmin, vmax=vmax, fillcolor="olive"
    )
    gc_skew_track.fill_between(
        label_pos_list, negative_gc_skews, 0, vmin=vmin, vmax=vmax, fillcolor="purple"
    )

fig = circos.plotfig()

# Legend entries: label, colour and marker symbol are matched by position
legend_labels = [
    "Forward CDS",
    "Reverse CDS",
    "rRNA",
    "tRNA",
    "Positive GC Content",
    "Negative GC Content",
    "Positive GC Skew",
    "Negative GC Skew",
]

legend_colors = ["red", "blue", "green", "magenta", "black", "grey", "olive", "purple"]

marker_symbols = [
    "square",
    "square",
    "square",
    "square",
    "triangle-up",
    "triangle-down",
    "triangle-up",
    "triangle-down",
]

# Add one dummy trace per legend entry; the traces exist only to create
# legend items and carry no real data points.
for label, color, symbol in zip(legend_labels, legend_colors, marker_symbols):
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=None,
            mode="markers",
            marker=dict(color=color, symbol=symbol, size=9),
            name=label,
            showlegend=True,
        )
    )
fig.update_layout(
    legend=dict(
        x=1,
        y=0.05,
        xanchor="center",
        yanchor="middle",
        font=dict(size=12),
        bgcolor="rgba(255,255,255,0.0)",
    )
)
HTML(pio.to_html(fig, include_plotlyjs="cdn"))

## 3. Mycoplasma alvi

In [None]:
# Imported here as well because the legend below uses `go.Scatter`; without it
# this cell only works after another cell has already imported `go`.
import plotly.graph_objects as go

from pycirclizely import Circos
from pycirclizely.parser import Genbank, Gff
from pycirclizely.utils import load_prokaryote_example_file

# Case1. Load `GFF` contig genomes
# https://github.com/moshi4/pycirclizely-data/blob/main/prokaryote/mycoplasma_alvi.gff
# NOTE: `gff_parser` is loaded for demonstration only; the plot below is built
# from `gbk_parser`.
gff_file = load_prokaryote_example_file("mycoplasma_alvi.gff")
gff_parser = Gff(gff_file)

# Case2. Load `Genbank` contig genomes (try this one)
# https://github.com/moshi4/pycirclizely-data/blob/main/prokaryote/mycoplasma_alvi.gbk
gbk_file = load_prokaryote_example_file("mycoplasma_alvi.gbk")
gbk_parser = Genbank(gbk_file)

# Initialize circos instance (no gap between sectors when there is only one)
seqid2size = gbk_parser.get_seqid2size()
space = 0 if len(seqid2size) == 1 else 2
circos = Circos(seqid2size, space=space)
circos.text(
    f"Mycoplasma alvi<br>({len(circos.sectors)} contigs)", r=15, font=dict(size=20)
)

# Fill colour of each feature class; shared by the tracks and the legend so
# that the two cannot drift apart.
feature_colors = {
    "Forward CDS": "tomato",
    "Reverse CDS": "skyblue",
    "rRNA": "lime",
    "tRNA": "magenta",
}

seqid2features = gbk_parser.get_seqid2features(feature_type=None)
for sector in circos.sectors:
    # Plot outer track
    outer_track = sector.add_track((99.7, 100))
    outer_track.axis(fillcolor="black")
    major_interval = 100000
    minor_interval = int(major_interval / 10)
    # Sectors no longer than one minor interval get no ticks at all
    if sector.size > minor_interval:
        outer_track.xticks_by_interval(
            major_interval, label_formatter=lambda v: f"{v / 1000:.0f} Kb"
        )
        outer_track.xticks_by_interval(minor_interval, tick_length=1, show_label=False)

    # Plot forward/reverse CDS, rRNA, tRNA tracks
    f_cds_track = sector.add_track((91, 98), r_pad_ratio=0.1)
    r_cds_track = sector.add_track((84, 91), r_pad_ratio=0.1)
    rrna_track = sector.add_track((77, 84), r_pad_ratio=0.1)
    trna_track = sector.add_track((70, 77), r_pad_ratio=0.1)
    for feature in seqid2features[sector.name]:
        if feature.type == "CDS":
            # Any CDS whose strand is not 1 goes to the reverse track
            if feature.location.strand == 1:
                f_cds_track.genomic_features(
                    [feature],
                    hover_text_formatter=hover_formatter,
                    fillcolor=feature_colors["Forward CDS"],
                    line=dict(width=0.0),
                )
            else:
                r_cds_track.genomic_features(
                    [feature],
                    hover_text_formatter=hover_formatter,
                    fillcolor=feature_colors["Reverse CDS"],
                    line=dict(width=0.0),
                )
        elif feature.type == "rRNA":
            rrna_track.genomic_features(
                [feature],
                hover_text_formatter=hover_formatter,
                fillcolor=feature_colors["rRNA"],
                line=dict(width=0.0),
            )
        elif feature.type == "tRNA":
            trna_track.genomic_features(
                [feature],
                hover_text_formatter=hover_formatter,
                fillcolor=feature_colors["tRNA"],
                line=dict(width=0.0),
            )

fig = circos.plotfig()

# Add legend using dummy traces. The colours come from `feature_colors`, i.e.
# exactly the fills used on the tracks above.
legend_labels = list(feature_colors)
legend_colors = list(feature_colors.values())

# Add all legend entries in a loop
for label, color in zip(legend_labels, legend_colors):
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=None,
            mode="markers",
            marker=dict(color=color, symbol="square", size=12),
            name=label,
            showlegend=True,
        )
    )

# Update layout with legend position and style
fig.update_layout(
    legend=dict(
        x=0.5,
        y=0.40,
        xanchor="center",
        yanchor="middle",
        font=dict(size=12),
        bgcolor="rgba(255,255,255,0.0)",
    )
)
HTML(pio.to_html(fig, include_plotlyjs="cdn"))