In [2]:
import typing
import numpy as np
import pandas as pd
import plotly.graph_objects as pgo
import plotly.subplots as ps
import plotly.express as px

from datapaths import *

# Cap DataFrame/Series reprs at 4 rows so cell output stays short.
pd.options.display.max_rows = 4


In [14]:
def get_plotly_data(path: Path) -> pd.DataFrame:
    """Count rows per (gene feature, feature group, flipon) combination.

    Reads the tab-separated table at ``path`` and tallies how many rows share
    each combination of the "Gene Feature", "Feature Group" and "Flipon"
    columns. Missing values in those columns are treated as empty strings.

    Parameters
    ----------
    path : Path
        Location of a tab-separated file containing at least the columns
        "Feature Group", "Gene Feature" and "Flipon".

    Returns
    -------
    pd.DataFrame
        Long-format frame with the columns "Gene Feature", "Feature Group",
        "Flipon" and "count". Rows whose "Feature Group" is empty are left
        out, and any cell equal to ``g4`` / ``z-dna`` / ``h-dna`` / ``sidd``
        is replaced by its upper-case display label.

    Raises
    ------
    KeyError
        If one of the three required columns is absent from the file.
    """
    wanted_columns = ["Feature Group", "Gene Feature", "Flipon"]
    display_labels = {"g4": "G4", "z-dna": "Z-DNA", "h-dna": "H-DNA", "sidd": "SIDD"}

    # Only the three grouping columns are needed; selecting them before
    # fillna avoids coercing unrelated (e.g. numeric) columns to object.
    flipons = pd.read_table(path)[wanted_columns].fillna("")

    counts = (
        flipons
        .groupby(["Gene Feature", "Feature Group"])[["Flipon"]]
        .value_counts()
        # Name the tally explicitly: the default name of this Series differs
        # between pandas versions (None/0 before 2.0, "count" afterwards).
        .rename("count")
        .reset_index()
    )
    # counts['Feature Group'] = counts['Feature Group'].str.replace('& ', '&<br>')

    has_feature_group = counts["Feature Group"] != ""
    return counts[has_feature_group].replace(display_labels)


In [15]:
# Flipon classes; passed to plot_matrix, which draws one bar per class
# inside every panel, in this order.
groups = "G4", "Z-DNA", "SIDD", "H-DNA"

# Gene-feature annotations; plot_matrix uses one subplot column per entry,
# left to right.
annotations = (
    ("Promoter (<=1kb)", "Promoter (1-2kb)", "Promoter (2-3kb)")
    + ("5' UTR", "Exon", "Intron", "3' UTR")
    + ("Downstream (<=300bp)", "Distal Intergenic")
)

# Feature groups; plot_matrix uses one subplot row per entry, top to bottom.
feature_groups = tuple(
    [
        "cCRE",
        "cCRE & CTCF",
        "LINE/LTR",
        # Further combinations, currently not plotted:
        # "cCRE &<br>LINE/LTR",
        # "CTCF",
        # "LINE/LTR &<br>CTCF",
        # "cCRE &<br>LINE/LTR &<br>CTCF",
    ]
)

In [60]:
def _count_tick_values(max_count, n_ticks: int = 4) -> list:
    """Return ``n_ticks`` evenly spaced y-axis tick positions for a count axis.

    The step is a quarter of ``max_count`` rounded to the nearest thousand.
    When that rounding collapses to zero (small counts), the step falls back
    to one significant digit, and never goes below 1, so the axis still gets
    usable ticks. An empty list is returned when ``max_count`` is NaN or not
    positive.
    """
    raw_step = float(max_count) / n_ticks
    if not raw_step > 0:  # also true for NaN (e.g. empty input frame)
        return []

    step = round(raw_step, -3)
    if step == 0:
        if raw_step >= 1:
            magnitude = float(10.0 ** np.floor(np.log10(raw_step)))
        else:
            magnitude = 1.0
        step = max(1.0, round(raw_step / magnitude) * magnitude)

    return [step * k for k in range(1, n_ticks + 1)]


def plot_matrix(
    groups: typing.Iterable,
    annotations: typing.Iterable,
    feature_groups: typing.Iterable,
    plotly_data: pd.DataFrame,
    title: str = "Generic Title",
    fig_title: str = "figure.png",
    height: int = 900,
    width: int = 1800,
):
    """Draw a grid of bar charts: feature groups (rows) x annotations (columns).

    Each panel shows one bar per entry of ``groups`` with the matching
    "count" from ``plotly_data``. Panels without data stay empty. The figure
    is written to ``D_IMG / fig_title`` and then displayed.

    Parameters
    ----------
    groups : Iterable
        Flipon labels to plot in each panel, in bar order. Every label must
        have an entry in the colour table below.
    annotations : Iterable
        Values of the "Gene Feature" column; one subplot column each.
    feature_groups : Iterable
        Values of the "Feature Group" column; one subplot row each.
    plotly_data : pd.DataFrame
        Long-format frame with the columns "Feature Group", "Gene Feature",
        "Flipon" and "count".
    title : str
        Figure title.
    fig_title : str
        File name of the exported image, relative to ``D_IMG``.
    height, width : int
        Figure size in pixels.

    Returns
    -------
    None
        Deliberately returns nothing, so a notebook cell ending in this call
        does not render the figure a second time.
    """
    bar_colors = {
        "G4": "#e53237",
        "Z-DNA": "#0064d1",
        "SIDD": "#f5af01",
        "H-DNA": "#86b818",
    }

    # Materialise the inputs: they are measured with len() and iterated
    # several times, which a one-shot iterable would not survive.
    groups = list(groups)
    annotations = list(annotations)
    feature_groups = list(feature_groups)

    fig = ps.make_subplots(
        rows=len(feature_groups),
        cols=len(annotations),
        shared_xaxes="all",
        shared_yaxes="all",
        horizontal_spacing=0.004,
        vertical_spacing=0.01,
        column_titles=annotations,
        row_titles=feature_groups,
    )

    # Loop-invariant values, computed once instead of once per bar.
    max_count = plotly_data["count"].max()
    inside_threshold = max_count // 1.25

    # Flipon labels that already own a legend entry. Tracking them (rather
    # than taking the legend from the top-left panel only) keeps the legend
    # complete even when that panel is empty or lacks some labels.
    legend_seen = set()

    for row_no, feature_group in enumerate(feature_groups, start=1):
        for col_no, annotation in enumerate(annotations, start=1):
            in_panel = (plotly_data["Feature Group"] == feature_group) & (
                plotly_data["Gene Feature"] == annotation
            )
            panel = (
                plotly_data[in_panel]
                .set_index("Flipon")
                .reindex(groups)  # fixes bar order; absent labels become NaN rows
                .dropna()
            )

            if panel.empty:
                continue

            # One trace per bar, so every flipon label gets its own colour
            # and legend entry:
            # https://stackoverflow.com/questions/70347594/how-to-format-plotly-legend-when-using-marker-color
            for flipon, record in panel.iterrows():
                count = record["count"]
                bar = pgo.Bar(
                    x=[flipon],
                    y=[count],
                    name=flipon,
                    # Tall bars carry their label inside so it is not clipped.
                    textposition="outside" if count < inside_threshold else "inside",
                    texttemplate="%{y:,d}",
                    marker=dict(color=bar_colors[flipon]),
                    width=1,
                    showlegend=flipon not in legend_seen,
                )
                legend_seen.add(flipon)
                fig.add_trace(bar, row=row_no, col=col_no)
            fig.update_yaxes(ticks="", row=row_no, col=col_no)

    fig.update_yaxes(
        tickformat="s",
        showgrid=False,
        tickmode="array",
        tickvals=_count_tick_values(max_count),
    )
    # Only the left-most column shows tick marks.
    fig.update_yaxes(ticks="outside", col=1)
    fig.update_xaxes(type="category", visible=False)
    fig.update_annotations(textangle=0, font=dict(size=16), align="left")
    fig.update_layout(
        title=f"<span style='font-size: 16px;'>{title}</span>",
        title_x=0.005,
        title_y=0.97,
        height=height,
        width=width,
        font=dict(family="monospace", size=12),
        uniformtext_minsize=12,
        margin=dict(l=5, t=55, b=5, r=80),
        legend=dict(
            font_size=18,
            orientation="h",
            yanchor="bottom",
            y=1.07,
            xanchor="right",
            x=0.98,
        ),
    )

    fig.write_image(D_IMG / fig_title, scale=2)
    fig.show()


In [61]:
# Draft figure 1c: counts from the F_FLIPON_TO_DATA table. Kept under its own
# name so it is neither mistaken for nor overwritten by the miRNA data.
data_flipons = get_plotly_data(F_FLIPON_TO_DATA)
plot_matrix(
    groups,
    annotations,
    feature_groups,
    plotly_data=data_flipons,
    title=f"Flipons ({data_flipons['count'].sum():,d})",
    height=400,
    fig_title="Figure.draft.1c.png",
)


In [62]:
# Draft figure 1d: counts from the F_FLIPON_TO_DATA_MIRNA table; the title
# carries the total number of counted rows.
data_mirna = get_plotly_data(F_FLIPON_TO_DATA_MIRNA)
plot_matrix(
    groups=groups,
    annotations=annotations,
    feature_groups=feature_groups,
    plotly_data=data_mirna,
    title=f"Flipons & miRNAs ({data_mirna['count'].sum():,d})",
    fig_title="Figure.draft.1d.png",
    height=400,
)
