In [11]:
import typing
import pandas as pd
import plotly.graph_objects as pgo
import plotly.subplots as ps

from pathlib import Path
from datapaths import *

# Truncate DataFrame displays to at most 4 rows so cell outputs stay short.
pd.set_option('display.max_rows', 4)

In [12]:
# Load the tab-separated flipon annotation table (one row per flipon) and
# display a truncated preview of it.
flipon_to_data = pd.read_csv(F_FLIPON_TO_DATA, sep="\t")
flipon_to_data

Unnamed: 0,Flipon,Coordinates,Strand,Gene Feature,Gene Name,Gene Strand,Gene Type,Gene ID,Transcript ID,c. M miR (+),...,c. J miR (-),all M miR (+),all M miR (-),all MJ miR (+),all MJ miR (-),all J miR (+),all J miR (-),cCRE (+-200bp),LINE/LTR (+-200bp),Feature Group
0,g4,chr1:3014794-3014871,-,Distal Intergenic,4933401J01Rik,+,TEC,ENSMUSG00000102693.1,ENSMUST00000193812.1,miR-328 (1),...,,"miR-6994 (1), miR-129b (1)","miR-92a-2 (1), miR-698/miR-7078 (1), miR-3572 ...",miR-486a/miR-486b (1),"miR-3076 (1), miR-6418 (1), miR-6987 (1)",,miR-7649 (1),,"LINE@L1Md_F2(-), LINE@L1VL1(+)",LINE/LTR
1,g4,chr1:3099888-3099963,+,Promoter (2-3kb),Gm26206,+,snRNA,ENSMUSG00000064842.1,ENSMUST00000082908.1,,...,,"miR-365-1/miR-365-2 (1), miR-698/miR-7078 (1),...","miR-221 (1), miR-129b (1)","miR-6418 (1), miR-7067 (1)",,,,,"LINE@L1Md_F2(+), LTR@MTC(+)",LINE/LTR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77706,z-dna,chrY:4203100-4203146,.,Distal Intergenic,Gm28191,+,unprocessed_pseudogene,ENSMUSG00000100300.1,ENSMUST00000189112.1,,...,,"miR-466d/miR-466i (4), miR-466l (5)",,miR-493 (1),,,,,LINE@L1_Mur1(-),LINE/LTR
77707,z-dna,chrY:4207093-4207113,.,Distal Intergenic,Gm28191,+,unprocessed_pseudogene,ENSMUSG00000100300.1,ENSMUST00000189112.1,,...,,"miR-466a/miR-466e/miR-466p (1), miR-466d/miR-4...",,,,,,,"LINE@L1_Mus3(+), LINE@L1Md_A(+), LTR@ORR1B1(+)",LINE/LTR


In [13]:
def get_mirna_count(mirs: str) -> int:
    """Sum the per-miRNA counts encoded in one table cell.

    A cell holds a comma-separated list of entries, each shaped like
    ``"<name> (<count>)"``, for example
    ``"miR-466d/miR-466i (4), miR-466l (5)"`` (which sums to 9).

    Args:
        mirs: The raw cell value. Missing cells are not strings (pandas
            yields float NaN, and may yield ``None`` or ``pd.NA`` depending
            on the column dtype).

    Returns:
        The sum of all parenthesised counts. Non-string values and
        empty/blank strings contribute 0.

    Raises:
        ValueError: If an entry does not end in a parenthesised integer.
    """
    # Any non-string value is treated as "no miRNA listed"; this covers NaN
    # (the original case) as well as None / pd.NA.
    if not isinstance(mirs, str):
        return 0

    total = 0
    for entry in mirs.split(","):
        entry = entry.strip()
        if not entry:
            # Blank cell or stray separator: nothing to count.
            continue
        # The count is always the last whitespace-separated token, so read it
        # from the right instead of assuming the name has no spaces.
        _, _, count_token = entry.rpartition(" ")
        if not (count_token.startswith("(") and count_token.endswith(")")):
            raise ValueError(f"malformed miRNA entry: {entry!r}")
        total += int(count_token[1:-1])
    return total


In [14]:
# Per flipon, add up the miR counts of the three "c. * miR" columns on each
# strand, then tabulate, for every flipon class and strand, how many flipons
# share each per-flipon count ("total").
plotly_data = (
    flipon_to_data
    .assign(
        mir_p_count=lambda frame: sum(
            frame[column].apply(get_mirna_count)
            for column in ("c. M miR (+)", "c. MJ miR (+)", "c. J miR (+)")
        ),
        mir_m_count=lambda frame: sum(
            frame[column].apply(get_mirna_count)
            for column in ("c. M miR (-)", "c. MJ miR (-)", "c. J miR (-)")
        ),
    )
    # Long format: one row per (flipon, strand variable).
    .melt(id_vars=["Flipon"], value_vars=["mir_p_count", "mir_m_count"])
    .groupby('Flipon')
    .value_counts()
    .rename('total')
    .to_frame()
    .reset_index()
)
plotly_data

Unnamed: 0,Flipon,variable,value,total
0,g4,mir_p_count,0,12799
1,g4,mir_m_count,0,12772
...,...,...,...,...
99,z-dna,mir_p_count,10,1
100,z-dna,mir_p_count,12,1


In [15]:
# Figure 2C: one panel per flipon class; horizontal bars show how many flipons
# (x) have a given number of miR per flipon (y), split by strand.
groups = ("G4", "Z-DNA", "SIDD", "H-DNA")
categories = {
    "mir_p_count": "+ strand",
    "mir_m_count": "- strand",
}
colors = {
    "+ strand": COLORS['SIDD'],
    "- strand": COLORS['Z-DNA'],
}

n_rows = 1
n_cols = 4

# Bars at least this long get their value label drawn inside the bar; shorter
# bars get it outside.
inside_label_threshold = 17000

# plotly_data was melted into one row per flipon and strand variable, so every
# flipon is represented once under "mir_p_count" and once under "mir_m_count".
# Summing `total` over both variables would report twice the number of
# flipons, so the per-class count in the panel title uses one strand only.
flipons_per_group = (
    plotly_data[plotly_data["variable"] == "mir_p_count"]
    .groupby("Flipon")["total"]
    .sum()
)

fig = ps.make_subplots(
    rows=n_rows,
    cols=n_cols,
    shared_xaxes="all",
    shared_yaxes="all",
    horizontal_spacing=0.004,
    vertical_spacing=0.01,
    column_titles=[
        # Classes absent from the data are shown with a count of 0.
        f"<b>{x}</b> ({flipons_per_group.get(x.lower(), 0):,d})"
        for x in groups
    ],
    y_title="Number of miR per flipon",
    x_title="Flipon count",
)

for col, group in enumerate(groups, start=1):
    is_first_panel = col == 1
    for category, name in categories.items():
        df = plotly_data[
            (plotly_data["Flipon"] == group.lower())
            & (plotly_data["variable"] == category)
        ]
        sub = pgo.Bar(
            x=df["total"],
            y=df["value"],
            name=name,
            texttemplate="%{x:,d}",
            textposition=df["total"].apply(
                lambda x: "outside" if x < inside_label_threshold else "inside"
            ),
            marker=dict(color=colors[name]),
            orientation="h",
            width=0.4,
            # Only the first panel contributes legend entries, otherwise each
            # strand would be listed once per panel.
            showlegend=is_first_panel,
        )
        fig.add_trace(sub, row=1, col=col)
    if not is_first_panel:
        # The y axis is shared, so tick marks are kept on the first panel only.
        fig.update_yaxes(ticks="", row=1, col=col)


fig.update_xaxes(tickformat="s")
fig.update_xaxes(ticks="outside", row=n_rows)
fig.update_yaxes(dtick=1)
fig.update_annotations(font=dict(size=18))

fig.update_layout(
    height=700,
    width=1200,
    font=dict(size=16),
    uniformtext_minsize=16,
    uniformtext_mode="show",
    margin=dict(l=60, r=5, t=25, b=60),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=.925,
        xanchor="right",
        x=.99,
    ),
    template="ggplot2",
)

fig.write_image(D_IMG / "Figure.2C.png", scale=2)
fig.show()


In [16]:
# Same tabulation as for the full table, but restricted to flipons that have
# at least one non-missing value in a column whose name contains "c. ".
plotly_data = (
    flipon_to_data
    .loc[lambda frame: frame.filter(like='c. ').notna().any(axis=1)]
    .assign(
        mir_p_count=lambda frame: sum(
            frame[column].apply(get_mirna_count)
            for column in ("c. M miR (+)", "c. MJ miR (+)", "c. J miR (+)")
        ),
        mir_m_count=lambda frame: sum(
            frame[column].apply(get_mirna_count)
            for column in ("c. M miR (-)", "c. MJ miR (-)", "c. J miR (-)")
        ),
    )
    # Long format: one row per (flipon, strand variable).
    .melt(id_vars=["Flipon"], value_vars=["mir_p_count", "mir_m_count"])
    .groupby('Flipon')
    .value_counts()
    .rename('total')
    .to_frame()
    .reset_index()
)
plotly_data

Unnamed: 0,Flipon,variable,value,total
0,g4,mir_m_count,1,5366
1,g4,mir_p_count,1,5358
...,...,...,...,...
99,z-dna,mir_p_count,10,1
100,z-dna,mir_p_count,12,1


In [17]:
# Figure 2D: one panel per flipon class; horizontal bars show how many flipons
# (x) have a given number of miR per flipon (y), split by strand.
groups = ("G4", "Z-DNA", "SIDD", "H-DNA")
categories = {
    "mir_p_count": "+ strand",
    "mir_m_count": "- strand",
}
colors = {
    "+ strand": COLORS['SIDD'],
    "- strand": COLORS['Z-DNA'],
}

n_rows = 1
n_cols = 4

# Bars at least this long get their value label drawn inside the bar; shorter
# bars get it outside.
inside_label_threshold = 4000

# plotly_data was melted into one row per flipon and strand variable, so every
# flipon is represented once under "mir_p_count" and once under "mir_m_count".
# Summing `total` over both variables would report twice the number of
# flipons, so the per-class count in the panel title uses one strand only.
flipons_per_group = (
    plotly_data[plotly_data["variable"] == "mir_p_count"]
    .groupby("Flipon")["total"]
    .sum()
)

fig = ps.make_subplots(
    rows=n_rows,
    cols=n_cols,
    shared_xaxes="all",
    shared_yaxes="all",
    horizontal_spacing=0.004,
    vertical_spacing=0.01,
    column_titles=[
        # Classes absent from the data are shown with a count of 0.
        f"<b>{x}</b> ({flipons_per_group.get(x.lower(), 0):,d})"
        for x in groups
    ],
    y_title="Number of miR per flipon",
    x_title="Flipon count",
)

for col, group in enumerate(groups, start=1):
    is_first_panel = col == 1
    for category, name in categories.items():
        df = plotly_data[
            (plotly_data["Flipon"] == group.lower())
            & (plotly_data["variable"] == category)
        ]
        sub = pgo.Bar(
            x=df["total"],
            y=df["value"],
            name=name,
            texttemplate="%{x:,d}",
            textposition=df["total"].apply(
                lambda x: "outside" if x < inside_label_threshold else "inside"
            ),
            marker=dict(color=colors[name]),
            orientation="h",
            width=0.4,
            # Only the first panel contributes legend entries, otherwise each
            # strand would be listed once per panel.
            showlegend=is_first_panel,
        )
        fig.add_trace(sub, row=1, col=col)
    if not is_first_panel:
        # The y axis is shared, so tick marks are kept on the first panel only.
        fig.update_yaxes(ticks="", row=1, col=col)


fig.update_xaxes(tickformat="s")
fig.update_xaxes(ticks="outside", row=n_rows)
fig.update_yaxes(dtick=1)
fig.update_annotations(font=dict(size=18))

fig.update_layout(
    height=700,
    width=1200,
    font=dict(size=16),
    uniformtext_minsize=16,
    uniformtext_mode="show",
    margin=dict(l=60, r=5, t=25, b=60),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=.925,
        xanchor="right",
        x=.99,
    ),
    template="ggplot2",
)

fig.write_image(D_IMG / "Figure.2D.png", scale=2)
fig.show()
