In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as pgo
import plotly.subplots as ps
import plotly.express as px

from pathlib import Path

pd.set_option('display.max_rows', 4)

d_root = Path("/home/fpavlov/projects/article_conserved_miRNA")
d_data = d_root / "data"
d_img = d_root / "img"

# Input
f_flipon_to_data = d_data / "flipon_to_data.tsv"
f_flipon_to_data_og = d_data / "flipon_to_data_og.tsv"


In [139]:
def get_mirna_number(array: list):
    total = 0
    for mirna in array:
        if '(' in mirna:
            mirna_number = int(''.join(mirna.split(' (')[-1][:-1]))
            if mirna_number > 10:
                total += 1
            else:
                total += mirna_number
        else:
            total += 1
    return total

In [142]:
flipon_to_data = pd.read_table(f_flipon_to_data_og)

plotly_data = (
    flipon_to_data.assign(
        group = lambda x: x['group'].str.replace('quadruplex', 'g4'),
        mirna_plus_count=lambda x: x["miRNA (+ strand)"].str.split(", ").apply(lambda x: get_mirna_number(x) if type(x) is list else 0).astype(pd.Int64Dtype()),
        mirna_minus_count=lambda x: x["miRNA (- strand)"].str.split(", ").apply(lambda x: get_mirna_number(x) if type(x) is list else 0).astype(pd.Int64Dtype()),
    )
    .melt(id_vars=["group"], value_vars=["mirna_plus_count", "mirna_minus_count"])
    .groupby("group")
    .value_counts()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
)
plotly_data


groups = ("G4", "Z-DNA", "SIDD", "H-DNA")
categories = {
    "mirna_plus_count": "+ strand",
    "mirna_minus_count": "- strand",
}
colors = {
    "+ strand": "tomato",
    "- strand": "royalblue",
}

n_rows = 1
n_cols = 4

fig = ps.make_subplots(
    rows=n_rows,
    cols=n_cols,
    shared_xaxes="all",
    shared_yaxes="all",
    horizontal_spacing=0.004,
    vertical_spacing=0.01,
    column_titles=[
        f"<b>{x}</b> ({plotly_data[plotly_data['group'] == x.lower()]['count'].sum():,d})"
        for x in groups
    ],
    y_title="Number of miRNAs per flipon",
    x_title="Flipon count",
)

for i, group in enumerate(groups):
    for category, name in categories.items():
        df = plotly_data[
            (plotly_data["group"] == group.lower())
            & (plotly_data["variable"] == category)
        ]
        sub = pgo.Bar(
            x=df["count"],
            y=df["value"],
            name=name,
            texttemplate="%{x:,d}",
            textposition=df["count"].apply(
                lambda x: "outside" if x < 300 else "inside"
            ),
            marker=dict(color=colors[name]),
            orientation="h",
            width=0.45,
            showlegend=True if (i == 0) else False,
        )
        fig.add_trace(sub, row=1, col=i + 1)
    if i:
        fig.update_yaxes(ticks="", row=1, col=i + 1)


fig.update_xaxes(tickformat="s")
fig.update_xaxes(ticks="outside", row=n_rows)
fig.update_yaxes(dtick=1)
fig.update_annotations(font=dict(size=14))

fig.update_layout(
    # title=f"Flipons enriched with miRNA seed-regions ({plotly_data['count'].sum():,d})",
    # title_x=0.0035,
    # title_y=.985,
    height=700,
    width=1600,
    font=dict(family="monospace", size=12),
    uniformtext_minsize=12,
    uniformtext_mode="show",
    margin=dict(l=60, r=5, t=25, b=60),
    legend=dict(
        # title_text="Flipon group",
        # traceorder="reversed",
        orientation="h",
        yanchor="bottom",
        y=1.05,
        xanchor="right",
        x=1,
    ),
    template="ggplot2",
)

fig.write_image(d_img / "flipon-to-mirna-mapping-strand-counts-all.png")
fig.show()



Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.



In [143]:
flipon_to_data = pd.read_table(f_flipon_to_data)

plotly_data = (
    flipon_to_data.assign(
        group = lambda x: x['group'].str.replace('quadruplex', 'g4'),
        mirna_plus_count=lambda x: x["miRNA (+ strand)"].str.split(", ").apply(lambda x: get_mirna_number(x) if type(x) is list else 0).astype(pd.Int64Dtype()),
        mirna_minus_count=lambda x: x["miRNA (- strand)"].str.split(", ").apply(lambda x: get_mirna_number(x) if type(x) is list else 0).astype(pd.Int64Dtype()),
    )
    .melt(id_vars=["group"], value_vars=["mirna_plus_count", "mirna_minus_count"])
    .groupby("group")
    .value_counts()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
)
plotly_data


groups = ("G4", "Z-DNA", "SIDD", "H-DNA")
categories = {
    "mirna_plus_count": "+ strand",
    "mirna_minus_count": "- strand",
}
colors = {
    "+ strand": "tomato",
    "- strand": "royalblue",
}

n_rows = 1
n_cols = 4

fig = ps.make_subplots(
    rows=n_rows,
    cols=n_cols,
    shared_xaxes="all",
    shared_yaxes="all",
    horizontal_spacing=0.004,
    vertical_spacing=0.01,
    column_titles=[
        f"<b>{x}</b> ({plotly_data[plotly_data['group'] == x.lower()]['count'].sum():,d})"
        for x in groups
    ],
    y_title="Number of miRNAs per flipon",
    x_title="Flipon count",
)

for i, group in enumerate(groups):
    for category, name in categories.items():
        df = plotly_data[
            (plotly_data["group"] == group.lower())
            & (plotly_data["variable"] == category)
        ]
        sub = pgo.Bar(
            x=df["count"],
            y=df["value"],
            name=name,
            texttemplate="%{x:,d}",
            textposition=df["count"].apply(
                lambda x: "outside" if x < 300 else "inside"
            ),
            marker=dict(color=colors[name]),
            orientation="h",
            width=0.45,
            showlegend=True if (i == 0) else False,
        )
        fig.add_trace(sub, row=1, col=i + 1)
    if i:
        fig.update_yaxes(ticks="", row=1, col=i + 1)


fig.update_xaxes(tickformat="s")
fig.update_xaxes(ticks="outside", row=n_rows)
fig.update_yaxes(dtick=1)
fig.update_annotations(font=dict(size=14))

fig.update_layout(
    # title=f"Flipons enriched with miRNA seed-regions ({plotly_data['count'].sum():,d})",
    # title_x=0.0035,
    # title_y=.985,
    height=700,
    width=1600,
    font=dict(family="monospace", size=12),
    uniformtext_minsize=12,
    uniformtext_mode="show",
    margin=dict(l=60, r=5, t=25, b=60),
    legend=dict(
        # title_text="Flipon group",
        # traceorder="reversed",
        orientation="h",
        yanchor="bottom",
        y=1.05,
        xanchor="right",
        x=1,
    ),
    template="ggplot2",
)

fig.write_image(d_img / "flipon-to-mirna-mapping-strand-counts-mirna-only.png")
fig.show()
