In [None]:
"""Create violin plots of the distribution of region lengths for various feature sets."""
# pylint: disable=import-error, unused-import, redefined-outer-name, use-dict-literal, too-many-lines

In [9]:
from pathlib import Path

import pandas as pd
import plotly.graph_objects as go
from IPython.display import display

In [10]:
# Root folder holding everything produced for the paper, located under the
# current user's home directory.
base_dir = Path.home().joinpath("Projects/epiclass/output/paper")

# Input tables live in "data", rendered figures go to "figures".
base_data_dir = base_dir.joinpath("data")
base_fig_dir = base_dir.joinpath("figures")

# Alias of the root folder.
paper_dir = base_dir

In [11]:
# Regulatory regions: tab-separated table whose first row is used as the header
# (the "start" and "end" columns are read from it further down).
reg_regions_path = base_data_dir.joinpath(
    "ChromHMM",
    "StackedChromHMM_hg38_EnhancerMaxK27acCorrelations.txt.gz",
)
df_reg_regions = pd.read_csv(reg_regions_path, sep="\t")

# Gene regions: header-less BED file, so columns get integer labels
# (0, 1, 2, ...) instead of names.
gene_regions_path = base_data_dir.joinpath("filters", "hg38.gene_regions.bed")
df_gene_regions = pd.read_csv(gene_regions_path, sep="\t", header=None)

In [12]:
# Region length = end coordinate minus start coordinate, stored in a new "len"
# column of each table.
df_reg_regions["len"] = df_reg_regions["end"].sub(df_reg_regions["start"])

# The BED file was read without a header: column 1 is used as the start and
# column 2 as the end.
df_gene_regions["len"] = df_gene_regions[2].sub(df_gene_regions[1])

In [13]:
# Number of leading rows kept from each table.
# NOTE(review): 303114 and 30321 presumably match the sizes of the feature sets
# these regions are compared against — confirm.
N_REGIONS_LARGE = 303114
N_REGIONS_SMALL = 30321

# Positional slicing: `.iloc[:n]` keeps exactly the first n rows (or the whole
# table when it has fewer rows), whatever the index labels are. A label-based
# `.loc[0:n]` slice includes both endpoints, i.e. n + 1 rows, and is only
# meaningful with a default 0..N-1 index.
vals_10kb = df_reg_regions.iloc[:N_REGIONS_LARGE, :]
vals_100kb = df_reg_regions.iloc[:N_REGIONS_SMALL, :]
# NOTE(review): the gene table is plotted as "20k-gene", so this cap is
# presumably a no-op — confirm.
vals_gene = df_gene_regions.iloc[:N_REGIONS_SMALL, :]

In [None]:
# Folder for this figure family. Only the last path component is created:
# with parents=False a missing `base_fig_dir` raises instead of being created.
logdir = base_fig_dir / "regions_len_dist"
logdir.mkdir(parents=False, exist_ok=True)


# Legend label -> table providing the "len" values; traces are added in this order.
len_sets = {
    "300k-reg": vals_10kb,
    "30k-reg": vals_100kb,
    "20k-gene": vals_gene,
}

fig = go.Figure()
for name, df in len_sets.items():
    # One violin per feature set: inner box plot and mean line shown,
    # individual points hidden.
    violin = go.Violin(
        y=df["len"],
        name=name,
        points=False,
        spanmode="hard",
        box_visible=True,
        meanline_visible=True,
        opacity=0.9,
    )
    fig.add_trace(violin)

fig.update_layout(
    title="Distribution of regions lengths",
    yaxis_title="Length (bp)",
    showlegend=True,
)

# Base name (no extension) for the exported figure. Saving is currently
# switched off; uncomment the writers below to export to disk.
path = logdir / "regions_len_dist"
# fig.write_html(path.with_suffix(".html"))
# fig.write_image(path.with_suffix(".png"))
# fig.write_image(path.with_suffix(".svg"))
fig.show()

# Show the same figure again with the y-axis restricted to [0, zoom] kb,
# first at 100 kb and then at 25 kb.
for zoom in (100, 25):
    upper_bp = zoom * 1000  # kb -> bp
    fig.update_yaxes(range=[0, upper_bp])

    # Base name (no extension) for this zoom level. Saving is currently
    # switched off; uncomment the writers below to export to disk.
    path = logdir / f"regions_len_dist_zoom_{zoom}kb"
    # fig.write_html(path.with_suffix(".html"))
    # fig.write_image(path.with_suffix(".png"))
    # fig.write_image(path.with_suffix(".svg"))
    fig.show()