In [None]:
import os

import numpy
from plotly import offline as plotly
from plotly import graph_objects
from plotly.subplots import make_subplots

import capblood_seq
from capblood_seq import config

In [None]:
# Bring the dataset into memory. Per the original note, it is downloaded
# first when it is not already present in the data directory.
dataset = capblood_seq.load_dataset(
    data_directory="data",
    pipeline_name="normalized"
)

In [None]:
# Gene whose expression is plotted; also used in the plot title and the
# output file names
GENE = "DDIT4"

# Specify the cell type to plot - or None for all
CELL_TYPE = "NK Cells"

# Specify the subject to plot - or None for all
SUBJECT_ID = None

# Number of bins used for the histogram of non-zero transcript counts
NUM_BINS = 50

In [None]:
# Pool the GENE counts of every sample into two groups. A sample whose name
# contains "AM" feeds the AM pool; every other sample feeds the PM pool.
AM_transcript_counts = []
PM_transcript_counts = []

for sample in config.SAMPLE_NAMES:
    # Only one gene is requested, so keep the first column of the result
    # (presumably one entry per cell - confirm against capblood_seq).
    sample_counts = dataset.get_transcript_counts(
        sample,
        cell_type=CELL_TYPE,
        subject_id=SUBJECT_ID,
        normalized=True,
        genes=GENE
    ).to_array()[:, 0]

    pooled_counts = AM_transcript_counts if "AM" in sample else PM_transcript_counts
    pooled_counts.extend(sample_counts)

# Convert the pooled lists to arrays so later cells can use boolean masks
AM_transcript_counts = numpy.array(AM_transcript_counts)
PM_transcript_counts = numpy.array(PM_transcript_counts)

In [None]:
# Build, show and save a two-panel AM vs PM comparison of GENE expression:
#   column 1 (narrow) - percentage of cells whose count is exactly zero
#   column 2 (wide)   - histogram of the non-zero counts, expressed as a
#                       percentage of ALL cells in the respective pool


def _percent_bar(x, y, name, color, **trace_options):
    """Build a half-transparent bar trace drawn in ``color``.

    Any extra keyword arguments are forwarded unchanged to
    ``graph_objects.Bar`` (e.g. ``width`` or ``showlegend``).
    """
    return graph_objects.Bar(
        x=x,
        y=y,
        opacity=0.5,
        name=name,
        marker={
            "color": color
        },
        **trace_options
    )


# Every percentage below divides by the pool size, so fail with a clear
# message instead of a bare ZeroDivisionError when a pool is empty.
if AM_transcript_counts.shape[0] == 0 or PM_transcript_counts.shape[0] == 0:
    raise ValueError(
        "No cells found for the AM and/or PM samples with the current "
        "CELL_TYPE/SUBJECT_ID selection; nothing to plot"
    )

AM_nonzero_counts = AM_transcript_counts[AM_transcript_counts != 0]
PM_nonzero_counts = PM_transcript_counts[PM_transcript_counts != 0]

# Derive one shared set of bin edges from the combined non-zero counts so
# that the AM and PM histograms are directly comparable.
bin_counts, bin_edges = numpy.histogram(
    numpy.concatenate((AM_nonzero_counts, PM_nonzero_counts)),
    bins=NUM_BINS
)

AM_bin_counts, _ = numpy.histogram(AM_nonzero_counts, bins=bin_edges)
PM_bin_counts, _ = numpy.histogram(PM_nonzero_counts, bins=bin_edges)

# numpy.histogram returns NUM_BINS + 1 edges for NUM_BINS counts. Place each
# bar at the centre of its bin, with the bin's width, so it covers exactly
# the interval it counts (using the edges as x shifts bars by half a bin).
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_widths = numpy.diff(bin_edges)

# The legend label reports the mean over all cells (zeros included) and the
# total number of cells in the pool.
AM_trace_name = "AM (u=%.3e, %i cells)" % (AM_transcript_counts.mean(), len(AM_transcript_counts))
PM_trace_name = "PM (u=%.3e, %i cells)" % (PM_transcript_counts.mean(), len(PM_transcript_counts))

figure = make_subplots(rows=1, cols=2, column_widths=[0.1, 0.9])

# Right panel: distribution of the non-zero counts
AM_histogram = _percent_bar(
    bin_centers,
    AM_bin_counts/AM_transcript_counts.shape[0] * 100,
    AM_trace_name,
    config.AM_COLOR,
    width=bin_widths
)
figure.add_trace(AM_histogram, row=1, col=2)

PM_histogram = _percent_bar(
    bin_centers,
    PM_bin_counts/PM_transcript_counts.shape[0] * 100,
    PM_trace_name,
    config.PM_COLOR,
    width=bin_widths
)
figure.add_trace(PM_histogram, row=1, col=2)

# Left panel: share of cells with a count of exactly zero. These traces
# reuse the names above, so they are hidden from the legend.
AM_zero_histogram = _percent_bar(
    [0],
    [AM_transcript_counts[AM_transcript_counts == 0].shape[0]/AM_transcript_counts.shape[0] * 100],
    AM_trace_name,
    config.AM_COLOR,
    showlegend=False
)
figure.add_trace(AM_zero_histogram, row=1, col=1)

PM_zero_histogram = _percent_bar(
    [0],
    [PM_transcript_counts[PM_transcript_counts == 0].shape[0]/PM_transcript_counts.shape[0] * 100],
    PM_trace_name,
    config.PM_COLOR,
    showlegend=False
)
figure.add_trace(PM_zero_histogram, row=1, col=1)

# Title: "<GENE> Expression[ in [<subject> ][<cell type>]]"
plot_title = "%s Expression" % GENE

if SUBJECT_ID or CELL_TYPE:
    plot_title += " in "
    if SUBJECT_ID:
        plot_title += "%s " % SUBJECT_ID
    if CELL_TYPE:
        plot_title += CELL_TYPE

    plot_title = plot_title.strip()

# NOTE(review): "xaxis"/"yaxis" are the layout keys of the first subplot
# (the zero panel), so these axis titles land there - confirm that is the
# intended placement.
figure.update_layout(
    {
        "barmode": "overlay",
        "title": plot_title,
        "plot_bgcolor": "rgba(255, 255, 255, 0)",
        "paper_bgcolor": "rgba(255, 255, 255, 0)",
        "xaxis": {
            "title": "Gene Abundance",
        },
        "yaxis": {
            "title": "% of Cells",
        },
        "bargap": 0
    }
)

# The zero panel uses the full percentage scale
figure.update_yaxes(
    {
        "range": [0, 100]
    },
    row=1,
    col=1
)

# NOTE(review): fixed upper limit of 16% for the histogram panel; taller
# bars would be clipped - confirm it suits the selected gene/cell type.
figure.update_yaxes(
    {
        "range": [0, 16]
    },
    row=1,
    col=2
)

# The zero panel holds a single category, so show only the tick at 0
figure.update_xaxes(
    {
        "tickvals": [0]
    },
    row=1,
    col=1
)

plotly.iplot(figure)

# Output name: "<GENE>_AM_PM_split_histogram[_<subject>][_<cell_type>]"
file_name = "%s_AM_PM_split_histogram" % GENE

if SUBJECT_ID:
    file_name += "_%s" % SUBJECT_ID
if CELL_TYPE:
    file_name += "_%s" % CELL_TYPE.replace(" ", "_")

# Create the output directory if needed so saving works on a fresh checkout
output_directory = "figures"
os.makedirs(output_directory, exist_ok=True)

figure.write_image(os.path.join(output_directory, file_name + ".svg"))
figure.write_html(os.path.join(output_directory, file_name + ".html"))