In [None]:
import os

import numpy
from plotly import offline as plotly
from plotly import graph_objects
from plotly.subplots import make_subplots

from capblood_seq.dataset import Capblood_Seq_Dataset
from capblood_seq import common as cbs
from capblood_seq import viz as cbs_viz

In [None]:
# Load the dataset. This downloads it if it doesn't exist already, and loads it into memory
dataset = Capblood_Seq_Dataset(data_directory="data", pipeline_name="normalized")
dataset.load()
# NOTE(review): presumably drops cells that carry more than one of the
# cbs.CELL_TYPES labels, so each remaining cell has a single cell type -
# confirm against the capblood_seq package
dataset.filter_multi_labeled_cells(cbs.CELL_TYPES)

In [None]:
# Gene whose expression values are fetched and plotted (also used in the
# plot title and the output file name)
GENE = "LIPA"

# Specify the subject to compare; cells from this subject are plotted against
# the cells of every other subject pooled together
SUBJECT_ID = "S1"

# Specify the cell type to filter - or None for all
# (when falsy, the cell type suffix is left off the plot title and file name)
CELL_TYPE = "Monocytes"

# Number of bins for the histogram of non-zero expression values
NUM_BINS = 50

In [None]:
# Expression values of GENE, pooled over every sample and split into the
# subject of interest versus all remaining subjects
subject_values = []
other_values = []

for sample_name in cbs.SAMPLE_NAMES:
    for candidate_id in cbs.SUBJECT_IDS:

        counts = dataset.get_transcript_counts(
            sample_name,
            cell_type=CELL_TYPE,
            subject_id=candidate_id,
            normalized=True,
            genes=GENE
        )

        # Sample/subject combinations that return None contribute nothing
        if counts is not None:
            destination = subject_values if candidate_id == SUBJECT_ID else other_values
            destination.extend(counts.to_array())

# Convert the accumulated lists to arrays so they can be masked and averaged
subject_transcript_counts = numpy.array(subject_values)
other_transcript_counts = numpy.array(other_values)

In [None]:
# Only non-zero values are binned; the share of cells with zero expression is
# drawn as a separate bar in the narrow left-hand subplot.
subject_nonzero_counts = subject_transcript_counts[subject_transcript_counts != 0]
other_nonzero_counts = other_transcript_counts[other_transcript_counts != 0]

# Derive the bin edges once from the pooled values so that both groups are
# binned on exactly the same edges and their bars line up when overlaid.
bin_counts, bin_edges = numpy.histogram(
    numpy.concatenate((subject_nonzero_counts, other_nonzero_counts)),
    bins=NUM_BINS
)

# Two side-by-side panels: a narrow one (10% of the width) and a wide one (90%)
figure = make_subplots(rows=1, cols=2, column_widths=[0.1, 0.9])

# Per-group counts on the shared edges
subject_bin_counts, _ = numpy.histogram(subject_nonzero_counts, bins=bin_edges)
other_bin_counts, _ = numpy.histogram(other_nonzero_counts, bins=bin_edges)

# Legend labels: mean over all cells of the group (zeros included) and the
# number of cells in the group
subject_trace_name = "%s (u=%.3e, %i cells)" % (SUBJECT_ID, subject_transcript_counts.mean(), len(subject_transcript_counts))
other_trace_name = "Others (u=%.3e, %i cells)" % (other_transcript_counts.mean(), len(other_transcript_counts))

# numpy.histogram returns NUM_BINS + 1 edges for NUM_BINS counts, and a Bar
# trace positions each bar by its centre. Plotting against the bin midpoints
# gives one x per bar and makes each bar cover its own bin instead of being
# shifted half a bin to the left.
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

# Heights are percentages of ALL cells in the group (zero-expression cells are
# in the denominator), so they are comparable with the zero bars.
subject_histogram = graph_objects.Bar(
    x=bin_centers,
    y=subject_bin_counts/subject_transcript_counts.shape[0] * 100,
    opacity=0.5,
    name=subject_trace_name,
    marker={
        "color": cbs_viz.SUBJECT_ID_COLORS[SUBJECT_ID]
    }
)

figure.add_trace(subject_histogram, row=1, col=2)

other_histogram = graph_objects.Bar(
    x=bin_centers,
    y=other_bin_counts/other_transcript_counts.shape[0] * 100,
    opacity=0.5,
    name=other_trace_name,
    marker={
        "color": "grey"
    }
)

figure.add_trace(other_histogram, row=1, col=2)

# Percentage of cells in each group with exactly zero expression of GENE
subject_zero_percent = numpy.count_nonzero(subject_transcript_counts == 0) / subject_transcript_counts.shape[0] * 100
other_zero_percent = numpy.count_nonzero(other_transcript_counts == 0) / other_transcript_counts.shape[0] * 100

# The zero bars reuse the trace names and colours of the matching histogram
# traces but are hidden from the legend so each group is listed only once.
subject_zero_histogram = graph_objects.Bar(
    x=[0],
    y=[subject_zero_percent],
    opacity=0.5,
    showlegend=False,
    name=subject_trace_name,
    marker={
        "color": cbs_viz.SUBJECT_ID_COLORS[SUBJECT_ID]
    }
)

figure.add_trace(subject_zero_histogram, row=1, col=1)

other_zero_histogram = graph_objects.Bar(
    x=[0],
    y=[other_zero_percent],
    opacity=0.5,
    showlegend=False,
    name=other_trace_name,
    marker={
        # Grey, matching the "Others" histogram trace; using the subject's
        # colour here made the two overlaid zero bars indistinguishable.
        "color": "grey"
    }
)

figure.add_trace(other_zero_histogram, row=1, col=1)

# Title names the gene and, when a cell type filter is set, the cell type
plot_title = "%s Expression" % GENE

if CELL_TYPE:
    plot_title += " in %s" % CELL_TYPE

# Overlay the two groups' bars (rather than stacking them) with no gap between
# bars, on a transparent background.
# NOTE(review): in a make_subplots figure "xaxis"/"yaxis" appear to address the
# first subplot (the narrow zero column) - confirm that the "Gene Abundance"
# title is not meant for the histogram panel instead.
figure.update_layout(
    barmode="overlay",
    title=plot_title,
    plot_bgcolor="rgba(255, 255, 255, 0)",
    paper_bgcolor="rgba(255, 255, 255, 0)",
    xaxis=dict(title="Gene Abundance"),
    yaxis=dict(title="% of Cells"),
    bargap=0
)

# Zero column: full percentage scale
figure.update_yaxes(range=[0, 100], row=1, col=1)

# Histogram panel: fixed, tighter percentage scale
figure.update_yaxes(range=[0, 16], row=1, col=2)

# Zero column: the only tick shown is the one at 0
figure.update_xaxes(tickvals=[0], row=1, col=1)

# Render the figure inline in the notebook
plotly.iplot(figure)

# Output name encodes the gene, the subject and (if set) the cell type, with
# spaces in the cell type replaced so the name is filesystem friendly
file_name = "%s_%s_vs_other_split_histogram" % (GENE, SUBJECT_ID)

if CELL_TYPE:
    file_name += "_%s" % CELL_TYPE.replace(" ", "_")

# Make sure the output directory exists; otherwise both writes below fail on a
# fresh checkout where "figures" has not been created yet
os.makedirs("figures", exist_ok=True)

figure.write_image(os.path.join("figures", file_name + ".svg"))
figure.write_html(os.path.join("figures", file_name + ".html"))