In [None]:
import os
import numpy
from plotly import offline as plotly
from plotly import graph_objects
from scipy import stats
from statsmodels.stats import multitest

import capblood_seq
from capblood_seq import config

In [None]:
# Load the normalized dataset from the local "data" directory. Per the
# library's usage here, this downloads the data first if it is not already
# present, then loads it into memory.
dataset = capblood_seq.load_dataset(
    data_directory="data",
    pipeline_name="normalized",
)

# NOTE(review): presumably drops cells that carry more than one of the
# configured cell type labels -- confirm against capblood_seq.
dataset.filter_multi_labeled_cells(config.CELL_TYPES)

In [None]:
# Cell populations to compare. A None entry is labelled "All Cells" by the
# aggregation code further down.
CELL_TYPES = ["Monocytes", None]

# Gene whose expression is compared.
GENE = "S100A12"

# Subject that is contrasted against all other subjects.
SUBJECT_ID = "S2"

In [None]:
# Collect the per-sample mean expression of GENE, split into the subject of
# interest (SUBJECT_ID) and all other subjects. The flat lists below span every
# cell type; the parallel *_mean_groups lists record which cell type (by index
# into CELL_TYPES) each entry belongs to, and the *_colors lists record the
# AM/PM color of each sample.
subject_colors = []
other_colors = []
subject_means = []
other_means = []
subject_mean_groups = []
other_mean_groups = []

# One p-value and one display label per cell type, in CELL_TYPES order.
p_values = []
cell_type_label_list = []

for cell_type_index, cell_type in enumerate(CELL_TYPES):

    # Means belonging to this cell type only. The t-test below must run on
    # these rather than on the cumulative lists, otherwise every cell type
    # after the first would be tested on samples pooled with earlier ones.
    cell_type_subject_means = []
    cell_type_other_means = []

    cell_type_label = "All Cells" if cell_type is None else cell_type
    cell_type_label_list.append(cell_type_label)

    for subject_id in config.SUBJECT_IDS:

        for sample in config.SAMPLE_NAMES:

            transcript_counts = dataset.get_transcript_counts(
                sample,
                cell_type=cell_type,
                subject_id=subject_id,
                normalized=True,
                genes=GENE
            )

            # Nothing was returned for this sample/subject/cell type
            # combination, so there is no mean to record.
            if transcript_counts is None:
                continue

            sample_mean = transcript_counts.to_array().mean()

            # Samples whose name contains "AM" get the AM color, all others
            # the PM color.
            color = config.AM_COLOR if "AM" in sample else config.PM_COLOR

            if subject_id == SUBJECT_ID:
                subject_means.append(sample_mean)
                subject_colors.append(color)
                subject_mean_groups.append(cell_type_index)
                cell_type_subject_means.append(sample_mean)
            else:
                other_means.append(sample_mean)
                other_colors.append(color)
                other_mean_groups.append(cell_type_index)
                cell_type_other_means.append(sample_mean)

    # Two-sample t-test of the subject's sample means against everyone else's,
    # restricted to the current cell type.
    _, p_value = stats.ttest_ind(cell_type_subject_means, cell_type_other_means)
    p_values.append(p_value)

# Top of the y axis: the largest absolute mean plus 10% headroom.
y_max = max(numpy.abs(subject_means).max(), numpy.abs(other_means).max()) * 1.1

# Box of the subject of interest. The color and legend name are keyed on the
# SUBJECT_ID constant; the `subject_id` loop variable left over from iterating
# config.SUBJECT_IDS holds whichever subject came last, not the one plotted.
subject_box_trace = graph_objects.Box(
    x=subject_mean_groups,
    y=subject_means,
    line={
        "color": config.SUBJECT_ID_COLORS[SUBJECT_ID]
    },
    name=SUBJECT_ID
)

subject_mean_groups = numpy.array(subject_mean_groups)
other_mean_groups = numpy.array(other_mean_groups)

# x positions for the individual sample markers: subject markers are shifted
# 0.35 to the left of their cell type index, the others stay on the index.
subject_mean_groups_jittered = subject_mean_groups.astype(numpy.float32) - 0.35
other_mean_groups_jittered = other_mean_groups.astype(numpy.float32)

# Small random horizontal jitter (up to 1/40 of a unit) so that markers with
# similar values do not sit exactly on top of each other.
subject_mean_groups_jittered += numpy.random.rand(len(subject_mean_groups_jittered))/40
other_mean_groups_jittered += numpy.random.rand(len(other_mean_groups_jittered))/40

# Individual sample means of the subject of interest, colored AM/PM.
subject_scatter_trace = graph_objects.Scatter(
    x=subject_mean_groups_jittered,
    y=subject_means,
    marker_color=subject_colors,
    mode="markers",
    showlegend=False,
    name=SUBJECT_ID
)

# Box of all remaining subjects pooled together.
other_box_trace = graph_objects.Box(
    x=other_mean_groups,
    y=other_means,
    line={
        "color": "grey"
    },
    name="Others"
)

# Individual sample means of the remaining subjects, colored AM/PM.
other_scatter_trace = graph_objects.Scatter(
    x=other_mean_groups_jittered,
    y=other_means,
    marker_color=other_colors,
    mode="markers",
    showlegend=False,
    name="Others"
)

# The y axis starts at zero and runs up to the precomputed y_max.
y_min = 0
title = "Mean expression"

y_axis_settings = {
    "range": [y_min, y_max],
    "title": title,
    "exponentformat": "power"
}

# One tick per cell type, labelled with the cell type's display name.
x_axis_settings = {
    "tickvals": list(range(len(cell_type_label_list))),
    "ticktext": cell_type_label_list
}

# Figure title, horizontally centered in the container.
figure_title = {
    "text": "%s Expression %s vs Others" % (GENE, SUBJECT_ID),
    "xanchor": "center",
    "xref": "container",
    "x": 0.5
}

layout = graph_objects.Layout(
    {
        "yaxis": y_axis_settings,
        "plot_bgcolor": "rgba(0, 0, 0, 0)",
        "width": 800,
        "title": figure_title,
        "boxmode": "group",
        "xaxis": x_axis_settings
    }
)

# Trace order matters for grouping/legend: subject first, then the others.
figure_traces = [
    subject_box_trace,
    subject_scatter_trace,
    other_box_trace,
    other_scatter_trace
]
figure = graph_objects.Figure(data=figure_traces, layout=layout)

# Draw a significance bracket with its p-value above every cell type group.
for cell_type_index, cell_type_label in enumerate(cell_type_label_list):

    bracket_left = cell_type_index-0.175
    bracket_right = cell_type_index+0.175
    bracket_tip = y_max * 0.95

    # Each bracket is three line segments, given as (x0, x1, y0, y1, width):
    # the horizontal bar first, then the left and right vertical ends.
    bracket_segments = (
        (bracket_left, bracket_right, y_max, y_max, 5),
        (bracket_left, bracket_left, y_max, bracket_tip, 4),
        (bracket_right, bracket_right, y_max, bracket_tip, 4),
    )

    for segment_x0, segment_x1, segment_y0, segment_y1, segment_width in bracket_segments:
        figure.add_shape(
            graph_objects.layout.Shape(
                type="line",
                x0=segment_x0,
                x1=segment_x1,
                y0=segment_y0,
                y1=segment_y1,
                line={
                    "color": "Black",
                    "width": segment_width
                }
            )
        )

    # p-value text, centered on the group and sitting on top of the bracket.
    p_value_annotation = graph_objects.layout.Annotation(
        text="p=%.1e" % p_values[cell_type_index],
        showarrow=False,
        yanchor="bottom",
        yref="y",
        y=y_max,
        x=cell_type_index,
        xref="x",
        xanchor="center"
    )
    figure.add_annotation(p_value_annotation)

# Show the figure inline in the notebook.
plotly.iplot(figure)

# Export the figure as SVG and HTML. The output directory is created first so
# that the export does not fail when the notebook runs in a fresh checkout.
figures_directory = "figures"
os.makedirs(figures_directory, exist_ok=True)

figure_file_stem = "%s_%s_vs_others_box_plot" % (GENE, SUBJECT_ID)
figure.write_image(os.path.join(figures_directory, figure_file_stem + ".svg"))
figure.write_html(os.path.join(figures_directory, figure_file_stem + ".html"))