In [None]:
import os
import numpy
from plotly import offline as plotly
from plotly import graph_objects

import capblood_seq
from capblood_seq import config

In [None]:
# Bring the dataset into memory. Per the original author's note, load_dataset
# downloads the data first when it is not already present in the data directory.
dataset = capblood_seq.load_dataset(
    data_directory="data",
    pipeline_name="debris_filtered",
)

In [None]:
# Percentages below are meant to be relative to identified cells only, so the
# dataset is narrowed (in place) before anything is counted.
# NOTE(review): presumably filter_unlabeled_cells drops cells with no label and
# filter_multi_labeled_cells drops cells carrying more than one label from the
# given list -- confirm against capblood_seq.
dataset.filter_unlabeled_cells()
dataset.filter_multi_labeled_cells(config.SUBJECT_IDS)

# Subtype labels are spelled "<subtype> <cell type>", one per entry of every
# cell type listed in config.CELL_SUBTYPES.
cell_subtype_labels = [
    "%s %s" % (subtype_name, type_name)
    for type_name, subtype_names in config.CELL_SUBTYPES.items()
    for subtype_name in subtype_names
]
dataset.filter_multi_labeled_cells(config.CELL_TYPES)
dataset.filter_multi_labeled_cells(cell_subtype_labels)

In [None]:
# For each cell type and subtype, summarize the fraction of cells it makes up
# across every (sample, subject) pair: mean, standard deviation and total count.

# The per-(sample, subject) totals do not depend on the label being summarized,
# so they are queried once here instead of once per cell type and subtype.
sample_subject_totals = []
for sample in config.SAMPLE_NAMES:
    for subject_id in config.SUBJECT_IDS:
        num_subject_cells = dataset.get_num_cells(sample, subject_id=subject_id)
        # Pairs without any cells cannot contribute a fraction; skip them
        if not num_subject_cells:
            continue
        sample_subject_totals.append((sample, subject_id, num_subject_cells))


def _summarize_label(label):
    """Summarize how common the cells carrying `label` are.

    Args:
        label: Value passed as `cell_type` to `dataset.get_num_cells`; either a
            cell type name or a "<subtype> <cell type>" subtype label.

    Returns:
        A `(mean, std, total)` tuple: the mean and standard deviation
        (`numpy.std` with its default settings) of the per-(sample, subject)
        fractions, and the total number of cells with this label summed over
        those pairs. Mean and std are NaN when no pair had any cells.
    """
    fractions = []
    total_cells = 0
    for sample, subject_id, num_subject_cells in sample_subject_totals:
        num_cells = dataset.get_num_cells(sample, cell_type=label, subject_id=subject_id)
        total_cells += num_cells
        fractions.append(num_cells / num_subject_cells)
    if not fractions:
        # numpy.mean/std of an empty list also give NaN, but emit RuntimeWarnings
        nan = float("nan")
        return nan, nan, total_cells
    return numpy.mean(fractions), numpy.std(fractions), total_cells


# Despite the "percentages" naming, the stored values are fractions in [0, 1]
cell_type_percentages = {}
cell_type_errors = {}
cell_type_num_cells = {}

for cell_type in config.CELL_TYPES:
    mean_fraction, std_fraction, total_cells = _summarize_label(cell_type)
    cell_type_percentages[cell_type] = mean_fraction
    cell_type_errors[cell_type] = std_fraction
    cell_type_num_cells[cell_type] = total_cells

# Repeat for each subtype, keyed first by cell type and then by subtype
cell_subtype_percentages = {}
cell_subtype_errors = {}
cell_subtype_num_cells = {}

for cell_type, cell_subtypes in config.CELL_SUBTYPES.items():
    cell_subtype_percentages[cell_type] = {}
    cell_subtype_errors[cell_type] = {}
    cell_subtype_num_cells[cell_type] = {}
    for cell_subtype in cell_subtypes:
        mean_fraction, std_fraction, total_cells = _summarize_label(
            " ".join([cell_subtype, cell_type])
        )
        cell_subtype_percentages[cell_type][cell_subtype] = mean_fraction
        cell_subtype_errors[cell_type][cell_subtype] = std_fraction
        cell_subtype_num_cells[cell_type][cell_subtype] = total_cells

In [None]:
# Build a two-level sunburst (cell types, with their subtypes nested beneath),
# display it inline and export it as SVG and HTML.

def _sector_label(name, mean_fraction, std_fraction):
    """Return a sector's display text: its name, then mean +/- std in percent.

    `mean_fraction` and `std_fraction` are fractions; they are scaled by 100
    here. "<BR>" and "&#177;" are the markup for a line break and a plus-minus
    sign in the rendered label.
    """
    return "%s<BR>%.1f&#177;%.1f%%" % (name, mean_fraction * 100, std_fraction * 100)


labels = []
# Explicit ids keep the hierarchy links independent of the display text, which
# embeds rounded statistics and therefore is not guaranteed to be unique.
ids = []
parent_ids = []
values = []
colors = []

for cell_type in cell_type_percentages:

    labels.append(_sector_label(
        cell_type,
        cell_type_percentages[cell_type],
        cell_type_errors[cell_type]
    ))
    ids.append(cell_type)
    # "All" is not itself listed as a sector; it only names the common root
    parent_ids.append("All")
    values.append(cell_type_num_cells[cell_type])
    colors.append(config.CELL_TYPE_HIERARCHICAL_COLORS[cell_type])

    # Cell types without an entry in cell_subtype_percentages have no children
    for cell_subtype in cell_subtype_percentages.get(cell_type, {}):
        cell_subtype_label = " ".join([cell_subtype, cell_type])
        labels.append(_sector_label(
            cell_subtype,
            cell_subtype_percentages[cell_type][cell_subtype],
            cell_subtype_errors[cell_type][cell_subtype]
        ))
        ids.append(cell_subtype_label)
        parent_ids.append(cell_type)
        values.append(cell_subtype_num_cells[cell_type][cell_subtype])
        colors.append(config.CELL_TYPE_HIERARCHICAL_COLORS[cell_subtype_label])

sunburst = graph_objects.Sunburst(
    ids=ids,
    labels=labels,
    parents=parent_ids,
    values=values,
    # Each value is that label's own total cell count rather than a remainder
    branchvalues="total",
    leaf={"opacity": 1.0},
    marker=dict(
        colors=colors
    )
)

layout = graph_objects.Layout(
    title="Cell Type Distribution",
    plot_bgcolor="rgba(0,0,0,0)",
    paper_bgcolor="rgba(0,0,0,0)",
    showlegend=False
)

figure = graph_objects.Figure([sunburst], layout=layout)

plotly.iplot(figure)

# Make sure the output directory exists so the exports work on a fresh checkout
os.makedirs("figures", exist_ok=True)
figure.write_image(os.path.join("figures", "cell_type_sunburst.svg"))
figure.write_html(os.path.join("figures", "cell_type_sunburst.html"))