In [1]:
# Enable IPython's autoreload extension in mode 2: modules are re-imported
# before each cell executes, so edits to local source files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the parent directory to the module search path. The entry is relative,
# so it is resolved against the kernel's current working directory.
# NOTE(review): presumably this is what makes the `rnasieve` package imported
# below resolvable when it is not installed -- confirm the repo layout.
sys.path.append('../')

In [3]:
import numpy as np
import pandas as pd
import anndata
from rnasieve.preprocessing import model_from_raw_counts

In [4]:
# Example raw count prep
# Load the two example AnnData files (paths are relative to the kernel's
# working directory). `subset_bulk` is iterated below as bulk samples grouped
# by obs['Age']; `subset` is iterated as rows grouped by
# obs['cell_ontology_class'].
subset_bulk = anndata.read_h5ad('../example_data/muscle_subset_bulk.h5ad')
subset = anndata.read_h5ad('../example_data/muscle_subset.h5ad')

# Raw counts prep
# Build {cell ontology class -> (n_genes, n_cells_in_class) array}, with one
# column per row of `subset`, in the order the rows are iterated.
n_genes = subset.n_vars  # row count of every per-class matrix
columns_by_onto_class = {}
for sc in subset:
    # Select the single row's label by position. Plain `[0]` on a
    # non-integer index relies on a deprecated positional fallback.
    cell_onto_class = sc.obs['cell_ontology_class'].iloc[0]
    expression = sc.X
    if hasattr(expression, 'toarray'):
        # Densify scipy-sparse storage so it can be stacked with NumPy.
        expression = expression.toarray()
    # Reshape to a column so this works whether the single-row view exposes
    # X as 1-D (n_genes,) or 2-D (1, n_genes).
    columns_by_onto_class.setdefault(cell_onto_class, []).append(
        np.asarray(expression).reshape((-1, 1)))

# Stack each class once instead of re-copying a growing matrix per cell.
# The empty float32 seed keeps the original dtype promotion.
counts_by_onto_class = {
    onto_class: np.hstack([np.empty((n_genes, 0), dtype=np.float32)] + columns)
    for onto_class, columns in columns_by_onto_class.items()
}

# Bulk prep
# Group the bulk samples by their obs['Age'] value:
# {age -> list of (n_genes, 1) column arrays}, preserving iteration order.
G = subset_bulk.n_vars  # number of variables (genes) in the bulk data
bulk_by_age = {}
for bulk in subset_bulk:
    # Select the single row's age by position. Plain `[0]` on a non-integer
    # index relies on a deprecated positional fallback.
    age = bulk.obs['Age'].iloc[0]
    bulk_expression = bulk.X
    if hasattr(bulk_expression, 'toarray'):
        # Densify scipy-sparse storage so it can be stacked with NumPy later.
        bulk_expression = bulk_expression.toarray()
    bulk_by_age.setdefault(age, []).append(
        np.asarray(bulk_expression).reshape(-1, 1))

# Assemble the bulk matrix `psis` (G rows, one column per bulk sample) and a
# parallel list of column labels, ordered by age and then by position within
# each age group.
# NOTE(review): sorted() orders string ages lexicographically ('18' < '3');
# confirm the dtype of obs['Age'] if numeric ordering is intended.
bulk_labels = []
bulk_columns = []
for age in sorted(bulk_by_age):
    for i, bulk_column in enumerate(bulk_by_age[age]):
        bulk_labels.append("{} months, subject {}".format(age, i))
        bulk_columns.append(bulk_column)

# Stack once rather than re-copying the growing matrix for every bulk. The
# empty float32 seed keeps the (G, 0) result when there are no bulks and the
# original dtype promotion otherwise.
psis = np.hstack([np.empty((G, 0), dtype=np.float32)] + bulk_columns)

  warn_flatten()


In [5]:
# Build the model and the cleaned bulk matrix from the per-class raw counts.
# Only the first two bulk columns of `psis` are passed (the `[:, :2]` slice).
# NOTE(review): the filtering/normalisation that produces `cleaned_psis`
# lives in rnasieve.preprocessing and is not visible here.
model, cleaned_psis = model_from_raw_counts(counts_by_onto_class, psis[:, :2])

In [6]:
# Run the model on the cleaned bulks. The displayed result has one row per
# bulk and one column per cell ontology class (presumably the estimated
# cell-type proportions; each row in the output below sums to ~1).
model.predict(cleaned_psis)

Unnamed: 0,B cell,T cell,endothelial cell,macrophage,mesenchymal stem cell,skeletal muscle satellite cell
Bulk 0,0.027959,0.0,0.149952,0.243414,0.14638,0.432295
Bulk 1,0.037862,0.0,0.141348,0.286565,0.219226,0.314999


In [14]:
# Per-bulk, per-cell-type (lower, upper) intervals for the estimates above.
# NOTE(review): `sig=0.05` is presumably the significance level (i.e. 95%
# intervals) -- confirm against the rnasieve documentation.
# Some lower bounds in the output below are negative, so the intervals do not
# appear to be clipped to [0, 1].
model.compute_marginal_confidence_intervals(sig=0.05)

[[(-0.03363138009238814, 0.08954988874594808),
  (-0.0008901472977363052, 0.0008901472977363052),
  (0.010177923324470733, 0.2897258034674737),
  (0.04372689923699902, 0.44310074009092637),
  (-0.027470331177995383, 0.3202312503617941),
  (0.23434973175615395, 0.6302394742866176)],
 [(-0.017539613728021416, 0.09326310964468985),
  (-1.3783866126004802e-06, 1.3783866126004802e-06),
  (0.011459513825478718, 0.2712354931566428),
  (0.126884452096752, 0.4462454385936314),
  (-0.032230728414468834, 0.4706834114671079),
  (0.1511813517638615, 0.47881757159432625)]]

In [15]:
# Plot the model's proportions in the 'bar' layout and set the chart title.
# NOTE(review): `.properties(title=...)` suggests the returned object is an
# Altair chart -- confirm.
model.plot_proportions('bar').properties(title="Muscle Proportion Estimates")

In [16]:
# Same proportions as a 'stacked' chart, with the same title.
# NOTE(review): `.properties(title=...)` suggests the returned object is an
# Altair chart -- confirm.
model.plot_proportions('stacked').properties(title="Muscle Proportion Estimates")