In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this is what makes the local `rnasieve` package
# importable from this notebook's directory — confirm.
sys.path.append('../')

In [5]:
import numpy as np
import pandas as pd
import anndata
from rnasieve.preprocessing import model_from_raw_counts

In [6]:
# Example raw count prep
subset_bulk = anndata.read_h5ad('../example_data/muscle_subset_bulk.h5ad')
subset = anndata.read_h5ad('../example_data/muscle_subset.h5ad')

# Raw counts prep: build {cell ontology class -> (genes x cells) matrix}.
# Each cell contributes one column vector. Columns are gathered per class and
# stacked once at the end, because calling np.hstack inside the loop re-copies
# the growing matrix on every iteration.
# The gene count comes from the AnnData object itself (as the bulk section
# does) rather than from the shape of a one-row slice's X.
n_genes = subset.n_vars
columns_by_onto_class = {}
for sc in subset:
    # .iloc[0] reads the first row of this slice's obs by position; plain
    # `[0]` relies on pandas' deprecated positional fallback for integer keys.
    cell_onto_class = sc.obs['cell_ontology_class'].iloc[0]
    columns_by_onto_class.setdefault(cell_onto_class, []).append(sc.X.reshape((-1, 1)))

# The empty float32 seed keeps the same dtype promotion as stacking onto an
# initially empty float32 matrix.
counts_by_onto_class = {
    onto_class: np.hstack([np.empty((n_genes, 0), dtype=np.float32)] + columns)
    for onto_class, columns in columns_by_onto_class.items()
}

# Bulk prep: group bulk column vectors by their 'Age' annotation.
G = subset_bulk.n_vars
bulk_by_age = {}
for bulk in subset_bulk:
    age = bulk.obs['Age'].iloc[0]
    bulk_by_age.setdefault(age, []).append(bulk.X.reshape(-1, 1))

# Order the bulks by age, then by their position within the age group, and
# record a matching human-readable label for each column of `psis`.
bulk_labels = []
bulk_columns = []
for age in sorted(bulk_by_age):
    for i, bulk_column in enumerate(bulk_by_age[age]):
        bulk_labels.append("{} months, subject {}".format(age, i))
        bulk_columns.append(bulk_column)

# Single stack; the empty seed also yields a (G, 0) result when there are no bulks.
psis = np.hstack([np.empty((G, 0), dtype=np.float32)] + bulk_columns)

  warn_flatten()


In [7]:
# Build the model from the per-cell-type count matrices, passing only the
# first two bulk columns of `psis` (`psis[:, :2]`).
# NOTE(review): the restriction to two bulks looks like a demo shortcut, and
# `cleaned_psis` is presumably the bulk matrix after the same preprocessing
# applied to the counts — confirm against model_from_raw_counts.
model, cleaned_psis = model_from_raw_counts(counts_by_onto_class, psis[:, :2])

In [8]:
# Run the prediction on the cleaned bulk data. The recorded output is a table
# with one row per bulk ("Bulk 0", "Bulk 1") and one column per cell type.
model.predict(cleaned_psis)

Unnamed: 0,B cell,T cell,endothelial cell,macrophage,mesenchymal stem cell,skeletal muscle satellite cell
Bulk 0,0.027959,0.0,0.149951,0.243414,0.146378,0.432297
Bulk 1,0.037862,0.0,0.141346,0.286565,0.219228,0.315


In [14]:
# Compute marginal confidence intervals. The recorded output is a list with
# one entry per bulk, each a list of (lower, upper) tuples.
# NOTE(review): presumably one tuple per cell type, in the same column order as
# the predict() table; the exact meaning of `sig=0.75` is not visible here —
# confirm. Some recorded lower bounds are negative.
model.compute_marginal_confidence_intervals(sig=0.75)

[[(-0.008607292412463274, 0.06452588854772831),
  (-0.01992898235371815, 0.01992898235371815),
  (0.11623935575866765, 0.18366257401447142),
  (0.19557494414366083, 0.29125332217306144),
  (0.08841007624023756, 0.20434690593692423),
  (0.4067282077063674, 0.4578660178913444)],
 [(0.0018342069591793034, 0.07388937649910354),
  (-0.01979503227983546, 0.01979503227983546),
  (0.11085979696378759, 0.17183142916583524),
  (0.24459928145478693, 0.3285301470452854),
  (0.1677983165037742, 0.27065700744031973),
  (0.28885735502566595, 0.3411430829422621)]]

In [15]:
# Plot the estimated proportions in the 'bar' layout and set the chart title.
# NOTE(review): `.properties(title=...)` suggests the returned object is an
# Altair chart — confirm.
model.plot_proportions('bar').properties(title="Muscle Proportion Estimates")

In [16]:
# Plot the estimated proportions in the 'stacked' layout with the same title
# as the 'bar' plot.
model.plot_proportions('stacked').properties(title="Muscle Proportion Estimates")