In [1]:
import anndata
import os
import requests

# Local cache location for the example dataset (an .h5ad file), relative to
# the notebook's working directory.
save_path = "data/example_sce.h5ad"
if not os.path.exists(save_path):
    # Make sure the parent directory exists; on a fresh checkout open() below
    # would otherwise raise FileNotFoundError.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    response = requests.get("https://go.wisc.edu/69435h", timeout=60)
    # Fail loudly on HTTP errors instead of caching an error page as if it
    # were the dataset (which would then break every later run as well).
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)

# Load the cached file and display its summary as the cell output.
example_sce = anndata.read_h5ad(save_path)
example_sce

AnnData object with n_obs × n_vars = 2087 × 100
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'cell_type', 'sizeFactor', 'pseudotime'
    var: 'highly_variable_genes'
    uns: 'X_name', 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca'
    obsm: 'PCA', 'UMAP', 'X_pca', 'X_umap'
    layers: 'counts', 'cpm', 'logcounts', 'spliced', 'unspliced'
    obsp: 'connectivities', 'distances'

In [2]:
from scdesigner.simulators import NegBinCopulaSimulator
from scdesigner.transform import nullify, amplify, data_frame_mask

# Fit the simulator to the example data. The formula models each gene's mean
# with a degree-6 spline basis in pseudotime, which yields the six
# "bs(pseudotime, degree=6)[k]" coefficient rows shown in the next cell.
mean_formula = "~ bs(pseudotime, degree=6)"
sim = NegBinCopulaSimulator(epochs=100)
sim.fit(example_sce, mean_formula)

# Pick the first four genes and build a mask over the fitted mean
# coefficients for them; presumably it selects the rows whose name matches
# "pseudotime" (consistent with the zeroed entries displayed later).
outcomes = example_sce.var_names[:4]
mask = data_frame_mask(
    sim.params["coef_mean"],
    ["pseudotime"],
    outcomes,
)

                                                            

In [3]:
from copy import deepcopy

# Work on a deep copy so the name `sim` keeps pointing at the originally
# fitted simulator object.
null_sim = deepcopy(sim)
# Replace the copy's parameters with the output of nullify() applied to the
# "coef_mean" entry under `mask`. The table displayed below shows zeros in the
# pseudotime rows of the first four genes and unchanged values elsewhere.
# NOTE(review): whether nullify() also modifies `sim.params` in place is not
# visible from here — confirm.
null_sim.params = nullify(sim.params, "coef_mean", mask)
# Display the resulting mean-coefficient table.
null_sim.params["coef_mean"]

Unnamed: 0,Pyy,Iapp,Chgb,Rbp4,Spp1,Chga,Cck,Ins1,Nnat,Ins2,...,Nkx6-1,Fxyd3,Hn1,Smarcd2,Pdia6,Ffar2,Hes6,Serpinh1,Npy,1110012L19Rik
Intercept,2.197865,2.268209,1.318629,1.286049,3.840294,0.44833,-0.349397,2.105087,1.068234,1.211984,...,0.55288,0.290994,1.440957,-0.160437,1.048037,-1.233168,-0.637515,1.724128,-2.435935,-0.178139
"bs(pseudotime, degree=6)[1]",0.0,0.0,0.0,0.0,3.003995,-15.069673,-1.397553,-10.418837,-11.106424,-13.2831,...,-1.553561,-3.440922,-1.226781,1.312676,0.608414,3.222897,3.997242,-0.758271,-5.780034,-3.938097
"bs(pseudotime, degree=6)[2]",0.0,0.0,0.0,0.0,-3.633759,4.878799,6.335585,1.287711,10.443921,-2.103594,...,3.01054,2.766717,3.452707,4.106812,0.911357,-0.469059,3.836551,1.942889,-2.752352,3.260375
"bs(pseudotime, degree=6)[3]",0.0,0.0,0.0,0.0,-14.695806,5.241379,5.936083,-6.154891,-7.455821,-6.045755,...,1.158814,2.801614,0.697978,1.39144,-4.358405,4.425304,0.441014,-1.976071,-3.182852,4.897079
"bs(pseudotime, degree=6)[4]",0.0,0.0,0.0,0.0,-1.076773,2.775726,2.266021,-8.846365,-4.309462,-8.769652,...,-0.812769,-1.88489,-4.701143,-2.436548,-1.08131,3.203226,-0.545738,-5.097927,-4.957916,-1.141697
"bs(pseudotime, degree=6)[5]",0.0,0.0,0.0,0.0,-5.658473,1.579607,-1.084344,1.496725,3.157531,2.442446,...,1.184013,2.08483,0.848783,0.07352,0.972106,0.687638,2.092434,-1.340444,-3.438744,-1.626804
"bs(pseudotime, degree=6)[6]",0.0,0.0,0.0,0.0,-4.979625,2.34556,1.942488,3.060781,3.689172,3.768801,...,1.268168,-0.480219,-1.241685,-0.989558,1.673595,1.211747,0.363372,-2.069894,6.488352,-0.483171


In [4]:
# Draw simulated data from the nullified simulator, passing the observation
# metadata of the real dataset.
# NOTE(review): no random seed is set anywhere in view, so the values shown
# below may differ between runs — confirm whether sampling can be seeded.
samples = null_sim.sample(example_sce.obs)
# Peek at the top-left 10 x 10 corner of the simulated matrix.
samples.X[:10, :10]

array([[  1.,   5.,   4.,   0.,   0.,  16.,  20.,   0.,   0.,   0.],
       [ 10.,   0.,   1.,   0.,   0.,   6.,  25.,   0.,   0.,   0.],
       [  0.,   9.,   3.,   5.,   0.,   0.,   3.,   0.,   0.,   0.],
       [  0.,  24.,   1.,   2.,   1.,  12.,   8.,   0.,  39.,   2.],
       [  2.,   7.,   0.,  14.,   0.,   5.,   3.,   0.,   0.,   0.],
       [  0.,   0.,   2.,   0.,   8.,   0.,  16.,   0.,   0.,   0.],
       [ 10.,  15.,   0.,   8., 111.,   0.,   0.,   1.,   0.,   0.],
       [ 18.,   1.,   1.,  16.,   1.,  13.,   4.,   1.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,  19.,   0.,  24.,  16.,  12.],
       [  8.,   0.,   1.,   9.,   0.,  30.,   1.,   0.,   0.,   0.]])

In [5]:
from scdesigner.diagnose import compare_means, compare_standard_deviation, compare_umap
import numpy as np

def log_relative(x):
    """Log-transform row-wise relative abundances.

    Each row of ``x`` is divided by its own total and the result is passed
    through ``log1p``. Intended for non-negative count data.

    Parameters
    ----------
    x : numpy.ndarray
        2-D array; every row is normalised independently (sum over
        ``axis=1``). Presumably cells x genes — confirm against the caller.

    Returns
    -------
    numpy.ndarray
        ``log1p(x / row_total)``, same shape as ``x``. Rows whose total is
        zero are returned as zeros instead of NaN.
    """
    totals = x.sum(axis=1, keepdims=True)
    # An all-zero row would otherwise be 0/0 = NaN (with a RuntimeWarning);
    # dividing such rows by 1 keeps them at exactly 0.
    safe_totals = np.where(totals == 0, 1, totals)
    return np.log1p(x / safe_totals)
# Diagnostic: compare the real data with the simulated samples after applying
# `log_relative` to both (judging by the function name, per-feature means are
# compared; the rendered output is not shown here).
compare_means(example_sce, samples, log_relative)


In [6]:
# Diagnostic: same real-vs-simulated comparison as for the means, but
# (judging by the function name) on standard deviations, using the same
# `log_relative` transform.
compare_standard_deviation(example_sce, samples, log_relative)

In [7]:
# Diagnostic: compare real and simulated data in a UMAP view (per the function
# name), with `log_relative` supplied through the `transform` keyword.
compare_umap(example_sce, samples, transform=log_relative)

In [8]:
# Refit with a smaller model: 10 epochs and a degree-2 spline basis in
# pseudotime. This rebinds `sim`, so the earlier degree-6 fit is no longer
# reachable under that name.
sim = NegBinCopulaSimulator(epochs=10)
sim.fit(example_sce, "~ bs(pseudotime, degree=2)")
# Build a mask over the fitted covariance table for the genes in `outcomes`.
# Unlike the coef_mean mask, only two positional arguments are passed here.
mask = data_frame_mask(sim.params["covariance"], outcomes)

# Copy the refitted simulator and swap in parameters whose "covariance" entry
# has been nullified under `mask`, then display that table.
# NOTE(review): in the displayed result the rows for the `outcomes` genes are
# zero across all columns (including the diagonal), while the matching columns
# in other rows keep their values (e.g. row Spp1 / column Pyy is non-zero).
# The matrix is therefore not symmetric — confirm this is intended and that
# sampling tolerates it.
null_sim = deepcopy(sim)
null_sim.params = nullify(sim.params, "covariance", mask)
null_sim.params["covariance"]

                                                          

Unnamed: 0,Pyy,Iapp,Chgb,Rbp4,Spp1,Chga,Cck,Ins1,Nnat,Ins2,...,Nkx6-1,Fxyd3,Hn1,Smarcd2,Pdia6,Ffar2,Hes6,Serpinh1,Npy,1110012L19Rik
Pyy,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Iapp,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Chgb,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Rbp4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Spp1,-0.103972,-0.052772,-0.327343,-0.161285,1.206352,-0.354507,-0.351393,0.051237,-0.048364,-0.021010,...,-0.060389,-0.144016,0.005489,-0.140708,0.279837,-0.127349,-0.063964,0.093021,0.045659,-0.108340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ffar2,-0.015797,-0.018782,0.157943,0.015012,-0.127349,0.129886,0.067050,-0.019765,0.003336,-0.029463,...,0.044218,0.099760,-0.045862,0.022997,-0.035705,0.951890,-0.004690,-0.058877,-0.038471,0.080376
Hes6,0.051752,0.189493,-0.095503,-0.003305,-0.063964,-0.077502,0.070902,0.142460,0.156856,0.152514,...,0.150239,-0.027516,0.268458,0.260288,0.176408,-0.004690,1.004845,0.214758,0.051500,0.052345
Serpinh1,0.072576,0.147960,-0.153936,0.015995,0.093021,-0.113549,-0.036812,0.162800,0.127419,0.125742,...,0.088325,-0.023532,0.220583,0.171846,0.253470,-0.058877,0.214758,1.094993,0.089669,0.015524
Npy,0.040682,0.153162,0.018025,0.036084,0.045659,0.058138,-0.011110,0.193768,0.152864,0.135038,...,0.041336,-0.036199,0.056602,0.037864,0.038499,-0.038471,0.051500,0.089669,0.940347,0.039362


In [9]:
# Rebuild the coef_mean mask against the current (degree-2) fit for the same
# `outcomes` genes.
mask = data_frame_mask(sim.params["coef_mean"], ["pseudotime"], outcomes)
# Replace null_sim's parameters with the output of amplify() (factor=2) applied
# to `sim.params`. This overwrites the covariance-nullified parameters that
# were assigned to `null_sim.params` before.
# NOTE(review): despite its name, `null_sim` now holds amplified rather than
# nullified parameters — consider a separate, clearer name.
null_sim.params = amplify(sim.params, "coef_mean", mask, factor=2)
# Element-wise ratio of amplified to original coefficients: the output below
# shows 2.0 in the pseudotime rows of the first four genes and 1.0 elsewhere.
null_sim.params["coef_mean"] / sim.params["coef_mean"]

Unnamed: 0,Pyy,Iapp,Chgb,Rbp4,Spp1,Chga,Cck,Ins1,Nnat,Ins2,...,Nkx6-1,Fxyd3,Hn1,Smarcd2,Pdia6,Ffar2,Hes6,Serpinh1,Npy,1110012L19Rik
Intercept,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"bs(pseudotime, degree=2)[1]",2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"bs(pseudotime, degree=2)[2]",2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
# Sample from `sim` using the real dataset's observation metadata and display
# the resulting object's summary.
# NOTE(review): this draws from the unmodified `sim`, not from `null_sim`
# (which carries the amplified coefficients) — confirm which one is intended.
sim.sample(example_sce.obs)

AnnData object with n_obs × n_vars = 2087 × 100
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'cell_type', 'sizeFactor', 'pseudotime', '_copula_group'