In [1]:
import anndata
from scdesigner.minimal.simulator import scdesigner
from scdesigner.minimal.negbin import NegBin
from scdesigner.minimal.standard_covariance import StandardCovariance

# Marginal model: NegBin with the mean driven by a 5-df spline basis of pseudotime.
marginal_spec = NegBin("~ bs(pseudotime, df=5)")
# Copula model: one covariance per cell_type level (no intercept term).
copula_spec = StandardCovariance("~ -1 + cell_type")
sim = scdesigner(marginal_spec, copula_spec)

# Load the example dataset and fit for a single epoch (demo-sized run).
example_sce = anndata.read_h5ad("data/example_sce.h5ad")
sim.fit(example_sce, lr=0.01, max_epochs=1)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name  | Type          | Params | Mode 
------------------------------------------------
0 | coefs | ParameterDict | 700    | train
------------------------------------------------
700       Trainable params
0         Non-trainable params
700       Total params
0.003     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 3/3 [00:01<00:00,  1.89it/s, v_num=801, train_loss_step=1.78e+4, train_loss_epoch=4.58e+5]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 3/3 [00:01<00:00,  1.88it/s, v_num=801, train_loss_step=1.78e+4, train_loss_epoch=4.58e+5]


Estimating copula covariance: 100%|██████████| 3/3 [00:01<00:00,  1.79it/s]


In [2]:
# Predict marginal parameters from the covariates of the first ten cells.
first_ten_obs = example_sce.obs.iloc[:10]
preds = sim.predict(first_ten_obs)

# Display only the top-left 4 x 4 corner of every predicted parameter array.
{param_name: values[:4, :4] for param_name, values in preds.items()}

{'mean': array([[0.98920333, 1.0168874 , 1.0156788 , 1.0062875 ],
        [0.995766  , 1.0218278 , 1.0134252 , 1.0026919 ],
        [0.9919619 , 1.0184183 , 1.0148683 , 1.0099066 ],
        [0.99814737, 1.0146829 , 1.0142584 , 1.0177789 ]], dtype=float32),
 'dispersion': array([[0.9971064 , 1.0191165 , 0.98509187, 1.0064391 ],
        [0.9971064 , 1.0191165 , 0.98509187, 1.0064391 ],
        [0.9971064 , 1.0191165 , 0.98509187, 1.0064391 ],
        [0.9971064 , 1.0191165 , 0.98509187, 1.0064391 ]], dtype=float32)}

In [3]:
# Draw synthetic data for the covariates of the first ten cells.
sim.sample(example_sce.obs.iloc[:10])

AnnData object with n_obs × n_vars = 10 × 100
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'cell_type', 'sizeFactor', 'pseudotime'

In [4]:
# Model-complexity summary of the fitted simulator; the displayed result is a
# dict with "aic" and "bic" entries.
sim.complexity()

Computing log-likelihood...: 100%|██████████| 3/3 [00:01<00:00,  1.71it/s]


{'aic': np.float64(-49453.58175441096), 'bic': np.float64(62287.37980571762)}

In [5]:
# Show the fitted parameters. The displayed dict has a "marginal" entry whose
# "mean" table is indexed by design-matrix term (rows) and gene (columns).
# NOTE(review): this dumps every parameter table; consider slicing for display.
sim.parameters

{'marginal': {'mean':                               Pyy      Iapp      Chgb      Rbp4      Spp1  \
  Intercept               -0.007013  0.014241  0.013111  0.006585 -0.003392   
  bs(pseudotime, df=5)[1] -0.007865 -0.002076  0.005023 -0.008634  0.005928   
  bs(pseudotime, df=5)[2]  0.012706  0.014870 -0.003416 -0.002667  0.018260   
  bs(pseudotime, df=5)[3] -0.012162 -0.003179  0.005196 -0.003846 -0.007195   
  bs(pseudotime, df=5)[4]  0.007677  0.009726 -0.001120  0.010342 -0.011063   
  bs(pseudotime, df=5)[5]  0.005158  0.000335  0.001047  0.011038 -0.014341   
  
                               Chga       Cck      Ins1      Nnat      Ins2  \
  Intercept               -0.014289  0.006999 -0.010891  0.004681  0.019425   
  bs(pseudotime, df=5)[1]  0.009984  0.002464  0.003976 -0.011023 -0.003435   
  bs(pseudotime, df=5)[2]  0.013288 -0.000034 -0.019918 -0.029999  0.011829   
  bs(pseudotime, df=5)[3]  0.008413 -0.006280 -0.008185  0.001192  0.001792   
  bs(pseudotime, df=5)[4]  0.

In [6]:
import scdesigner.minimal.transform as tr

# Remove the pseudotime effect on the mean for genes matching "Pyy|Iapp".
null_sim = tr.nullify(sim, "pseudotime", "Pyy|Iapp", "mean")

# Inspect the first three coefficient rows for the first four genes; in the
# output, the pseudotime rows of the two matched genes are zero.
mean_coefs = null_sim.marginal.predict.coefs["mean"]
mean_coefs[:3, :4]

tensor([[-0.0070,  0.0142,  0.0131,  0.0066],
        [-0.0000, -0.0000,  0.0050, -0.0086],
        [ 0.0000,  0.0000, -0.0034, -0.0027]], grad_fn=<SliceBackward0>)

In [7]:
# Gene selector and copula group shared by the transforms below.
gene_pattern = "Pyy|Iapp"
copula_group = "cell_type[Ngn3 low EP]"

# Scale the pseudotime effect on the mean by a factor of 2 for the selected genes.
amplified_sim = tr.amplify(sim, 2, "pseudotime", gene_pattern, "mean")
# Adjust the copula entries between the selected genes within one cell-type group.
decorr_sim = tr.decorrelate(sim, gene_pattern, gene_pattern, copula_group)
corr_sim = tr.correlate(sim, 2, gene_pattern, gene_pattern, copula_group)

# Top-left 4 x 4 corner of the decorrelated group's covariance table.
decorr_cov = decorr_sim.parameters["copula"][copula_group]
decorr_cov.iloc[:4, :4]

Unnamed: 0,Pyy,Iapp,Chgb,Rbp4
Pyy,0.808354,0.0,0.0,0.0
Iapp,0.0,0.558327,0.0,0.0
Chgb,0.0,0.0,0.464727,0.089289
Rbp4,0.0,0.0,0.089289,0.979553


Here is a more manual but very general way to substitute parameters.

In [8]:
import pandas as pd
import numpy as np

# Use a seeded generator so the substituted coefficients are identical on every
# Restart & Run All (the unseeded global RNG gave different values each run).
rng = np.random.default_rng(0)

# Derive the gene count from the data instead of hard-coding 100.
gene_names = example_sce.var_names
n_genes = len(gene_names)

# Replacement mean coefficients: 6 design-matrix rows (Intercept + 5 spline
# terms, as in the fitted "mean" table) by one column per gene.
new_param = rng.normal(size=(6, n_genes))
sub_sim_mean = tr.replace_param(sim, ["marginal", "mean"], new_param)

# Replacement covariance for one cell-type group: 1.0 on the diagonal and 0.1
# everywhere else, labelled by gene on both axes. Built in one step so the name
# is only ever bound to a DataFrame.
new_covariance = pd.DataFrame(
    0.9 * np.eye(n_genes) + 0.1 * np.ones((n_genes, n_genes)),
    columns=gene_names,
    index=gene_names,
)
sub_sim_cov = tr.replace_param(sim, ["copula", "cell_type[Ngn3 low EP]"], new_covariance)

Here's an example showing how dispersion can be modeled.

In [9]:
from scdesigner.minimal.simulator import scdesigner
from scdesigner.minimal.negbin import NegBin
from scdesigner.minimal.standard_covariance import StandardCovariance

# Separate formulas per NegBin parameter: a spline of pseudotime for the mean
# and a linear pseudotime term for the dispersion.
marginal_formulas = {
    "mean": "~ bs(pseudotime,df=5)",
    "dispersion": "~ pseudotime",
}

# NOTE(review): rebinds `sim`, replacing the simulator fitted in the first cell.
sim = scdesigner(
    NegBin(marginal_formulas),
    StandardCovariance("~ -1 + cell_type"),
)

sim.fit(example_sce, max_epochs=1, batch_size=256)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name  | Type          | Params | Mode 
------------------------------------------------
0 | coefs | ParameterDict | 800    | train
------------------------------------------------
800       Trainable params
0         Non-trainable params
800       Total params
0.003     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 9/9 [00:01<00:00,  5.50it/s, v_num=802, train_loss_step=1.76e+4, train_loss_epoch=1.14e+5]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 9/9 [00:01<00:00,  5.49it/s, v_num=802, train_loss_step=1.76e+4, train_loss_epoch=1.14e+5]


Estimating copula covariance: 100%|██████████| 9/9 [00:01<00:00,  5.44it/s]


In [10]:
# Print, in order, the fitted parameters, a sample, and the predictions.
# Each producer is evaluated lazily so every result is computed immediately
# before it is printed, exactly as with three consecutive print calls.
for produce in (lambda: sim.parameters, sim.sample, sim.predict):
    print(produce())

{'marginal': {'mean':                               Pyy      Iapp      Chgb      Rbp4      Spp1  \
Intercept               -0.014139  0.016520 -0.001425  0.005025  0.003911   
bs(pseudotime, df=5)[1] -0.019702 -0.003457 -0.016591  0.001902  0.020429   
bs(pseudotime, df=5)[2]  0.021472  0.018029  0.014331  0.022126  0.018423   
bs(pseudotime, df=5)[3]  0.015375  0.005685  0.004388 -0.002055 -0.013409   
bs(pseudotime, df=5)[4]  0.015674  0.011949  0.010934  0.002055 -0.028155   
bs(pseudotime, df=5)[5]  0.013721  0.000617  0.009873  0.019075  0.035709   

                             Chga       Cck      Ins1      Nnat      Ins2  \
Intercept               -0.003665  0.006615 -0.003771  0.010827 -0.006858   
bs(pseudotime, df=5)[1] -0.005588  0.006733 -0.003934 -0.014154 -0.015762   
bs(pseudotime, df=5)[2]  0.015891  0.010995 -0.004881  0.001932 -0.002737   
bs(pseudotime, df=5)[3] -0.016555  0.013531  0.005574  0.001173  0.013630   
bs(pseudotime, df=5)[4]  0.016949  0.008478  0.005180

In [11]:
# Open the large dataset with backed=True instead of the default in-memory read.
# NOTE(review): rebinds `example_sce` and `sim` from the earlier cells.
example_sce = anndata.read_h5ad(
    "data/million_cells.h5ad",
    backed=True,
    chunk_size=20_000,
)

# Cell-type-only design for both the marginal mean and the copula grouping.
# This dataset's formulas use `celltype`, not `cell_type` as in the earlier cells.
sim = scdesigner(NegBin("~ celltype"), StandardCovariance("~ -1 + celltype"))

# Fitting is intentionally left disabled here:
# sim.fit(example_sce, batch_size=int(1000), lr=0.01)