In [None]:
import anndata
from scdesigner.simulator import scdesigner
from scdesigner.margins.marginal import Normal

I downloaded the data from the scDesign3 [quickstart](https://songdongyuan1994.github.io/scDesign3/docs/articles/scDesign3.html). There are only 100 genes in this demo.

In [None]:
import os
import requests

save_path = "data/example_sce.h5ad"
if not os.path.exists(save_path):
    # open() does not create missing parent directories, so make sure the
    # cache directory exists before writing into it.
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

    # A timeout keeps the notebook from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being cached as .h5ad.
    response = requests.get("https://go.wisc.edu/69435h", timeout=60)
    response.raise_for_status()

    # Write to a temporary name and rename, so an interrupted download never
    # leaves a truncated file that later runs would treat as a valid cache.
    partial_path = save_path + ".part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, save_path)

example_sce = anndata.read_h5ad(save_path)
example_sce

In [None]:
# NOTE(review): this cell repeats the download-and-load step of the previous
# cell (it relies on `save_path`, `os` and `requests` defined there).
if not os.path.exists(save_path):
    # open() does not create missing parent directories.
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

    # Fail on a stalled connection or an HTTP error instead of caching junk.
    response = requests.get("https://go.wisc.edu/69435h", timeout=60)
    response.raise_for_status()

    # Write-then-rename so an interrupted download is never mistaken for a
    # complete cached file.
    partial_path = save_path + ".part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, save_path)

example_sce = anndata.read_h5ad(save_path)
example_sce

The fit seems quite sensitive to the learning rate. How can we pick a good default? One option is to systematically survey which learning rates work well across a range of public datasets. Alternatively, is there a good way to adapt the learning rate during training?

In [None]:
import numpy as np

# Densify the expression matrix, cast it to float32 and move it to the
# log(1 + x) scale before fitting.
# NOTE: this overwrites example_sce.X, so re-running the cell without
# reloading the data would not start from the original values.
example_sce.X = np.log(example_sce.X.toarray().astype(np.float32) + 1)

# Fit a Normal margin whose formula is a 5-df B-spline in pseudotime.
sim = scdesigner(
    example_sce,
    Normal("~ bs(pseudotime, df=5)"),
)
sim

In [None]:
from scdesigner.transform import amplify

# Build a second simulator from `sim` via amplify(); the result is bound to a
# new name, so `sim` itself is still available for comparison below.
# NOTE(review): presumably this scales the "pseudotime" effect for gene "Pyy"
# by a factor of 2 — confirm against scdesigner.transform.amplify.
sim_amplify = amplify(sim, 2, "pseudotime", ["Pyy"])

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def plot_gene(sim, example_sce, ix=0):
    """Plot one gene against pseudotime with its fitted mean and a one-sigma band.

    Draws onto the current matplotlib axes and does not call ``plt.show()``,
    so consecutive calls overlay on the same figure until the caller shows it.

    Parameters
    ----------
    sim
        Object whose ``predict(obs)`` returns a mapping with ``"mu"`` and
        ``"sigma"`` entries; ``"mu"`` must be a DataFrame and both must be
        indexable by gene name.
    example_sce
        Dataset providing ``var_names``, an ``obs`` table with a
        ``"pseudotime"`` column, and a matrix ``X`` that supports
        ``X[:, ix]`` column indexing.
    ix : int, default 0
        Position of the gene (column of ``X``) to plot.

    Returns
    -------
    None
    """
    gene = example_sce.var_names[ix]
    pseudotime = example_sce.obs["pseudotime"]

    # Predict once and reuse both outputs instead of calling predict() per output.
    predictions = sim.predict(example_sce.obs)
    sigma = predictions["sigma"][gene].values

    # Work on a single-gene copy: the frame returned by predict() is left
    # untouched, and only the column being plotted is sorted.
    fitted = predictions["mu"][[gene]].copy()
    fitted["pseudotime"] = pseudotime.values
    fitted["lower_sd"] = fitted[gene].values - sigma
    fitted["upper_sd"] = fitted[gene].values + sigma

    # fill_between needs x in increasing order to draw a clean band.
    fitted = fitted.sort_values(by="pseudotime")

    # Observed values, then the +/- sigma band, then the fitted mean on top.
    sns.scatterplot(x="pseudotime", y=gene, data={"pseudotime": pseudotime, gene: example_sce.X[:, ix]})
    plt.fill_between(fitted["pseudotime"], fitted["lower_sd"], fitted["upper_sd"], color="orange", alpha=0.3)
    sns.scatterplot(x="pseudotime", y=gene, data=fitted)

# For each of the first four genes, draw the original and the amplified fit
# onto the same figure, then show it.
for gene_ix in range(4):
    for fitted_sim in (sim, sim_amplify):
        plot_gene(fitted_sim, example_sce, ix=gene_ix)
    plt.show()

We can also modify the simulator in place, without creating a copy.

In [None]:
# Method form of the transform: called on `sim` directly, with no new
# simulator object assigned.
sim.dampen(2, "pseudotime", ["Pyy"])

# Re-plot the first four genes, one figure per gene.
for gene_ix in range(4):
    plot_gene(sim, example_sce, ix=gene_ix)
    plt.show()

Let's return to the original parameter estimates.

In [None]:
# The earlier cell already dampened "Pyy" by a factor of 2 in place; calling
# dampen() again would compound that change rather than undo it. Amplify by
# the same factor to get back to the original estimates.
# NOTE(review): assumes amplify(…, 2, …) is the inverse of dampen(2, …) —
# confirm against the scdesigner transform implementations.
sim = amplify(sim, 2, "pseudotime", ["Pyy"])

In [None]:
from scdesigner.plot import embedding

# Draw synthetic data from the fitted simulator.
samples = sim.sample()
# Embed the samples together with the reference dataset, then on their own.
# NOTE(review): emb1/emb2 are assigned but never displayed in this cell —
# presumably embedding() returns a plot object; confirm and display if so.
emb1 = embedding(samples, example_sce)
emb2 = embedding(samples)

In [None]:
from copy import deepcopy
# Keep an independent copy of the simulator before changing it below.
# NOTE(review): sim2 is not referenced again in the visible notebook.
sim2 = deepcopy(sim)
# Switch the formula for these two genes to "~ 1".
# NOTE(review): presumably an intercept-only model that removes the
# pseudotime dependence for them — confirm against reformulate().
sim.reformulate(["Pyy", "Iapp"], "~ 1")

In [None]:
# Plot the first four genes after the reformulation, one figure per gene.
for gene_ix in range(4):
    plot_gene(sim, example_sce, ix=gene_ix)
    plt.show()

In [None]:
# Give the mean and the standard deviation the same 5-df B-spline in
# pseudotime, so the spread can also vary along the trajectory.
fmla = dict(
    mu="~ bs(pseudotime, df=5)",
    sigma="~ bs(pseudotime, df=5)",
)

# Refit from scratch; this rebinds `sim` and discards the earlier simulator.
sim = scdesigner(
    example_sce,
    Normal(fmla),
    max_epochs=5,
)

In [None]:
# Plot the first four genes under the refitted simulator, one figure per gene.
for gene_ix in range(4):
    plot_gene(sim, example_sce, ix=gene_ix)
    plt.show()

In [None]:
# Called on `sim` directly, with no new simulator object assigned.
# NOTE(review): presumably removes the "pseudotime" effect for genes "Pyy"
# and "Iapp" — confirm against the nullify() implementation.
sim.nullify("pseudotime", ["Pyy", "Iapp"])

In [None]:
# Plot the first four genes after nullify(), one figure per gene.
for gene_ix in range(4):
    plot_gene(sim, example_sce, ix=gene_ix)
    plt.show()

In [None]:
# Sample with no arguments, then display the result as the cell output.
sampled = sim.sample()
sampled

In [None]:
# Same call, this time passing N=50.
sampled = sim.sample(N=50)
sampled

In [None]:
# Sample using only the first ten rows of the observation table.
sampled = sim.sample(obs=example_sce.obs.iloc[:10])
sampled