In [1]:
# Standard imports
import numpy as np

# pertpy is needed to download the Kang data
import pertpy
import scanpy as sc

# Load the Kang 2018 dataset through pertpy.
# This will download the data to ./data/kang_2018.h5ad
adata = pertpy.data.kang_2018()
# Store counts separately in the layers: the copy keeps the matrix that is
# currently in adata.X available under "counts" even if adata.X changes later.
adata.layers["counts"] = adata.X.copy()

  from .autonotebook import tqdm as notebook_tqdm
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [2]:
import pylemur.pp.basic

# Apply pylemur's shifted log transform and keep the result in its own layer.
# The input is read from the dedicated "counts" layer (a copy of adata.X taken
# right after loading) instead of adata.X itself, so re-running this cell gives
# the same result even if adata.X has been modified in the meantime.
adata.layers["logcounts"] = pylemur.pp.basic.shifted_log_transform(adata.layers["counts"])

In [3]:
import dask.array as da

def get_input_arr():
    """Return the "logcounts" layer of the notebook-level ``adata`` as a dask array.

    The layer is looked up each time the function is called, so it reflects
    whatever ``adata.layers["logcounts"]`` holds at call time.
    """
    logcounts = adata.layers["logcounts"]
    return da.from_array(logcounts)

In [4]:
# Scratch input: a single-row 2-D array holding 1, 2, 3 (shape (1, 3)).
x = np.array([1, 2, 3])[np.newaxis, :]

In [5]:
# Scratch check: dropping the length-1 axis of x leaves a 1-D array.
x.squeeze().shape

(3,)

In [6]:
# Scratch arrays of ones: A is a two-column matrix, B a flat vector, both with
# 24673 entries along the first axis.
A, B = np.ones(shape=(24673, 2)), np.ones(24673)

In [7]:
# Scratch check: display the vector B defined in the previous cell.
B

array([1., 1., 1., ..., 1., 1., 1.])

In [8]:
# Display the AnnData summary (dimensions, obs/var columns, obsm keys, layers)
# to confirm that the "counts" and "logcounts" layers are present.
adata

AnnData object with n_obs × n_vars = 24673 × 15706
    obs: 'nCount_RNA', 'nFeature_RNA', 'tsne1', 'tsne2', 'label', 'cluster', 'cell_type', 'replicate', 'nCount_SCT', 'nFeature_SCT', 'integrated_snn_res.0.4', 'seurat_clusters'
    var: 'name'
    obsm: 'X_pca', 'X_umap'
    layers: 'counts', 'logcounts'

In [9]:
import pylemur

# Set up a 15-dimensional LEMUR model on the "logcounts" layer with `label`
# as the only covariate in the design formula.
model = pylemur.tl.LEMUR(
    adata,
    get_input_arr,
    design="~ label",
    n_embedding=15,
    layer="logcounts",
)
# Fit the model, then run the Harmony-based alignment step.
model.fit()
model.align_with_harmony()
print(model)

Centering the data using linear regression.
Find base point
Fit regression on latent spaces
Find shared embedding coordinates


  theta = theta * (1 - np.exp(-((N_b / (nclust * tau)) ** 2)))
2024-12-10 09:33:45,310 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...
2024-12-10 09:33:46,846 - harmonypy - INFO - sklearn.KMeans initialization complete.
2024-12-10 09:33:46,930 - harmonypy - INFO - Stopped before convergence


Alignment iteration 0
(24673, 15) 1 24673
Alignment iteration 1
(24673, 15) 1 24673
Alignment iteration 2
(24673, 15) 1 24673
Alignment iteration 3
(24673, 15) 1 24673
Alignment iteration 4
(24673, 15) 1 24673
Alignment iteration 5
(24673, 15) 1 24673
Alignment iteration 6
(24673, 15) 1 24673
Alignment iteration 7
(24673, 15) 1 24673
Converged
LEMUR model with 15 dimensions


In [None]:
# Recalculate the UMAP on the embedding calculated by LEMUR
# Expose the model's embedding to scanpy under obsm["embedding"].
adata.obsm["embedding"] = model.embedding
# Build the neighbour graph from that embedding (use_rep) instead of scanpy's
# default representation.
sc.pp.neighbors(adata, use_rep="embedding")
# NOTE(review): adata already had an 'X_umap' entry in obsm when it was loaded;
# sc.tl.umap presumably replaces it -- confirm if the original coordinates are
# still needed.
sc.tl.umap(adata)
# Colour the UMAP by the 'label' and 'cell_type' obs columns.
sc.pl.umap(adata, color=["label", "cell_type"])

In [None]:
import matplotlib.pyplot as plt

# NOTE(review): the "diff" layer is filled with synthetic standard-normal noise
# here, not with a model prediction, even though the panel title below reads
# "Pred diff (stim - ctrl)". Replace it with the real predicted difference
# before interpreting the figure.
# A fixed seed makes the layer (and the figure) identical on every
# Restart & Run All.
adata.layers["diff"] = np.random.default_rng(0).standard_normal(adata.shape)
sel_gene = "TNFRSF18"

# One row of three equally sized panels, three times the default figure width.
fsize = plt.rcParams["figure.figsize"]
fig = plt.figure(figsize=(fsize[0] * 3, fsize[1]))
axs = [fig.add_subplot(1, 3, i + 1) for i in range(3)]
for ax in axs:
    ax.set_aspect("equal")

# Panel 1: the "diff" layer for the selected gene on a diverging colour map
# centred at zero.
sc.pl.umap(
    adata,
    layer="diff",
    color=[sel_gene],
    cmap=plt.get_cmap("seismic"),
    vcenter=0,
    vmin=-0.5,
    vmax=0.5,
    title="Pred diff (stim - ctrl)",
    ax=axs[0],
    show=False,
)
# Panels 2 and 3: the same UMAP coloured by the 'label' and 'cell_type' obs
# columns, so the allocated axes are not left empty and the diff panel can be
# read against them.
sc.pl.umap(adata, color="label", ax=axs[1], show=False)
sc.pl.umap(adata, color="cell_type", ax=axs[2], show=False)
plt.show()

In [None]:
# Plot the "diff" layer for two genes side by side on a diverging colour map
# centred at zero.
# scanpy expects one title per panel: with a single string and two genes it
# warns that the title list is too short and labels the second panel with the
# bare gene name. Giving one title per gene keeps both panels labelled
# consistently.
diff_genes = [sel_gene, "YBEY"]
sc.pl.umap(
    adata,
    layer="diff",
    color=diff_genes,
    cmap=plt.get_cmap("seismic"),
    vcenter=0,
    vmin=-0.5,
    vmax=0.5,
    title=[f"Pred diff (stim - ctrl): {gene}" for gene in diff_genes],
    show=False,
)

In [None]:
# Display the variable names of adata -- presumably to check that the genes
# plotted above are present under those names.
adata.var_names