In [1]:
import os
import scanpy as sc
import numpy as np
import pandas as pd
import torch
import yaml
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor, TQDMProgressBar
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.utilities.model_summary import ModelSummary

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

from celldreamer.paths import ROOT
from celldreamer.estimator.celldreamer_estimator import CellDreamerEstimator
from celldreamer.paths import DATA_DIR
from celldreamer.data.utils import Args

In [4]:
cd $ROOT

/home/icb/till.richter/git/celldreamer


Load configuration 

In [5]:
config = yaml.safe_load(open(ROOT / "configs/hlca/config_ddpm.yaml", 
                            "rb"))
args_hlca = Args(config["args"]) 

Initialize estimator 

In [6]:
estimator = CellDreamerEstimator(args_hlca)

Create the training folders...
Initialize data module...




GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Initialize feature embeddings...
Initialize model...


In [7]:
estimator.generative_model

ConditionalGaussianDDPM(
  (denoising_model): MLPTimeStep(
    (net): Sequential(
      (0): MLPTimeEmbedCond(
        (time_embed_net): Sequential(
          (0): Linear(in_features=100, out_features=32, bias=True)
          (1): SELU()
          (2): Linear(in_features=32, out_features=32, bias=True)
        )
        (net): Sequential(
          (0): Linear(in_features=2000, out_features=32, bias=True)
          (1): SELU()
        )
        (out_layer): Linear(in_features=32, out_features=32, bias=True)
      )
      (1): MLPTimeEmbedCond(
        (time_embed_net): Sequential(
          (0): Linear(in_features=100, out_features=32, bias=True)
          (1): SELU()
          (2): Linear(in_features=32, out_features=32, bias=True)
        )
        (net): Sequential(
          (0): Linear(in_features=32, out_features=32, bias=True)
          (1): SELU()
        )
        (out_layer): Linear(in_features=32, out_features=32, bias=True)
      )
      (2): MLPTimeEmbedCond(
        (time

Train model

In [8]:
estimator.train()

[rank: 0] Global seed set to 42
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type        | Params
------------------------------------------------
0 | denoising_model | MLPTimeStep | 8.3 M 
1 | mse             | MSELoss     | 0     
------------------------------------------------
8.3 M     Trainable params
0         Non-trainable params
8.3 M     Total params
33.391    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [9]:
estimator.generative_model = estimator.generative_model.to("cuda")
estimator.generative_model.denoising_model = estimator.generative_model.denoising_model.to("cuda")

**Generate**

In [10]:
T = estimator.generative_model.T
T

1000

In [11]:
vec = torch.randn(10, 50).to("cuda")
t1 = 1000*torch.ones(10).to("cuda")
t2 = 1*torch.ones(10).to("cuda")

In [12]:
estimator.generative_model.denoising_model(vec, t1, None)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x50 and 2000x32)

In [None]:
estimator.generative_model.denoising_model(vec.to('cuda'), t2.to('cuda'), None)

**Check timestep embedding**

In [None]:
# X_gen = estimator.generative_model.sample(batch_size=1000,
#                                              y=None, 
#                                    `          return_all_timesteps=False,
#                                              clip_denoised=True)

X_gen= estimator.generative_model.ddim_sample(batch_size=1000, 
                      y=None, 
                      return_all_timesteps = False, 
                      ddim_sampling_eta=0)

In [None]:
X_gen

**Plot real train data**

In [None]:
adata_train_real = sc.read_h5ad(os.path.join(ROOT, 'celldreamer', 'data', 'hlca', 'train_adata.h5ad'))
adata_train_real

In [None]:
sc.tl.pca(adata_train_real)
sc.pp.neighbors(adata_train_real)
sc.tl.umap(adata_train_real)

In [None]:
sc.pl.umap(adata_train_real)

**Plot simulated data**

In [None]:
adata_tmp = sc.AnnData(X=X_gen.detach().cpu().numpy())
sc.tl.pca(adata_tmp)
sc.pp.neighbors(adata_tmp)
sc.tl.umap(adata_tmp)

In [None]:
sc.pl.umap(adata_tmp)

**Plot real + simulated data**

In [None]:
d = []

for batch in estimator.datamodule.train_dataloader:
    d.append(batch["X"])
    
d = torch.cat(d, dim=0)
# d = torch.clip(d, -3,3)

In [None]:
adata = sc.AnnData(X = np.concatenate([X_gen.detach().cpu().numpy(), d.cpu().numpy()]),
                   obs = pd.DataFrame({"type":["gen"]*len(X_gen)+["real"]*len(d)}))

In [None]:
sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [None]:
sc.pl.pca(adata, color="type")

In [None]:
sc.pl.umap(adata, color="type")

In [None]:
d.mean()

In [None]:
X_gen.mean()

In [None]:
d.max()

In [None]:
d.max()

In [None]:
d.min()

In [None]:
d.mean()

In [None]:
d.min()

In [None]:
# d = 2 * (d - d.min(1).values.unsqueeze(-1)) / (d.max(1).values.unsqueeze(-1) - d.min(1).values.unsqueeze(-1)) - 1

In [None]:
# d = (d - d.min(1).values.unsqueeze(-1)) / (d.max(1).values.unsqueeze(-1) - d.min(1).values.unsqueeze(-1)) 

In [None]:
# d