In [114]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [115]:
import scanpy as sc
import numpy as np
import phate 
from scipy.spatial.distance import pdist, squareform
from data import train_valid_loader_from_pc
from model import AEDist
import torch
from transformations import LogTransform, NonTransform, StandardScaler, \
    MinMaxScaler, PowerTransformer, KernelTransform

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger

In [116]:
# Read the processed pancreas dataset (.h5ad) from disk.
pancreas_h5ad = "/home/icb/alessandro.palma/environment/scCFM/project_dir/data/pancreas/processed/pancreas.h5ad"
adata = sc.read_h5ad(pancreas_h5ad)

In [117]:
# Sanity check: dimensions of the expression matrix (observations x variables).
adata.X.shape

(20519, 2000)

In [108]:
# Dense ndarray copy of the expression matrix.
# `toarray()` allocates the dense ndarray directly, avoiding the extra sparse
# copy and the intermediate np.matrix that `.copy().todense()` + `np.array`
# created. The fallback keeps the cell working (and still copies) if
# `adata.X` is already stored dense.
X_expression = adata.X.toarray() if hasattr(adata.X, "toarray") else np.array(adata.X)

### Run PHATE

In [109]:
# Fit PHATE on the dense expression matrix and keep the embedding coordinates.
# The random state is pinned (same value as the data-split seed used further
# down) so that the embedding -- and the distance targets derived from it --
# are reproducible across kernel restarts.
phate_op = phate.PHATE(random_state=42)
phate_coords = phate_op.fit_transform(X_expression)

Calculating PHATE...
  Running PHATE on 20519 observations and 2000 variables.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 3.32 seconds.
    Calculating KNN search...
    Calculated KNN search in 36.23 seconds.
    Calculating affinities...
    Calculated affinities in 4.13 seconds.
  Calculated graph and diffusion operator in 43.73 seconds.
  Calculating landmark operator...
    Calculating SVD...
    Calculated SVD in 2.42 seconds.
    Calculating KMeans...
    Calculated KMeans in 3.19 seconds.
  Calculated landmark operator in 6.73 seconds.
  Calculating optimal t...
    Automatically selected t = 17
  Calculated optimal t in 1.60 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.47 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 6.10 seconds.
Calculated PHATE in 58.65 seconds.


In [110]:
# Full symmetric matrix of pairwise Euclidean distances between PHATE coordinates.
phate_D = squareform(pdist(phate_coords, metric="euclidean"))

In [111]:
# Standard deviation over every entry of the PHATE distance matrix.
# np.std reduces over all elements when no axis is given, so the explicit
# `.flatten()` (which copied the whole n x n matrix) is unnecessary.
dist_std = np.std(phate_D)

In [96]:
# Build the train/validation loaders from the expression point cloud and the
# PHATE distance matrix. With return_mean_std=True the call also hands back
# `mean` and `std`, which are passed to the model constructor.
trainloader, valloader, mean, std = train_valid_loader_from_pc(
    X_expression,  # point cloud
    phate_D,  # distance matrix to match
    batch_size=256,
    train_valid_split=0.8,
    shuffle=True,
    seed=42,
    return_mean_std=True,
    componentwise_std=False,
)

In [112]:
# Pull a single batch from the training loader for manual inspection.
# NOTE(review): `b` is not used by any of the cells shown here -- looks like
# leftover debugging; confirm before removing.
b = next(iter(trainloader))

### Initialize AE

In [118]:
# Distance-matching autoencoder: input width = number of expression features,
# three hidden layers, 10-dimensional embedding.
model = AEDist(
    # --- architecture ---
    dim=X_expression.shape[1],
    emb_dim=10,
    layer_widths=[256, 128, 64],
    activation_fn=torch.nn.ReLU(),
    # --- loss weighting (semantics defined in model.AEDist) ---
    dist_reconstr_weights=[0.9, 0.1, 0.0],
    # --- preprocessing transform: NonTransform, presumably a no-op -- confirm ---
    pp=NonTransform(),
    # --- optimisation ---
    lr=1e-3,
    weight_decay=1e-4,
    batch_norm=True,
    # --- optional terms, all switched off for this run ---
    dist_recon_topk_coords=0,
    use_dist_mse_decay=False,
    dist_mse_decay=0.0,
    dropout=0.0,
    cycle_weight=0.0,
    cycle_dist_weight=0.0,
    # --- normalisation statistics from the loader / PHATE distances ---
    mean=mean,
    std=std,
    dist_std=dist_std,
)

  rank_zero_warn(


In [119]:
# Display the module tree to check the encoder/decoder layer sizes.
model

AEDist(
  (encoder): MLP(
    (net): Sequential(
      (0): Linear(in_features=2000, out_features=256, bias=True)
      (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Linear(in_features=256, out_features=128, bias=True)
      (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
      (6): Linear(in_features=128, out_features=64, bias=True)
      (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU()
      (9): Linear(in_features=64, out_features=10, bias=True)
    )
  )
  (decoder): MLP(
    (net): Sequential(
      (0): Linear(in_features=10, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Linear(in_features=64, out_features=128, bias=True)
      (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_sta

## Train with PyTorch Lightning

In [None]:
path_dir = "/home/icb/alessandro.palma/environment/scCFM/project_dir/baselines/gaga/run"
path_model = "/home/icb/alessandro.palma/environment/scCFM/project_dir/baselines/gaga/model"

# TensorBoard event files are written under `path_dir`.
logger = TensorBoardLogger(save_dir=path_dir)

# Keep only the best checkpoint, ranked by the per-step *training* loss.
#
# The checkpoint is now triggered once per epoch. The previous
# `every_n_train_steps=10000` could never fire in this run: 20519 samples with
# batch_size=256 give at most 81 steps per epoch, i.e. at most ~4050 steps over
# max_epochs=50, and setting a step-based trigger disables the default
# epoch-end save -- so no checkpoint was ever written.
checkpoint_callback = ModelCheckpoint(
    dirpath=path_dir,
    # NOTE(review): `filename` is meant to be a bare file stem. Passing an
    # absolute path presumably overrides `dirpath` when the two are joined, so
    # the .ckpt would land next to `path_model`, not in `path_dir` -- confirm
    # which location is intended.
    filename=path_model,
    save_top_k=1,
    monitor='train_loss_step',  # training loss; no validation loader is passed to fit() below
    mode='min',  # lower monitored loss is better
    every_n_epochs=1,
)

trainer = Trainer(
    logger=logger,
    max_epochs=50,
    accelerator='cuda',
    callbacks=[checkpoint_callback],
    log_every_n_steps=100,
)

# NOTE(review): `valloader` is created earlier but never used here; pass it as
# `val_dataloaders=valloader` (and monitor the validation metric the model
# logs) if model selection on validation loss is wanted.
trainer.fit(
    model=model,
    train_dataloaders=trainloader)

  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/icb/alessandro.palma/environment/scCFM/project_dir/baselines/gaga/run/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type | Params
---------------------------------
0 | encoder | MLP  | 136 K 
1 | decoder | MLP  | 138 K 
---------------------------------
275 K     Trainable params
0         Non-trainable params
275 K     Total params
1.103     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]