In [1]:
from IMPA.dataset.data_loader import CellDataLoader
from IMPA.solver import IMPAmodule
from IMPA.model import build_model
from omegaconf import OmegaConf
from tqdm import tqdm
import matplotlib.pyplot as plt

from pathlib import Path
import numpy as np
import os
import torch

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


In [2]:
class Args:
    def __init__(self, dictionary):
        self.__dict__ = dictionary

    def __getattr__(self, key):
        if key in self.__dict__:
            return self.__dict__[key]
        else:
            raise AttributeError(f"'DictToObject' object has no attribute '{key}'")

    def __call__(self, key):
        return self.__getattr__(key)

def t2np(t):
    return ((t.permute(0,2, 3, 1) + 1) / 2).clamp(0,1).cpu().numpy()

In [3]:
config_dict = {
    "task_name": "cpg0000_total_large_transported",
    "img_size": 96,
    "latent_dim": 100,
    "hidden_dim": 512,
    "style_dim": 32,
    "stochastic": True,
    "z_dimension": 20,
    "dim_in": 64,
    "lambda_reg": 1,
    "lambda_cyc": 1,
    "lambda_sty": 1,
    "lambda_ds": 1,
    "total_epochs": 100,
    "ds_iter": 200000,
    "resume_iter": 0,
    "batch_size": 32,
    "val_batch_size": 20,
    "lr": 0.0001,
    "f_lr": 0.0001,
    "beta1": 0,
    "beta2": 0.99,
    "weight_decay": 0.0001,
    "num_outs_per_domain": 10,
    "single_style": True,
    "ood_set": None,
    "mol_list": None,
    "balanced": False,
    "trainable_emb": False,
    "dataset_name": "cpg0000",
    "n_channels": 5,
    "num_workers": 6,
    "seed": 42,
    "image_path": "/lustre/groups/ml01/datasets/projects/cpg0000_alessandro/cpg0000_u2os_normalized_segmented_large_transported",
    "data_index_path": "/lustre/groups/ml01/datasets/projects/cpg0000_alessandro/metadata/metadata_large_gene2vec.csv",
    "embedding_path": None,
    "experiment_directory": "../project_folder/experiments/metadata_large_gene2vec.csv",
    "sample_dir": "sample",
    "checkpoint_dir": "checkpoint",
    "basal_vs_real_folder": "basal_vs_real",
    "naming_key": "dataset_name",
    "embedding_folder": "embeddings",
    "resume_dir": '',
    "augment_train": True,
    "normalize": True,
    "print_every": 10,
    "sample_every": 1000,
    "save_every": 500,
    "eval_every": 500,
    "encode_rdkit": True,
    "num_layers_mapping_net": 1,
    "filename": "epoch_{epoch:04d}",
    "monitor": "fid_transformations",
    "mode": "min",
    "save_last": True,
    "offline": False,
    "project": "cpg0000_total_large_transported",
    "log_model": True,
    "accelerator": "gpu",
    "log_every_n_steps": 10, 

    "add_controls": False,
    "embedding_path": {"Compound": "/home/icb/alessandro.palma/environment/IMPA/IMPA/embeddings/cpg0000/cpd_embeddings.csv",
                       "CRISPR": "/home/icb/alessandro.palma/environment/IMPA/IMPA/embeddings/cpg0000/crispr_embeddings.csv", 
                       "ORF": "/home/icb/alessandro.palma/environment/IMPA/IMPA/embeddings/cpg0000/orf_embeddings.csv"}, 
    "multimodal": True,
    "modality_list": ["CRISPR", "Compound", "ORF"],
    "condition_embedding_dimension": 10
}

args = OmegaConf.create(config_dict)

dest_dir = "/home/icb/alessandro.palma/environment/IMPA/IMPA/project_folder/experiments/20240115_35929b66-de4e-4acf-a58f-d95b26034998_cpg0000_total_large_transported"

In [4]:
dataloader = CellDataLoader(args)

{'CRISPR': 328, 'Compound': 1224, 'ORF': 456}


In [5]:
import pandas as pd
mat = pd.read_csv("/home/icb/alessandro.palma/environment/IMPA/IMPA/embeddings/cpg0000/crispr_embeddings.csv", index_col=0)

In [6]:
dl = dataloader.train_dataloader()

In [7]:
# next(iter(dl))

In [8]:
solver = IMPAmodule(args, "", dataloader)

Number of parameters in generator: 24102789
Number of parameters in style_encoder: 14314208
Number of parameters in discriminator: 14681003
Number of parameters in mapping_network: 67232
Initializing embedding_matrix...
Initializing condition_embedding_matrix...
Initializing generator...
Initializing style_encoder...
Initializing discriminator...
Initializing mapping_network...
IMPAmodule(
  (embedding_matrix): ModuleList(
    (0): Embedding(296, 328)
    (1): Embedding(296, 1224)
    (2): Embedding(155, 456)
  )
  (condition_embedding_matrix): Embedding(3, 10)
  (generator): DataParallel(
    (module): Generator(
      (from_rgb): Conv2d(5, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (encode): ModuleList(
        (0): ResBlk(
          (actv): LeakyReLU(negative_slope=0.2)
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (norm1): Instance

In [9]:
from pytorch_lightning import Trainer
trainer = Trainer(max_epochs=1,
                        accelerator="gpu",)
            
# Fit the model 
trainer.fit(model=solver, 
            train_dataloaders=dataloader.train_dataloader(),
            val_dataloaders=dataloader.val_dataloader())

/home/icb/alessandro.palma/miniconda3/envs/IMPA_try/lib/python3.9/site-packages/lightning_fabric/plugins/environments/slurm.py:191: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python3.9 /home/icb/alessandro.palma/miniconda3/envs/IMPA_t ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/icb/alessandro.palma/miniconda3/envs/IMPA_try/lib/python3.9/site-packages/pytorch_lightning/trainer/configuration_validator.py:72: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
You are using a CUDA device ('NVIDIA A100-PCIE-40GB MIG 3g.20gb') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read htt

Training: |          | 0/? [00:00<?, ?it/s]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       device='cuda:0')
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       device='cuda:0')
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       device='cuda:0')
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       device='cuda:0')
tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2.])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       device='cuda:0')
tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.

FileNotFoundError: [Errno 2] No such file or directory: 'sample/000001_latent.jpg'

In [None]:
embedding_matrix = solver.embedding_matrix

In [None]:
latent_dim = {mod: emb.shape[1] for mod, emb in embedding_matrix.items()}

In [None]:
d = solver.discriminator.cuda()

In [None]:
inp = torch.randn(16,5,96,96).cuda()
mol = 1*torch.zeros(16).cuda()

In [8]:
d(inp, mol, 2)

NameError: name 'd' is not defined

In [10]:
a =  torch.ones(10)

In [12]:
(a == 10)

False

In [13]:
solver

IMPAmodule(
  (embedding_matrix): ModuleList(
    (0): Embedding(296, 328)
    (1): Embedding(296, 1224)
    (2): Embedding(155, 456)
  )
  (condition_embedding_matrix): Embedding(3, 10)
  (generator): DataParallel(
    (module): Generator(
      (from_rgb): Conv2d(5, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (encode): ModuleList(
        (0): ResBlk(
          (actv): LeakyReLU(negative_slope=0.2)
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (norm1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
          (norm2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
          (conv1x1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (1): ResBlk(
          (actv): LeakyReLU(negative_slope=0.2)
          (conv1): Conv2d(128, 128, ker

In [15]:
dataloader.n_mol

{'CRISPR': 296, 'Compound': 296, 'ORF': 155}

In [18]:
dataloader.mol_names

AttributeError: 'CellDataLoader' object has no attribute 'mol_names'