In [1]:
import os

# Limit the numerical back-ends to one thread each and expose only the chosen GPUs.
# These variables are set BEFORE torch is imported so that they are guaranteed to be
# in place when the native libraries initialise (the original set them after
# `import torch`, at which point the thread settings may already have been read).
os.environ["MKL_NUM_THREADS"]="1"
os.environ["NUMEXPR_NUM_THREADS"]="1"
os.environ["OMP_NUM_THREADS"]="1"
os.environ["CUDA_VISIBLE_DEVICES"] = "5,6,7"

import json
import torch

import sys

import pytorch_lightning as pl
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import random_split
from torch_geometric.data import DataLoader

from spatial.merfish_dataset import FilteredMerfishDataset, MerfishDataset, SyntheticNonlinear, SyntheticDataset0, SyntheticDataset1, SyntheticDataset2, SyntheticDataset3
from spatial.models.monet_ae import MonetAutoencoder2D, TrivialAutoencoder, MonetDense
from spatial.train import train
from spatial.predict import test

import warnings
warnings.filterwarnings("ignore")

In [None]:
# List the entries of the MonetDense checkpoint directory (path is relative to the
# notebook's working directory).
os.listdir('../output/lightning_logs/checkpoints/MonetDense/.')

In [3]:
# Build a MerfishDataset from "../data" with radius=5.
# NOTE(review): this rebinds the name `test`, which the imports cell bound to
# `spatial.predict.test`. After running this cell, later cells that call
# `test(cfg_from_terminal)` no longer reach that function.
test = MerfishDataset("../data", radius=5)


KeyboardInterrupt



In [34]:
# Keep every sample whose `anid` attribute differs from 30
# (presumably the animal id — TODO confirm).
# NOTE(review): `test` must be the dataset object here, not the `test` function
# imported from spatial.predict; this cell depends on the dataset cell having run.
new_test = [sample for sample in test if sample.anid != 30]

In [None]:
# Display the filtered list of samples.
new_test

# Testing Any Model

In [2]:
!pip install openpyxl


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
# Read the MERFISH table and the MESSI ligand/receptor pairs, then split the
# MERFISH genes into ligands, receptors and response genes.
import pandas as pd

# Offset subtracted from a DataFrame column position to obtain a gene index
# (presumably the number of leading non-gene columns in merfish.csv — TODO confirm).
GENE_COLUMN_OFFSET = 9
# Number of gene indices assumed when deriving the response genes.
N_GENES = 155

# get relevant data stuff
df_file = pd.ExcelFile("~/spatial/data/messi.xlsx")
messi_df = pd.read_excel(df_file, "All.Pairs")
merfish_df = pd.read_csv("~/spatial/data/raw/merfish.csv")
merfish_df = merfish_df.drop(['Blank_1', 'Blank_2', 'Blank_3', 'Blank_4', 'Blank_5', 'Fos'], axis=1)

# Column name -> gene index, built once instead of rescanning the column list for
# every lookup. `setdefault` keeps the first position, matching `list.index`.
gene_index = {}
for position, column_name in enumerate(merfish_df.columns):
    gene_index.setdefault(column_name, position - GENE_COLUMN_OFFSET)

# these are the 13 ligands or receptors found in MESSI
non_response_genes = ['Cbln1', 'Cxcl14', 'Crhbp', 'Gabra1', 'Cbln2', 'Gpr165', 
                      'Glra3', 'Gabrg1', 'Adora2a', 'Vgf', 'Scg2', 'Cartpt',
                      'Tac2']
# this list stores the control genes aka "Blank_{int}"
blank_genes = []

# we will populate all of the non-response genes as being in one or the other
# the ones already filled in come from the existing 13 L/R genes above
ligands = ["Cbln1", "Cxcl14", "Cbln2", "Vgf", "Scg2", "Cartpt", "Tac2"]
receptors = ["Crhbp", "Gabra1", "Gpr165", "Glra3", "Gabrg1", "Adora2a"]

# ligands and receptor indexes in MERFISH
non_response_indeces = [gene_index[gene] for gene in non_response_genes]
ligand_indeces = [gene_index[gene] for gene in ligands]
receptor_indeces = [gene_index[gene] for gene in receptors]
all_pairs_columns = [
    "Ligand.ApprovedSymbol",
    "Receptor.ApprovedSymbol",
]

# Fast membership test for genes that are already classified.
already_classified = set(non_response_genes)

# for column name in the column names above
for column in all_pairs_columns:
    # Symbols listed in this MESSI column; MERFISH names are upper-cased before lookup.
    messi_symbols = set(messi_df[column])
    target_genes, target_indeces = (
        (ligands, ligand_indeces) if column[0] == "L" else (receptors, receptor_indeces)
    )
    for gene in merfish_df.columns:
        # The original compared `gene.upper()` against the mixed-case names in
        # `non_response_genes`, so the "already known" check could never match and
        # a gene could be appended more than once. Compare the MERFISH name itself.
        if gene.upper() in messi_symbols and gene not in already_classified:
            already_classified.add(gene)
            non_response_genes.append(gene)
            non_response_indeces.append(gene_index[gene])
            target_genes.append(gene)
            target_indeces.append(gene_index[gene])

# Collect any remaining control columns. Blank_1..Blank_5 were dropped above, so
# this stays empty unless other "Blank*" columns exist.
for gene in merfish_df.columns:
    if gene[:5] == "Blank" and gene not in blank_genes:
        blank_genes.append(gene)

print(non_response_genes)
print(f"There are {len(non_response_genes)} genes recognized as either ligands or receptors (including new ones).")
print(f"There are {len(blank_genes)} blank genes.")
print(f"There are {N_GENES - len(blank_genes) - len(non_response_genes)} genes that are treated as response variables.")
print(f"There are {len(ligands)} ligands.")
print(f"There are {len(receptors)} receptors.")

# Every gene index that is not a ligand/receptor; sorted so the order is deterministic.
response_indeces = sorted(set(range(N_GENES)) - set(non_response_indeces))

  warn(msg)


['Cbln1', 'Cxcl14', 'Crhbp', 'Gabra1', 'Cbln2', 'Gpr165', 'Glra3', 'Gabrg1', 'Adora2a', 'Vgf', 'Scg2', 'Cartpt', 'Tac2', 'Bdnf', 'Bmp7', 'Cyr61', 'Fn1', 'Fst', 'Gad1', 'Ntng1', 'Pnoc', 'Selplg', 'Sema3c', 'Sema4d', 'Serpine1', 'Adcyap1', 'Cck', 'Crh', 'Gal', 'Gnrh1', 'Nts', 'Oxt', 'Penk', 'Sst', 'Tac1', 'Trh', 'Ucn3', 'Avpr1a', 'Avpr2', 'Brs3', 'Calcr', 'Cckar', 'Cckbr', 'Crhr1', 'Crhr2', 'Galr1', 'Galr2', 'Grpr', 'Htr2c', 'Igf1r', 'Igf2r', 'Kiss1r', 'Lepr', 'Lpar1', 'Mc4r', 'Npy1r', 'Npy2r', 'Ntsr1', 'Oprd1', 'Oprk1', 'Oprl1', 'Oxtr', 'Pdgfra', 'Prlr', 'Ramp3', 'Rxfp1', 'Slc17a7', 'Slc18a2', 'Tacr1', 'Tacr3', 'Trhr']
There are 71 genes recognized as either ligands or receptors (including new ones).
There are 0 blank genes.
There are 84 genes that are treated as response variables.
There are 31 ligands.
There are 40 receptors.


In [None]:
import hydra
from hydra.experimental import compose, initialize

# radius -> test loss of the first model configuration that could be evaluated
test_loss_rad_dict = {}

# Hidden-layer widths to try for each radius, in order; the first that evaluates wins.
candidate_hidden_widths = [512, 256]
n_hidden_layers = 6

for rad in [0,25]:
    with initialize(config_path="../config"):
        # Composing the config is kept outside the try block: a broken config should
        # surface as an error instead of being reported as a missing model (or
        # silently reusing the config object left over from a previous iteration).
        cfg_from_terminal = compose(config_name="config")
        OmegaConf.update(cfg_from_terminal, "model.kwargs.observables_dimension", 71)
        OmegaConf.update(cfg_from_terminal, "model.kwargs.output_dimension", 84)
        OmegaConf.update(cfg_from_terminal, "optimizer.name", "Adam")
        OmegaConf.update(cfg_from_terminal, "training.logger_name", "table2_FULL_no_celltypes")
        OmegaConf.update(cfg_from_terminal, "datasets.dataset.include_celltypes", False)
        OmegaConf.update(cfg_from_terminal, "model.kwargs.include_skip_connections", True)
        OmegaConf.update(cfg_from_terminal, "radius", rad)
        OmegaConf.update(cfg_from_terminal, "gpus", [2])

        for hidden_width in candidate_hidden_widths:
            OmegaConf.update(
                cfg_from_terminal,
                "model.kwargs.hidden_dimensions",
                [hidden_width] * n_hidden_layers,
            )
            try:
                print(cfg_from_terminal.training.filepath)
                output = test(cfg_from_terminal)
                trainer, l1_losses, inputs, gene_expressions, celltypes, test_results = output
                test_loss_rad_dict[rad] = test_results[0]['test_loss']
            except Exception as error:
                # `Exception` (not a bare except) lets KeyboardInterrupt stop the sweep,
                # and the reason for the failure is shown instead of being discarded.
                print(f"Radius {rad}, hidden width {hidden_width} failed: {error!r}")
            else:
                break
        else:
            # Reached only when no candidate width could be evaluated.
            print(f"Model with radius of {rad} micrometers doesn't exist :(")

In [7]:
# Display the radius -> test loss mapping; original label for this run: "train only".
test_loss_rad_dict

{0: 0.33008134365081787,
 5: 0.330562025308609,
 10: 0.3282979428768158,
 15: 0.3260890543460846,
 20: 0.324729859828949,
 25: 0.32196560502052307}

In [8]:
# Display the radius -> test loss mapping; original label for this run: "512"
# (animal 30 for this and runs below).
test_loss_rad_dict

{0: 0.34988829493522644,
 5: 0.3494284152984619,
 10: 0.3461569845676422,
 15: 0.33938685059547424,
 20: 0.3366208076477051,
 25: 0.33311593532562256,
 30: 0.3348855972290039,
 35: 0.3332056701183319,
 40: 0.3343038260936737}

In [None]:
# Display the radius -> test loss mapping; original label for this run: "256".
test_loss_rad_dict

In [62]:
# Display the radius -> test loss mapping; original label for this run: "128".
test_loss_rad_dict

{0: 0.332673043012619,
 5: 0.33101674914360046,
 10: 0.3289061188697815,
 15: 0.3274388313293457,
 20: 0.3297003209590912,
 25: 0.3224516212940216,
 30: 0.3204915523529053,
 35: 0.32182925939559937,
 40: 0.32207295298576355,
 45: 0.3215629756450653}

In [9]:
# Persist the collected radius -> test loss mapping as JSON in the working directory.
with open("deepST_results_no_celltypes.json", "w") as results_file:
    results_file.write(json.dumps(test_loss_rad_dict))

In [None]:
import hydra
from hydra.experimental import compose, initialize

# radius -> test loss. Kept for the cells that display it; because it is keyed by
# radius only, it ends up holding the loss of the LAST synthetic experiment.
test_loss_rad_dict = {}
# synthetic experiment id -> {radius -> test loss}. Added so that the results of all
# four experiments are retained instead of being overwritten.
test_loss_by_synthetic_exp = {}

for rad in range(0,50,5):
    for synthetic_exp in range(4):
        with initialize(config_path="../config"):
            cfg_from_terminal = compose(config_name="config")
            OmegaConf.update(cfg_from_terminal, "model.kwargs.hidden_dimensions", [128, 128])
            OmegaConf.update(cfg_from_terminal, "training.logger_name", f"synthetic{synthetic_exp}")
            OmegaConf.update(cfg_from_terminal, "radius", rad)
            OmegaConf.update(cfg_from_terminal, "model.kwargs.response_genes", [0])
            OmegaConf.update(cfg_from_terminal, "datasets.dataset", [MerfishDataset])
            output = test(cfg_from_terminal)
            trainer, l1_losses, inputs, gene_expressions, celltypes, test_results = output
            test_loss = test_results[0]['test_loss']
            test_loss_rad_dict[rad] = test_loss
            test_loss_by_synthetic_exp.setdefault(synthetic_exp, {})[rad] = test_loss

In [None]:
# Display the radius -> test loss mapping currently held in `test_loss_rad_dict`.
test_loss_rad_dict

In [7]:
import hydra
from hydra.experimental import compose, initialize

# radius -> test loss for the "table2" run with six hidden layers of width 128
test_loss_rad_dict_response = {}

for rad in range(30,31,1):
    with initialize(config_path="../config"):
        cfg_from_terminal = compose(config_name="config")
        # Overrides applied in order. The observables_dimension (71) and
        # output_dimension (84) overrides used by other cells are deliberately
        # left out here, as in the original.
        overrides = [
            ("model.kwargs.hidden_dimensions", [128] * 6),
            ("training.logger_name", "table2"),
            ("radius", rad),
            ("gpus", [1]),
            ("model.kwargs.response_genes", [0]),
            ("datasets.dataset", [MerfishDataset]),
        ]
        for override_key, override_value in overrides:
            OmegaConf.update(cfg_from_terminal, override_key, override_value)
        output = test(cfg_from_terminal)
        (
            trainer,
            l1_losses,
            inputs,
            gene_expressions,
            celltypes,
            test_results,
        ) = output
        test_loss_rad_dict_response[rad] = test_results[0]['test_loss']

 33%|███████████████████████████████████                                                                      | 8/24 [00:06<00:12,  1.26it/s]

KeyboardInterrupt



In [None]:
# Mean absolute difference between `inputs` and `gene_expressions` in column 93.
(inputs[:, 93] - gene_expressions[:, 93]).abs().mean()

In [10]:
# Display the radius -> test loss mapping currently held in `test_loss_rad_dict_response`.
test_loss_rad_dict_response

{0: 0.35909464955329895,
 5: 0.35904601216316223,
 10: 0.3585062026977539,
 15: 0.35465207695961,
 20: 0.35176217555999756,
 25: 0.3485024571418762,
 30: 0.3471652865409851,
 35: 0.34563148021698,
 40: 0.34320735931396484,
 45: 0.3435050845146179,
 50: 0.3449954688549042,
 55: 0.34212666749954224,
 60: 0.3430047631263733}

# Save Top K Testing

In [15]:
import hydra
from hydra.experimental import compose, initialize

# radius -> lowest test loss found among the checkpoint variants tried for that radius
test_loss_rad_dict_response = {}

# Suffixes appended to the configured checkpoint path: "", "-v1", ..., "-v9".
checkpoint_suffixes = [""] + [f"-v{version}" for version in range(1, 10)]

for rad in range(0,65,5):
    for v_number in checkpoint_suffixes:
        with initialize(config_path="../config"):
            cfg_from_terminal = compose(config_name="config")
            overrides = [
                ("model.kwargs.observables_dimension", 71),
                ("model.kwargs.hidden_dimensions", [128] * 6),
                ("model.kwargs.output_dimension", 84),
                ("model.kwargs.dim", 2),
                ("training.logger_name", "new_psuedo_table2"),
                ("optimizer.name", "Adam"),
                ("radius", rad),
                ("gpus", [1]),
            ]
            for override_key, override_value in overrides:
                OmegaConf.update(cfg_from_terminal, override_key, override_value)
            # The file path is read only after the overrides above have been applied,
            # then extended with the version suffix.
            versioned_filepath = cfg_from_terminal.training.filepath + v_number
            OmegaConf.update(cfg_from_terminal, "training.filepath", versioned_filepath)
            output = test(cfg_from_terminal)
            trainer, l1_losses, inputs, gene_expressions, celltypes, test_results = output
            candidate_loss = test_results[0]['test_loss']
            best_so_far = test_loss_rad_dict_response.get(rad)
            if best_so_far is None or candidate_loss < best_so_far:
                test_loss_rad_dict_response[rad] = candidate_loss

 29%|███████████████████████████████▌                                                                            | 7/24 [00:05<00:13,  1.22it/s]

KeyboardInterrupt



# General deepST for Individual Gene Predictions

In [None]:
import hydra
from hydra.experimental import compose, initialize

# radius -> test loss
test_loss_rad_dict_response = {}
# NOTE(review): the two dicts below are created here but never filled by this cell.
test_loss_rad_dict_93 = {}
test_loss_rad_dict_151 = {}

for rad in range(0,80,10):
    with initialize(config_path="../config"):
        cfg_from_terminal = compose(config_name="config")
        OmegaConf.update(cfg_from_terminal, "model.kwargs.hidden_dimensions", [128, 128, 128, 128, 128, 128])
        OmegaConf.update(cfg_from_terminal, "radius", rad)
        # Evaluate the model for this radius. The original omitted this call, so the
        # unpacking below reused whatever `output` an earlier cell had left behind and
        # every radius was assigned the same loss.
        output = test(cfg_from_terminal)
        trainer, l1_losses, inputs, gene_expressions, celltypes, test_results = output
        test_loss_rad_dict_response[rad] = test_results[0]['test_loss']

In [None]:
# Display the two per-gene dictionaries as a tuple.
test_loss_rad_dict_93, test_loss_rad_dict_151

In [None]:
# equivalent to spatial
# Evaluate the "neighbors_large" model at radius 0 and compute the mean absolute
# error over the rows where `celltypes == 6`, restricted to the response-gene columns.

import hydra
from hydra.experimental import compose, initialize

with initialize(config_path="../config"):
    cfg_from_terminal = compose(config_name="config")
    overrides = [
        ("model.kwargs.hidden_dimensions", [256, 256]),
        ("training.logger_name", "neighbors_large"),
        ("radius", 0),
    ]
    for override_key, override_value in overrides:
        OmegaConf.update(cfg_from_terminal, override_key, override_value)
    output = test(cfg_from_terminal)
    trainer, l1_losses, inputs, gene_expressions, celltypes, test_results = output
    excitatory_cells = torch.nonzero(celltypes == 6, as_tuple=True)[0]
    excitatory_residuals = (gene_expressions-inputs)[excitatory_cells]
    response_residuals = torch.index_select(excitatory_residuals, 1, torch.tensor(response_indeces))
    MAE_excitatory = response_residuals.abs().mean().item()

In [None]:
# Display the mean absolute error stored in `MAE_excitatory`.
MAE_excitatory

In [None]:
# Split the 6-tuple held in `output` into its named parts.
(
    trainer,
    l1_losses,
    inputs,
    gene_expressions,
    celltypes,
    test_results,
) = output

In [None]:
# Display the 'test_loss: mae_response' entry of the first element of `test_results`.
test_results[0]['test_loss: mae_response']

In [None]:
# equivalent to spatial
# Compose the config named "config" from ../config without any overrides and
# evaluate it with `test`.

import hydra
from hydra.experimental import compose, initialize

with initialize(config_path="../config"):
    cfg_from_terminal = compose(config_name="config")
    # The result is kept in `output` for inspection.
    output = test(cfg_from_terminal)

# Testing Models with Updates

In [None]:
# equivalent to spatial
# Evaluate the model with the dataset restricted to the behaviors listed below.

import hydra
from hydra.experimental import compose, initialize

behaviors_of_interest = ["Parenting"]

with initialize(config_path="../config"):
    cfg_from_terminal = compose(config_name="config")
    # Select the model of interest by overriding the dataset's behaviors.
    OmegaConf.update(
        cfg_from_terminal,
        "datasets.dataset.behaviors",
        behaviors_of_interest,
    )
    output = test(cfg_from_terminal)

In [None]:
# Split the 6-tuple held in `output` into its named parts.
(
    trainer,
    l1_losses,
    inputs,
    gene_expressions,
    celltypes,
    test_results,
) = output

In [None]:
# Read the comma-separated non-response gene indices and derive the response indices
# as every index in range(160) that is not listed in the file.
with open('../spatial/non_response.txt', "r") as genes_file:
    # Skip empty tokens (e.g. from a trailing comma or newline) instead of crashing in int().
    features = [int(x) for x in genes_file.read().split(",") if x.strip()]
# The `with` block has already closed the file, so no explicit close() is needed.
# Sorting makes the index order deterministic rather than dependent on set iteration.
response_indeces = torch.tensor(sorted(set(range(160)) - set(features)))

In [None]:
# Indices of the entries of `celltypes` equal to 6
# (the variable name suggests label 6 marks excitatory cells — TODO confirm).
excitatory_cells = torch.nonzero(celltypes == 6, as_tuple=True)[0]

In [None]:
import torch

# L1 loss between `inputs` and `gene_expressions`, restricted to the rows in
# `excitatory_cells` and the columns in `response_indeces`.
loss = torch.nn.L1Loss()
selected_inputs = inputs[excitatory_cells].index_select(1, response_indeces)
selected_expressions = gene_expressions[excitatory_cells].index_select(1, response_indeces)
loss(selected_inputs, selected_expressions)

In [None]:
# equivalent to spatial
# Evaluate the model with the dataset restricted to the behaviors listed below.

import hydra
from hydra.experimental import compose, initialize

behaviors_of_interest = ["Virgin Parenting"]

with initialize(config_path="../config"):
    cfg_from_terminal = compose(config_name="config")
    # Select the model of interest by overriding the dataset's behaviors.
    OmegaConf.update(
        cfg_from_terminal,
        "datasets.dataset.behaviors",
        behaviors_of_interest,
    )
    output = test(cfg_from_terminal)

In [None]:
# Split the 6-tuple held in `output` into its named parts.
(
    trainer,
    l1_losses,
    inputs,
    gene_expressions,
    celltypes,
    test_results,
) = output

In [None]:
# Indices of the entries of `celltypes` equal to 6
# (the variable name suggests label 6 marks excitatory cells — TODO confirm).
excitatory_cells = torch.nonzero(celltypes == 6, as_tuple=True)[0]

In [None]:
import torch

# L1 loss between `inputs` and `gene_expressions`, restricted to the rows in
# `excitatory_cells` and the columns in `response_indeces`.
loss = torch.nn.L1Loss()
selected_inputs = inputs[excitatory_cells].index_select(1, response_indeces)
selected_expressions = gene_expressions[excitatory_cells].index_select(1, response_indeces)
loss(selected_inputs, selected_expressions)

In [None]:
# equivalent to spatial
# Evaluate the model with the dataset restricted to the behaviors listed below.

import hydra
from hydra.experimental import compose, initialize

behaviors_of_interest = ["Naive"]

with initialize(config_path="../config"):
    cfg_from_terminal = compose(config_name="config")
    # Select the model of interest by overriding the dataset's behaviors.
    OmegaConf.update(
        cfg_from_terminal,
        "datasets.dataset.behaviors",
        behaviors_of_interest,
    )
    output = test(cfg_from_terminal)

In [None]:
# Split the 6-tuple held in `output` into its named parts.
(
    trainer,
    l1_losses,
    inputs,
    gene_expressions,
    celltypes,
    test_results,
) = output

In [None]:
# Indices of the entries of `celltypes` equal to 6
# (the variable name suggests label 6 marks excitatory cells — TODO confirm).
excitatory_cells = torch.nonzero(celltypes == 6, as_tuple=True)[0]

In [None]:
import torch

# L1 loss between `inputs` and `gene_expressions`, restricted to the rows in
# `excitatory_cells` and the columns in `response_indeces`.
loss = torch.nn.L1Loss()
selected_inputs = inputs[excitatory_cells].index_select(1, response_indeces)
selected_expressions = gene_expressions[excitatory_cells].index_select(1, response_indeces)
loss(selected_inputs, selected_expressions)