In [1]:
%load_ext autoreload
%autoreload 2

import ncem
import numpy as np
import seaborn as sns

from scipy.stats import ttest_rel, ttest_ind

# Make seaborn's "colorblind" palette the default for plots drawn later on.
sns.set_palette("colorblind")

In [2]:
# Paths
# Both base directories are written with a trailing "/". The trailing separator
# is stripped before a sub-directory is appended so the derived path never
# contains a doubled separator (this used to yield "./tutorial//results/").
data_path_base = "./data/"
out_path = "./tutorial/"
fn_out_cv = out_path.rstrip("/") + "/results/"

# Dataset-specific inputs

In [3]:
# Identifier of the dataset; passed to get_data() as `data_origin`.
data_set = 'zhang'
# Directory holding this dataset. data_path_base is defined with a trailing
# "/", so strip it before appending to avoid a doubled separator (this used to
# yield "./data//zhang/").
data_path = data_path_base.rstrip('/') + '/zhang/'

# Forwarded to trainer.init_estim().
log_transform = False

# Forwarded to init_model().
use_domain = True
scale_node_size = False
output_layer = 'linear'

# Not passed to any call in the visible cells (the matching get_data() argument
# is not supplied) -- NOTE(review): confirm whether this is still needed.
merge_node_types_predefined = True

# Graph-level covariates to load; passed to get_data() as `graph_covar_selection`.
covar_selection = []



# Manual inputs

In [4]:
# Model identity: which model family, which optimizer, and what defines a domain.
model_class, optimizer, domain_type = "interactions", "adam", "patient"

# Optimisation hyper-parameters (both regularisation coefficients are switched off).
learning_rate = 0.05
l1 = l2 = 0.0

# Batching and neighbourhood settings.
n_eval_nodes = 10
radius = 100
batch_size = 64

# Run identifier assembled from the settings above; the "tutorial" prefix is
# what later selects the shortened training schedule.
gs_id = "_".join(("tutorial", model_class, str(radius), data_set, domain_type))

# Model and training

In [5]:
# Training length.
# ncv is presumably the number of cross-validation splits; it is not used in
# the visible cells -- TODO confirm.
ncv = 3
epochs_warmup = 0
# Run ids containing "tutorial" get a short 10-epoch run; anything else trains for 2000.
epochs = 10 if "tutorial" in gs_id else 2000
max_steps_per_epoch = 20
patience = 100

# Learning-rate schedule.
lr_schedule_factor = 0.5
lr_schedule_patience = 50
lr_schedule_min_lr = 1e-10

# Validation batching and shuffling.
val_bs = 16
max_val_steps_per_epoch = 10
shuffle_buffer_size = None

# Feature-space identifiers handed to get_data().
cond_feature_space_id = "type"
feature_space_id = "standard"

# All optional covariate inputs are disabled.
use_covar_node_label = use_covar_node_position = use_covar_graph_covar = False

In [6]:
# Instantiate the ncem trainer class for the interactions model.
trainer = ncem.train.TrainModelInteractions()

In [7]:
# Initialise the trainer's estimator with the log_transform setting; later
# cells access it as `trainer.estimator`.
trainer.init_estim(log_transform=log_transform)

In [8]:
# Load the dataset named by `data_set` from `data_path` into the estimator.
#
# `trainer` and its estimator are created in the two preceding cells, so they
# are not rebuilt here; this cell only loads data.
#
# The optional arguments feature_transformation, hold_out_covariate and
# merge_node_types_predefined are deliberately not passed, so the library
# defaults apply.
trainer.estimator.get_data(
    data_origin=data_set,
    data_path=data_path,
    radius=radius,
    graph_covar_selection=covar_selection,
    node_label_space_id=cond_feature_space_id,
    node_feature_space_id=feature_space_id,
    use_covar_node_position=use_covar_node_position,
    use_covar_node_label=use_covar_node_label,
    use_covar_graph_covar=use_covar_graph_covar,
    domain_type=domain_type,
)

Loading data from raw files
registering celldata


  if not is_categorical(df_full[k]):
  res = method(*args, **kwargs)


collecting image-wise celldata


  5%|▍         | 3/64 [00:00<00:02, 24.62it/s]

adding graph-level covariates


100%|██████████| 64/64 [00:03<00:00, 20.58it/s]


Loaded 64 images with complete data from 2 patients over 280327 cells with 254 cell features and 25 distinct celltypes.
Mean of mean node degree per images across images: 40.670611


In [9]:
# Node-level split: a 0.1 share each for test and validation, with the seed fixed at 0.
trainer.estimator.split_data_node(seed=0, test_split=0.1, validation_split=0.1)

Using split method: node. 
 Train-test-validation split is based on total number of nodes per patients over all images.

Excluded 3771 cells with the following unannotated cell type: [['other']] 

Whole dataset: 280327 cells out of 64 images from 2 patients.
Test dataset: 27657 cells out of 64 images from 2 patients.
Training dataset: 225156 cells out of 64 images from 2 patients.
Validation dataset: 24889 cells out of 64 images from 2 patients. 



In [10]:
# Build the model from the settings chosen in the configuration cells.
trainer.estimator.init_model(
    # architecture switches
    use_interactions=True,
    use_domain=use_domain,
    scale_node_size=scale_node_size,
    output_layer=output_layer,
    n_eval_nodes_per_graph=n_eval_nodes,
    # optimisation
    optimizer=optimizer,
    learning_rate=learning_rate,
    # regularisation
    l1_coef=l1,
    l2_coef=l2,
)

# Print the layer-by-layer summary of the training model.
trainer.estimator.model.training_model.summary()

Model: "interaction_linear_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_da_group (InputLayer)     [(None, 2)]          0                                            
__________________________________________________________________________________________________
tf.cast (TFOpLambda)            (None, 2)            0           input_da_group[0][0]             
__________________________________________________________________________________________________
tf.expand_dims (TFOpLambda)     (None, 1, 2)         0           tf.cast[0][0]                    
__________________________________________________________________________________________________
interaction (InputLayer)        [(None, 10, 625)]    0                                            
___________________________________________________________________________

In [11]:
# Fit the model, monitoring the validation loss for both early stopping and
# learning-rate reduction on plateau.
trainer.estimator.train(
    # run length
    epochs=epochs,
    epochs_warmup=epochs_warmup,
    max_steps_per_epoch=max_steps_per_epoch,
    # batching
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    validation_batch_size=val_bs,
    max_validation_steps=max_val_steps_per_epoch,
    # monitored quantity
    monitor_partition="val",
    monitor_metric="loss",
    # early stopping
    early_stopping=True,
    patience=patience,
    # learning-rate schedule
    reduce_lr_plateau=True,
    lr_schedule_factor=lr_schedule_factor,
    lr_schedule_patience=lr_schedule_patience,
    lr_schedule_min_lr=lr_schedule_min_lr,
)

Epoch 1/10
1/1 - 11s - loss: 919.0958 - custom_mae: 1.2895 - custom_mean_sd: 1.0000 - custom_mse: 5.3991 - custom_mse_scaled: 5.3991 - gaussian_reconstruction_loss: 919.0958 - r_squared: -3.7745e-01 - r_squared_linreg: 6.7922e-04 - val_loss: 732.7539 - val_custom_mae: 1.3727 - val_custom_mean_sd: 1.0367 - val_custom_mse: 4.2453 - val_custom_mse_scaled: 3.8610 - val_gaussian_reconstruction_loss: 732.7539 - val_r_squared: -8.4634e-02 - val_r_squared_linreg: 0.0380
Epoch 2/10
1/1 - 5s - loss: 734.0692 - custom_mae: 1.3753 - custom_mean_sd: 1.0367 - custom_mse: 4.2553 - custom_mse_scaled: 3.8713 - gaussian_reconstruction_loss: 734.0692 - r_squared: -8.7996e-02 - r_squared_linreg: 0.0388 - val_loss: 622.5869 - val_custom_mae: 1.3536 - val_custom_mean_sd: 1.0733 - val_custom_mse: 3.5152 - val_custom_mse_scaled: 2.9271 - val_gaussian_reconstruction_loss: 622.5868 - val_r_squared: 0.1003 - val_r_squared_linreg: 0.1829
Epoch 3/10
1/1 - 5s - loss: 622.2020 - custom_mae: 1.3556 - custom_mean_sd: 

In [15]:
# Score the trained model on the test partition (test image keys and test node indices).
evaluation_test = trainer.estimator.evaluate_any(
    node_idx=trainer.estimator.nodes_idx_test,
    img_keys=trainer.estimator.img_keys_test,
)

2734/2734 - 92s - loss: 454.0515 - custom_mae: 1.0095 - custom_mean_sd: 1.3203 - custom_mse: 2.3447 - custom_mse_scaled: 1.2388 - gaussian_reconstruction_loss: 454.0508 - r_squared: 0.3991 - r_squared_linreg: 0.4526


In [16]:
# Evaluate the model separately per node (cell) type. The call prints one
# metrics dict per type and returns two objects -- presumably the per-type
# split and the per-type metrics, judging by the names; TODO confirm.
split_per_node_type, evaluation_per_node_type = trainer.estimator.evaluate_per_node_type()

Evaluation for Astrocytes with 22879 cells
{'loss': 440.73388671875, 'custom_mae': 0.8678698539733887, 'custom_mean_sd': 1.3203126192092896, 'custom_mse': 2.258192300796509, 'custom_mse_scaled': 1.133967399597168, 'gaussian_reconstruction_loss': 440.7337341308594, 'r_squared': 0.3545365631580353, 'r_squared_linreg': 0.3949345350265503}
Evaluation for Endothelial with 20335 cells
{'loss': 412.3866271972656, 'custom_mae': 0.7204565405845642, 'custom_mean_sd': 1.3203126192092896, 'custom_mse': 1.8185585737228394, 'custom_mse_scaled': 0.910759687423706, 'gaussian_reconstruction_loss': 412.3864440917969, 'r_squared': 0.34632834792137146, 'r_squared_linreg': 0.3763602077960968}
Evaluation for L2/3 IT with 41996 cells
{'loss': 459.06640625, 'custom_mae': 1.073946237564087, 'custom_mean_sd': 1.3203126192092896, 'custom_mse': 2.3879148960113525, 'custom_mse_scaled': 1.2783193588256836, 'gaussian_reconstruction_loss': 459.0669250488281, 'r_squared': 0.4308493435382843, 'r_squared_linreg': 0.4747