# Getting Started with IntroGRN
 

In [1]:
import numpy as np
from data import load_beeline
from logger import LightLogger
from SIGRN import runSIGRN
from evaluate import extract_edges, get_metrics_auc

## Model Configurations

The parameter design follows the GRN-VAE paper.


The three key concepts proposed in the IntroGRN paper are controlled by the following parameters. 

- `delayed_steps_on_sparse`: Number of steps by which the introduction of the sparse loss is delayed.
- `dropout_augmentation_p`: The proportion of data that will be randomly masked as dropout in each training step.
- `train_on_non_zero`: Whether to train the model on non-zero expression data

The exact parameters are listed below for reference.

In [2]:
# Default configuration for a training run. Built with dict(...) keywords;
# the resulting mapping has the same string keys, values and key order as an
# equivalent dict literal.
DEFAULT_GRNVAE_CONFIGS = dict(
    # ---- Train/test split ----
    train_split=1.0,        # use all data for training
    train_split_seed=None,  # seed for random splitting

    # ---- Neural net definition ----
    hidden_dim=128,          # size of dimension in the MLP layers
    z_dim=1,                 # size of dimension of Z
    A_dim=1,                 # number of adjacency matrices modeled at the same time
    train_on_non_zero=True,  # whether to train on non-zero data only
    # Probability of augmented dropout. For example, 0.1 means that 10% of
    # the data is temporarily set to zero in each forward pass.
    dropout_augmentation_p=0.1,
    # Where dropout augmentation happens: 'all' (default), 'belowmean' or
    # 'belowhalfmean'. With 'belowmean', augmentation only happens on values
    # below the global mean.
    dropout_augmentation_type='all',
    cuda=True,  # presumably runs the model on a CUDA device -- confirm in SIGRN

    # ---- Loss terms / hyperparameters ----
    # NOTE(review): the roles of alpha, chi and h_scale are not documented in
    # this notebook -- confirm against the SIGRN implementation.
    alpha=100,
    # beta=1,  (disabled entry, kept for reference)
    chi=0.5,
    h_scale=0,
    delayed_steps_on_sparse=30,  # delay before the sparse loss is introduced

    # ---- Neural net training ----
    number_of_opt=2,     # number of optimizations
    batch_size=64,       # size of training batches
    n_epochs=120,        # number of training epochs
    # schedule=[120, 240],  (disabled entry, kept for reference)
    eval_on_n_steps=10,  # evaluation frequency
    early_stopping=0,    # early stopping criteria
    lr_nn=1e-4,          # learning rate for the neural network
    lr_adj=2e-5,         # learning rate for the adjacency matrix
    # NOTE(review): K1 / K2 are undocumented here -- confirm their meaning.
    K1=1,
    K2=1,
)

## Data loading
BEELINE benchmarks can be loaded with the `load_beeline` function, where you specify where to look for data and which benchmark to load. The first time it is called, the function downloads the files automatically.


In [3]:
# Fetch one BEELINE benchmark (hESC, setting '500_STRING') from the local
# 'data' directory, together with its ground truth.
data, ground_truth = load_beeline(
    data_dir='data', benchmark_data='hESC', benchmark_setting='500_STRING',
)

In [4]:
# Bare expression: the notebook displays the repr of the loaded dataset.
data

AnnData object with n_obs × n_vars = 758 × 910

## Model Training


In [6]:
# Train on the expression matrix using the default configuration. The ground
# truth is passed along with the logger; the recorded output shows benchmark
# metrics (AUPR, AUROC, EPR, ...) reported at regular epochs during training.
logger = LightLogger()
vae, adjs, result_rec = runSIGRN(
    data.X,
    DEFAULT_GRNVAE_CONFIGS,
    ground_truth=ground_truth,
    logger=logger,
)

{'epoch': 0.0, 'loss_rec': 1.63507, 'loss_kl': 0.0286, 'loss_sparse': 0.00124, 'loss': 2.66111, 'loss_ma': 1.02604, 'lossG': 1.638, 'negative_adj': 0, 'AUPR': 0.024300847101724185, 'AUROC': 0.5008846082740657, 'AUPRR': 1.0103261279868359, 'EP': 106, 'EPR': 1.0352432001480627, 'true_edge': 4257}
{'epoch': 10.0, 'loss_rec': 0.71228, 'loss_kl': 0.52192, 'loss_sparse': 0.00174, 'loss': 1.98317, 'loss_ma': 1.27088, 'lossG': 1.11949, 'negative_adj': 15402, 'AUPR': 0.04113493422707818, 'AUROC': 0.6250494421875435, 'AUPRR': 1.7102160533197353, 'EP': 288, 'EPR': 2.8127362419117174, 'true_edge': 4257}
{'epoch': 20.0, 'loss_rec': 0.49394, 'loss_kl': 0.51414, 'loss_sparse': 0.00175, 'loss': 1.73117, 'loss_ma': 1.23723, 'lossG': 0.94298, 'negative_adj': 68271, 'AUPR': 0.05300461080571384, 'AUROC': 0.6546410550936744, 'AUPRR': 2.203706849255739, 'EP': 392, 'EPR': 3.828446551490949, 'true_edge': 4257}
{'epoch': 30.0, 'loss_rec': 0.49875, 'loss_kl': 0.4794, 'loss_sparse': 0.00157, 'loss': 1.88401, 'lo

The learned adjacency matrix can be obtained with the `get_adj()` method. For BEELINE benchmarks, you can get the performance metrics of this run using the `get_metrics_auc` function.

In [8]:
# Pull the learned adjacency matrix out of the trained model.
A = vae.get_adj()
# Score it against the benchmark ground truth; as the last expression of the
# cell, the returned metrics are displayed by the notebook.
get_metrics_auc(A, ground_truth)

{'AUPR': 0.0622859198764375,
 'AUROC': 0.6584840362216745,
 'AUPRR': 2.589584305165826,
 'EP': 519,
 'EPR': 5.068785102611741,
 'true_edge': 4257}