# Getting Started with IntroGRN
 

In [1]:
import numpy as np
from data import load_beeline
from logger import LightLogger
from IntroGRN import runIntroGRN
from evaluate import extract_edges, get_metrics

## Model Configurations

The parameter design follows the GRN-VAE paper.


The three key concepts proposed in the IntroGRN paper are controlled by the following parameters. 

- `delayed_steps_on_sparse`: Number of delayed steps on introducing the sparse loss. 
- `dropout_augmentation_p`: The proportion of data that will be randomly masked as dropout in each training step.
- `train_on_non_zero`: Whether to train the model on non-zero expression data.

The exact parameters are listed below for reference.

In [2]:
# Reference copy of the hyperparameters used in this notebook.
# Entries are grouped by concern; the inline notes cover only the three
# switches that the notebook text describes.
DEFAULT_GRNVAE_CONFIGS = {
    # --- Train/test split ---
    'train_split': 1.0,
    'train_split_seed': None,

    # --- Neural net definition ---
    'hidden_dim': 128,
    'z_dim': 1,
    'A_dim': 1,
    # Whether to train the model on non-zero expression data.
    'train_on_non_zero': True,
    # Proportion of data randomly masked as dropout in each training step.
    'dropout_augmentation_p': 0.1,
    'dropout_augmentation_type': 'all',
    # NOTE(review): presumably selects GPU execution; confirm how the
    # training code behaves when no CUDA device is available.
    'cuda': True,

    # --- Loss ---
    'alpha': 100,
    'beta': 1,
    'chi': 0.5,
    'h_scale': 0,
    # Number of delayed steps before the sparse loss is introduced.
    'delayed_steps_on_sparse': 30,

    # --- Neural net training ---
    'number_of_opt': 2,
    'batch_size': 64,
    'n_epochs': 120,
    'marginal': 10,
    # 'schedule': [120, 240],  # left disabled, as in the original cell
    'eval_on_n_steps': 10,
    'early_stopping': 0,
    'lr_nn': 1e-4,
    'lr_adj': 2e-5,
    'K1': 1,
    'K2': 1,
}

## Data loading


In [3]:
# Read the hESC benchmark under the 500_STRING setting from the `data`
# directory. The call returns a pair: the dataset and its ground truth.
data, ground_truth = load_beeline(
    data_dir='data', benchmark_data='hESC', benchmark_setting='500_STRING'
)

## Model Training


In [4]:
# A fresh logger is handed to the training call.
# NOTE(review): presumably it collects the periodic evaluation records shown
# in this cell's output — confirm against LightLogger.
logger = LightLogger()

# Train on the expression matrix with the reference configuration. Passing
# the ground truth appears to enable the AUPR/EP figures reported in the
# output while training — TODO confirm.
vae, adjs, result_rec = runIntroGRN(
    data.X,
    DEFAULT_GRNVAE_CONFIGS,
    ground_truth=ground_truth,
    logger=logger,
)

{'epoch': 0.0, 'loss_rec': 1.69468, 'loss_kl': 0.02026, 'loss_sparse': 0.00127, 'loss': 4.21488, 'loss_ma': 2.5202, 'lossG': 1.69494, 'negative_adj': 0, 'AUPR': 0.02442962598409241, 'AUPRR': 1.0156802075810543, 'EP': 117, 'EPR': 1.1426740982766352}
{'epoch': 10.0, 'loss_rec': 0.88669, 'loss_kl': 0.41838, 'loss_sparse': 0.00182, 'loss': 3.72553, 'loss_ma': 2.83884, 'lossG': 1.20509, 'negative_adj': 30549, 'AUPR': 0.04877106176426185, 'AUPRR': 2.0276938406232503, 'EP': 359, 'EPR': 3.506153857105231}
{'epoch': 20.0, 'loss_rec': 0.68735, 'loss_kl': 0.47527, 'loss_sparse': 0.00175, 'loss': 3.55991, 'loss_ma': 2.87256, 'lossG': 1.09882, 'negative_adj': 84516, 'AUPR': 0.05632915369707471, 'AUPRR': 2.3419272385571666, 'EP': 455, 'EPR': 4.443732604409137}
{'epoch': 30.0, 'loss_rec': 0.61943, 'loss_kl': 0.47549, 'loss_sparse': 0.00161, 'loss': 3.65947, 'loss_ma': 2.87876, 'lossG': 1.00676, 'negative_adj': 132411, 'AUPR': 0.05577217853359097, 'AUPRR': 2.3187705741844487, 'EP': 477, 'EPR': 4.65859

The learned adjacency matrix can be obtained with the `get_adj()` method. For BEELINE benchmarks, you can compute the performance metrics of this run with the `get_metrics` function.

In [5]:
# Pull the learned adjacency matrix out of the trained model.
A = vae.get_adj()
# Score it against the benchmark ground truth. As the last expression of the
# cell, the returned metrics (AUPR, AUPRR, EP, EPR) are shown as the output.
get_metrics(A, ground_truth)

{'AUPR': 0.06113034351341905,
 'AUPRR': 2.541540342436695,
 'EP': 517,
 'EPR': 5.0492522120429095}