# SIGRN
SIGRN: Inferring Gene Regulatory Network with Soft Introspective Variational Autoencoders
# Architecture
![SIGRN](https://github.com/lryup/SIGRN/blob/main/images/SIGRN_arc.png?raw=true)

# Dependencies
- python==3.8
- torch==2.1.0
- scanpy==1.9.1
- Other required packages are listed in requirements.txt
- CUDA toolkit 11.0 or later.

# Installation

In [15]:
pip install scSIGRN==0.0.6

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


In [16]:
import scSIGRN as sc
from scSIGRN  import LightLogger,runSIGRN

In [22]:
# Hyperparameters passed to runSIGRN further down in this notebook.
# Each documented key has its description on the line(s) directly above it.
DEFAULT_GRNVAE_CONFIGS = {
    # ---------------- Train/Test split ----------------
    # Fraction of the data used for training; 1.0 uses all data for training.
    'train_split': 1.0,
    # Seed for random splitting.
    'train_split_seed': None,

    # ---------------- Neural net definition ----------------
    # Size of the dimension in the MLP layers.
    'hidden_dim': 128,
    # Size of the dimension of Z.
    'z_dim': 1,
    # Number of adjacency matrices to be modeled at the same time.
    'A_dim': 1,
    # Whether to train on non-zero data only.
    'train_on_non_zero': True,
    # Probability of augmented dropout. For example, 0.1 means that 10% of the
    # data will be temporarily set to zero in each forward pass.
    'dropout_augmentation_p': 0.1,
    # Where dropout augmentation happens. Choose among 'all' (default),
    # 'belowmean' and 'belowhalfmean'. With 'belowmean', augmentation only
    # happens on values below the global mean.
    'dropout_augmentation_type': 'all',
    # NOTE(review): presumably toggles GPU execution; confirm against runSIGRN.
    'cuda': True,

    # ---------------- Loss terms / hyperparameters ----------------
    # NOTE(review): the meaning of the loss weights below is not documented in
    # this notebook; check the scSIGRN package before tuning them.
    'alpha': 100,
    # Disabled: 'beta': 1,
    'chi': 0.5,
    'h_scale': 0,
    'delayed_steps_on_sparse': 30,

    # ---------------- Neural net training ----------------
    # Number of optimizations.
    'number_of_opt': 2,
    # Size of training batches.
    'batch_size': 64,
    # Number of training epochs.
    'n_epochs': 120,
    # Disabled: 'schedule': [120, 240],
    # Evaluation frequency.
    'eval_on_n_steps': 10,
    # Early stopping criteria.
    'early_stopping': 0,
    # Learning rate for the neural network.
    'lr_nn': 1e-4,
    # Learning rate for the adjacency matrix.
    'lr_adj': 2e-5,
    # NOTE(review): K1 / K2 are undocumented here; confirm their role in scSIGRN.
    'K1': 1,
    'K2': 1,
}

# Data Preparation
In our study, we trained our model using data from [BEELINE](https://bcb.cs.tufts.edu/DAZZLE/BEELINE.zip).
You can download the datasets from the provided link. 

In [18]:
# Load one BEELINE benchmark from the local 'data' directory. The call returns
# the expression data together with the matching ground-truth network.
# Note: `sc` is the scSIGRN package here (see the import cell), not scanpy.
data, ground_truth = sc.data.load_beeline(
    data_dir='data',
    benchmark_data='hESC', # options: hESC, hHep, mDC, mESC, mHSC-E, mHSC-GM, mHSC-L
    benchmark_setting='500_STRING'# options: 500_STRING, 1000_STRING, 1000_Non-ChIP, 500_Non-ChIP
)

In [19]:
# Inspect what was loaded: the AnnData object and the ground-truth tuple.
# NOTE(review): the ground-truth repr prints its full set of edge pairs, which
# produces a very long output; consider displaying a short summary instead.
data,ground_truth

(AnnData object with n_obs × n_vars = 758 × 910,
 (array([False, False, False, ..., False, False, False]),
  array([0., 0., 0., ..., 0., 0., 0.]),
  {(663, 84),
   (285, 510),
   (211, 332),
   (292, 406),
   (819, 597),
   (378, 180),
   (319, 539),
   (630, 401),
   (240, 395),
   (594, 478),
   (351, 757),
   (735, 603),
   (383, 789),
   (571, 760),
   (780, 754),
   (350, 612),
   (225, 123),
   (613, 348),
   (795, 239),
   (530, 579),
   (861, 333),
   (67, 589),
   (248, 335),
   (457, 100),
   (443, 202),
   (900, 32),
   (758, 177),
   (364, 546),
   (84, 498),
   (172, 870),
   (125, 795),
   (350, 239),
   (667, 849),
   (319, 660),
   (544, 104),
   (182, 17),
   (248, 191),
   (177, 576),
   (169, 721),
   (100, 887),
   (807, 334),
   (349, 688),
   (231, 287),
   (825, 617),
   (369, 337),
   (176, 208),
   (430, 204),
   (791, 246),
   (562, 423),
   (571, 737),
   (284, 415),
   (310, 287),
   (239, 672),
   (699, 248),
   (51, 850),
   (154, 305),
   (487, 594),
   (

In [20]:
# Logger object handed to the training routine.
# NOTE(review): presumably it records the per-evaluation metrics; confirm in scSIGRN.
logger = LightLogger()
# Train SIGRN on the expression matrix using DEFAULT_GRNVAE_CONFIGS. The ground
# truth is passed in as well; the run reports AUPR/AUROC/EPR-style metrics at
# each evaluation step. `vae` is the trained model used below via get_adj().
# NOTE(review): the exact contents of `adjs` and `result_rec` are not visible
# in this notebook; check runSIGRN before relying on them.
vae, adjs, result_rec = runSIGRN(
    data.X, DEFAULT_GRNVAE_CONFIGS, ground_truth=ground_truth, logger=logger)

{'epoch': 0.0, 'loss_rec': 1.68656, 'loss_kl': 0.01993, 'loss_sparse': 0.00121, 'loss': 2.70647, 'loss_ma': 1.01992, 'lossG': 1.68657, 'negative_adj': 0, 'AUPR': 0.03075473942790295, 'AUROC': 0.572956125941996, 'AUPRR': 1.2786515907600862, 'EP': 164, 'EPR': 1.6016970266441726, 'true_edge': 4257}
{'epoch': 10.0, 'loss_rec': 0.73547, 'loss_kl': 0.5159, 'loss_sparse': 0.00172, 'loss': 2.00445, 'loss_ma': 1.26898, 'lossG': 1.13358, 'negative_adj': 10510, 'AUPR': 0.04174114436778027, 'AUROC': 0.6273977833256792, 'AUPRR': 1.7354196991695314, 'EP': 273, 'EPR': 2.6662395626454822, 'true_edge': 4257}
{'epoch': 20.0, 'loss_rec': 0.51196, 'loss_kl': 0.51907, 'loss_sparse': 0.00173, 'loss': 1.75259, 'loss_ma': 1.24063, 'lossG': 0.96335, 'negative_adj': 64100, 'AUPR': 0.053456761144731225, 'AUROC': 0.6557372343496567, 'AUPRR': 2.2225053421385224, 'EP': 388, 'EPR': 3.789380770353286, 'true_edge': 4257}
{'epoch': 30.0, 'loss_rec': 0.49349, 'loss_kl': 0.48686, 'loss_sparse': 0.00155, 'loss': 1.88484, 

In [21]:
# Pull the learned adjacency matrix out of the trained model.
A = vae.get_adj()
# Score the inferred adjacency matrix against the ground truth.
ppi_auc = sc.get_metrics_auc(A, ground_truth)
# Show the metrics dict (AUPR, AUROC, AUPRR, EP, EPR, true_edge).
ppi_auc

{'AUPR': 0.06242845386484906,
 'AUROC': 0.6602465862469957,
 'AUPRR': 2.595510263714331,
 'EP': 525,
 'EPR': 5.127383774318235,
 'true_edge': 4257}