## This notebook runs inference on a GEARS model trained on the Norman et al. 2019 dataset

- Download trained GEARS model and Dataloader from Dataverse
- Model is trained on Norman et al. 2019 (Science) dataset
- The examples below show how to predict perturbation outcomes and genetic interaction (GI) scores

In [1]:
import sys
import tarfile
from zipfile import ZipFile

import torch

# Add the parent directory to the import path before importing gears.
sys.path.append('../')

from gears import PertData, GEARS
from gears.utils import dataverse_download

  from .autonotebook import tqdm as notebook_tqdm


### Download saved model and dataloader

In [2]:
## Download dataloader from dataverse
dataverse_download('https://dataverse.harvard.edu/api/access/datafile/6979957', 'norman_umi_go.tar.gz')

## Extract and set up dataloader directory
## The archive comes from the network, so use the 'data' extraction filter
## when this Python version provides it (it rejects members such as absolute
## paths or links pointing outside the destination). Older interpreters
## without extraction filters fall back to the plain call.
with tarfile.open('norman_umi_go.tar.gz', 'r:gz') as archive:
    if hasattr(tarfile, 'data_filter'):
        archive.extractall(filter='data')
    else:
        archive.extractall()

Downloading...
100%|█████████████████████████████████████████████████████████| 1.10G/1.10G [01:56<00:00, 9.43MiB/s]


In [3]:
## Download model from dataverse
dataverse_download('https://dataverse.harvard.edu/api/access/datafile/6979956', 'model.zip')

## Extract and set up model directory
## The handle is deliberately not called `zip`: in a notebook the name stays
## bound after the `with` block and would shadow the builtin zip() for every
## later cell.
with ZipFile('model.zip', 'r') as model_archive:
    model_archive.extractall(path = './')

Downloading...
100%|█████████████████████████████████████████████████████████| 10.9M/10.9M [00:01<00:00, 10.0MiB/s]


### Load model and dataloader

In [4]:
## Locations and names of the extracted dataloader and the pretrained model
data_path = './'
data_name = 'norman_umi_go'
model_name = 'gears_misc_umi_no_test'

## Pick the device at runtime instead of hard-coding a GPU index: use CUDA
## when a GPU is visible, otherwise fall back to CPU. Set this to e.g.
## 'cuda:1' to pin a specific GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Load the dataset, create the 'no_test' split and build the dataloaders
pert_data = PertData(data_path)
pert_data.load(data_path = data_path + data_name)
pert_data.prepare_split(split = 'no_test', seed = 1)
pert_data.get_dataloader(batch_size = 32, test_batch_size = 128)

## Build the model wrapper and load the downloaded checkpoint
gears_model = GEARS(pert_data, device = device,
                        weight_bias_track = False,
                        proj_name = 'gears',
                        exp_name = model_name)
gears_model.load_pretrained('./model_ckpt')

Downloading...
100%|█████████████████████████████████████████████████████████| 9.46M/9.46M [00:01<00:00, 6.10MiB/s]
Downloading...
100%|███████████████████████████████████████████████████████████| 559k/559k [00:00<00:00, 1.53MiB/s]
These perturbations are not in the GO graph and is thus not able to make prediction for...
['RHOXF2BB+ctrl' 'LYL1+IER5L' 'ctrl+IER5L' 'KIAA1804+ctrl' 'IER5L+ctrl'
 'RHOXF2BB+ZBTB25' 'RHOXF2BB+SET']
Local copy of pyg dataset is detected. Loading...
Done!
Creating new splits....


test_pert_genes None
test_perts None


Saving new splits at ./norman_umi_go/splits/norman_umi_go_no_test_1_0.75.pkl
Done!
Creating dataloaders....
Done!


In [6]:
## Re-create the model wrapper and reload the checkpoint (same construction as
## in the loading cell; relies on `pert_data` and `model_name` defined there).
## The device is chosen at runtime rather than hard-coding a GPU index, so the
## cell also runs on machines with fewer GPUs or none at all.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

gears_model = GEARS(pert_data, device = device,
                        weight_bias_track = False,
                        proj_name = 'gears',
                        exp_name = model_name)
gears_model.load_pretrained('./model_ckpt')

### Make transcriptional outcome predictions

In [None]:
## The argument is a list holding one gene list, here the pair CNN1 + CBL
query_perts = [['CNN1', 'CBL']]
gears_model.predict(query_perts)

### Make GI outcome prediction

In [10]:
## GI prediction for the CNN1 + CBL pair, without a gene-set file
gene_pair = ['CNN1', 'CBL']
gears_model.GI_predict(gene_pair, GI_genes_file=None)

{'ts': TheilSenRegressor(fit_intercept=False, max_iter=1000, max_subpopulation=100000,
                   random_state=1000),
 'c1': 1.0942881586568658,
 'c2': 0.684177476331237,
 'mag': 1.290567856912458,
 'dcor': 0.8649321390185458,
 'dcor_singles': 0.7813616432466521,
 'dcor_first': 0.827889410401002,
 'dcor_second': 0.8135062057416026,
 'corr_fit': 0.9303117736028462,
 'dominance': 0.20396292696340834,
 'eq_contr': 0.9826266594563244}

In [None]:
## To reproduce the results from the paper, use the same gene set
## (genes_with_hi_mean.npy, downloaded from Dataverse below)
gi_genes_url = 'https://dataverse.harvard.edu/api/access/datafile/6979958'
dataverse_download(gi_genes_url, 'genes_with_hi_mean.npy')

gene_pair = ['CNN1', 'CBL']
gears_model.GI_predict(gene_pair, GI_genes_file='./genes_with_hi_mean.npy')