In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc

# Scanpy verbosity 0: per scanpy's documented verbosity levels this reports
# errors only, keeping cell outputs free of log messages.
sc.settings.verbosity = 0

In [5]:
import GenKI as gk
from GenKI.preprocesing import build_adata
from GenKI.dataLoader import DataLoader
from GenKI.train import VGAE_trainer
from GenKI import utils

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Example data: build the AnnData from the WT file and keep only a small slice
# of it (first 100 rows x first 300 columns) so the demo stays quick.
# NOTE(review): the path is relative, presumably to the notebook's working
# directory -- confirm the file is present there before running.

adata = build_adata("data/microglial_seurat_WT.h5ad")[:100, :300].copy()
adata

ValueError: incorrect file path given to counts

In [4]:
# Wrap the AnnData subset for GenKI, then load the wild-type (WT) input and
# its knock-out (KO) counterpart from the same wrapper.

data_wrapper = DataLoader(
    adata,                       # AnnData object
    target_gene=["TUBG1"],       # name of the gene to knock out
    target_cell=None,            # obs name of a cell type; None uses all cells
    obs_label="ident",           # label column name (NOTE(review): presumably a column of adata.obs -- confirm)
    GRN_file_dir="GRNs",         # folder that holds the GRN files
    rebuild_GRN=True,            # whether to build the GRN with pcNet
    pcNet_name="pcNet_example",  # file name of the GRN
    verbose=True,                # print progress messages
    n_cpus=8,                    # number of CPUs for multiprocessing
)

data_wt = data_wrapper.load_data()
data_ko = data_wrapper.load_kodata()

use all the cells (100) in adata
build GRN
ray init, using 8 CPUs
execution time of making pcNet: 6.26 s
GRN has been built and saved in "GRNs\pcNet_example.npz"
init completed



In [5]:
# Set up the VGAE trainer on the WT data.

hyperparams = dict(epochs=100, lr=7e-4, beta=1e-4, seed=8096)
log_dir = None  # no log directory is handed to the trainer

sensei = VGAE_trainer(
    data_wt,
    log_dir=log_dir,
    verbose=False,
    **hyperparams,  # keys mirror the trainer keywords: epochs, lr, beta, seed
)

In [6]:
# %%timeit
# ^ Benchmarking magic, left disabled. If enabled it must remain the first line
#   of the cell, and it would execute the training call repeatedly to time it.
sensei.train()

In [7]:
# Optional: uncomment to save the trained model (presumably under the name 'model_example').
# sensei.save_model('model_example')

In [8]:
# Encode the WT and KO data with the trained model, then compute a per-gene
# distance between the two sets of latent variables (by="KL").

z_mu_wt, z_std_wt = sensei.get_latent_vars(data_wt)
z_mu_ko, z_std_ko = sensei.get_latent_vars(data_ko)
dis = utils.get_distance(
    z_mu_ko, z_std_ko,  # latent variables of the KO data
    z_mu_wt, z_std_wt,  # latent variables of the WT data
    by="KL",
)
print(dis.shape)

(300,)


In [9]:
# Raw ranked gene list (no permutation test): rank=True adds a rank column
# next to each gene's WT-vs-KO distance; only the first rows are displayed.

res_raw = utils.get_generank(data_wt, dis, rank=True)
res_raw.head()

Unnamed: 0,dis,rank
TUBG1,2.56549,1
TYROBP,0.001753,2
LST1,0.001628,3
CYBA,0.001517,4
LAT2,0.001438,5


In [10]:
# Optional permutation test: run 100 permutations on the KO data (by="KL")
# and pass the result to the gene ranking alongside the observed distances.

null = sensei.pmt(data_ko, n=100, by="KL")
res = utils.get_generank(
    data_wt,
    dis,
    null,
    # save_significant_as="gene_list_example",  # disabled optional argument
)
res

Permutating: 100%|██████████| 100/100 [00:03<00:00, 33.02it/s]


Unnamed: 0,dis,index,hit,rank
TUBG1,2.56549,163,100,1
