In [1]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd
import h5py
import time
import pickle


import scanpy.api as sc
import sceb.data_loader as dl
import sceb.scdd as sd
from sceb.util import *

%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters


In [68]:
## Toy example built from the 10x 4k dataset: pick well-expressed marker
## genes and extract their count matrix Y (cells x selected genes).
data = dl.load_10x_4k()
# Loading/indexing this dataset emits "Variable names are not unique"; the
# selection below indexes columns by gene name, so de-duplicate the names
# first so that every name refers to exactly one column.
data.var_names_make_unique()
mean_count = np.array(data.X.mean(axis=0)).reshape(-1)
sort_idx = np.argsort(mean_count)[::-1]

# Candidate marker genes, grouped by the label used as the dictionary key.
marker_gene_dic = {
    'T-cells': ['IL7R','CD3G','CD3E','CD3D','LCK'],
    'NK-cells': ['NKG7','PRF1','KLRD1','GZMA','HOPX','CST7'],
    'B-cells': ['CD79A','BANK1','IGHD','LINC00926','MS4A1'],
    'myeloid': ['S100A8','S100A9'],
    # 'CD68' was previously spelled 'CD6S', which is not a gene symbol and was
    # therefore silently skipped by the `in data.var_names` check below.
    # NOTE(review): if CD68 passes the expression filter, G grows by one.
    'Innate Immunity': ['MNDA','FGL2','CLEC7A','CD68','IFI30'],
    'HSC': ['CD34','GATA2','PRSS57','SMIM24','CYTL1'],
}

# Keep only the markers that exist in the dataset and whose mean count over
# all cells exceeds 0.5; the dictionary order determines the column order of Y.
gene_list = []
for marker_genes in marker_gene_dic.values():
    for gene in marker_genes:
        if gene in data.var_names and data[:,gene].X.mean()>0.5:
            gene_list.append(gene)
G = len(gene_list)

# Dense integer count matrix restricted to the selected genes.
Y = np.array(data[:,gene_list].X.todense()).astype(dtype=int)


Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [69]:

## Ground truth for the simulation, derived from the count matrix Y.

# Equal weight for every row of Y; the weights sum to one.
n_row = Y.shape[0]
p_true = np.ones([n_row]) / n_row

## Rescale x so that every column ends up with the same mean:
##   1. divide each column by its mean,
##   2. cap the values at the global 99.9th percentile,
##   3. re-normalise each column after the capping,
##   4. scale the whole matrix so that its entries sum to the number of rows.
x_true = Y / Y.mean(axis=0)
x_cap = np.percentile(x_true, 99.9)
x_true = np.clip(x_true, None, x_cap)
x_true = x_true / np.mean(x_true, axis=0)
x_true = x_true / x_true.sum() * n_row

## Derived quantities

# kappa is tied to the number of selected genes (multiplier currently 1).
kappa = 1 * G
# Column-wise average of exp(-kappa * x): the reference value that the
# estimates are compared against later in the notebook.
temp = np.exp(-kappa * x_true)
inactive_true = temp.mean(axis=0)

In [76]:
## Simulate data at two values of Nr and compare the estimated inactive
## probabilities with the reference `inactive_true`.
Nc = 30000

for Nr in [5*G,10*G]:
    print('## Nr=%d'%Nr)
    # Passed to the estimator and reported below; computed once per setting.
    relative_depth = Nr/kappa
    # The simulated dataset gets its own name so that the 10x dataset bound to
    # `data` in the loading cell is not overwritten by this loop.
    data_sim,size_factor = dl.poi_data_gen_nd(p_true,x_true,Nc=Nc,Nr=Nr,random_seed=0)
    print('max',np.max(data_sim.X.todense()))
    # NOTE(review): p0_ml is returned but not used in this cell; presumably a
    # plug-in estimate kept for comparison -- confirm against sceb.scdd.
    p0_ml,p0_dd = sd.dd_inactive_prob(data_sim,relative_depth=relative_depth,size_factor=size_factor,verbose=False)
    print('relative depth=%0.4f'%relative_depth)
    # log10 of the mean squared error of p0_dd, relative to the mean square of
    # the reference values (a scalar).
    err_dd = np.log10(np.mean((p0_dd-inactive_true)**2)) - np.log10(np.mean(inactive_true**2))
    print(err_dd)
    # Per-gene signed error.
    print(p0_dd-inactive_true)
    print('')

## Nr=70
max 152.0
relative depth=5.0000
-5.0301276719149515
[ 0.00174211  0.0025752   0.00165316 -0.00086123 -0.00101834 -0.00130779
 -0.00196834 -0.00261531 -0.00050369  0.00089386 -0.00381493 -0.00431522
 -0.00285633  0.00076344]

## Nr=140
max 288.0
relative depth=10.0000
-4.456785510826151
[ 0.0063921   0.00659681  0.00824765  0.0073807   0.0058104   0.00102944
  0.00015683 -0.00066176  0.00146555  0.00193245  0.00010354  0.00096661
 -0.00060435  0.00333609]

