In [1]:
#xenopus_briggs2018
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import scanpy.api as sc
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from scipy.stats import binom

from quasildr.graphdr import graphdr

# Load the expression table and the per-cell annotation table (both
# tab-separated). Columns of `adata` are indexed with a mask built from the
# rows of `anno`, so the two are assumed to be in the same cell order.
adata = pd.read_csv('./data/other/briggs_2018.data.gz', sep='\t')
anno = pd.read_csv('./data/other/briggs_2018.anno', sep='\t')

# Drop every cell whose cluster label is 'Outlier'. The mask is computed once
# and applied to both tables so they stay aligned.
keep_cell = anno['Cluster_name'].values != 'Outlier'
adata = adata.iloc[:, keep_cell]
# .copy() so that adding the 'Time' column below writes to an independent
# frame rather than to a slice of the original annotation table.
anno = anno.iloc[keep_cell, :].copy()

# Numeric developmental stage: 'Stage_<number>' -> float(<number>).
# regex=False makes the literal-substring replacement explicit.
anno['Time'] = (
    anno['Developmental_stage'].str.replace('Stage_', '', regex=False).map(float)
)

# 50-component PCA with cells as rows (adata is transposed first).
datapca = PCA(n_components=50).fit_transform(adata.values.T)

# Stage labels in chronological order. Sorting the labels as strings would
# place e.g. 'Stage_8' after 'Stage_10', so order by the numeric 'Time' value
# instead of relying on a hand-tuned rotation of the lexicographic order.
stages = (
    anno[['Developmental_stage', 'Time']]
    .drop_duplicates()
    .sort_values('Time')['Developmental_stage']
    .tolist()
)

def ksmooth(x, k, rescale_k=False):
    """Return a smooth, rank-dependent weight for each entry of ``x``.

    Entries with ``x <= k`` receive weight 1. For the remaining entries the
    weight is ``1 - binom.cdf(max(x - k - 1, 0), x - 1, 0.632)``; the final
    result is the element-wise maximum of the two terms.

    Parameters
    ----------
    x : array-like of numbers
        In this file it is called with 1-based neighbor ranks.
    k : number
        Cut-off below which the weight is exactly 1.
    rescale_k : bool, optional
        When true and ``k > 1 / 0.632``, ``k`` is multiplied by 0.632 before
        the weights are computed.

    Returns
    -------
    numpy.ndarray
        Weights with the same shape as ``x``.
    """
    # NOTE(review): 0.632 is presumably the bootstrap inclusion probability
    # (about 1 - 1/e) -- confirm with the method description.
    success_prob = .632

    if rescale_k and k > 1. / success_prob:
        k = k * success_prob

    threshold = np.maximum(x - k - 1, 0)
    tail_weight = 1 - binom.cdf(threshold, x - 1, success_prob)
    inside_cutoff = x <= k
    return np.maximum(tail_weight, inside_cutoff)


# Build a sparse cell-by-cell graph linking every cell to its nearest
# neighbors (in PCA space) among the cells of
#   1. the previous stage  (query = stage i+1, reference = stage i),
#   2. the next stage      (query = stage i-1, reference = stage i),
#   3. its own stage       (query = reference = stage i),
# where neighbors are searched separately within each replicate of the
# reference stage.
m = 'correlation'  # distance metric for the neighbor search
l = 100000         # passed to graphdr as `_lambda` further down
k = 5              # neighbors per (query cell, reference replicate)
s = []             # edge row indices (query cells)
t = []             # edge column indices (reference cells)
w = []             # edge weights

stage_of_cell = anno['Developmental_stage'].values
replicate_of_cell = np.asarray(anno['Replicate_name'])

# (reference stage, query stage) pairs, in the same order the edges were
# originally generated: forward links, backward links, then within-stage.
stage_pairs = (
    list(zip(stages[:-1], stages[1:]))
    + list(zip(stages[1:], stages[:-1]))
    + [(stage, stage) for stage in stages]
)

# Weight of the 1st..k-th neighbor. ksmooth is element-wise, so the weights
# for one query cell can be computed once and tiled. With k=1 they evaluate
# to 0.632 ** (rank - 1).
rank_weights = 1 * ksmooth(np.arange(1, k + 1), k=1, rescale_k=False)

for ref_stage, query_stage in stage_pairs:
    in_ref_stage = stage_of_cell == ref_stage
    query_idx = np.flatnonzero(stage_of_cell == query_stage)
    for r in np.unique(replicate_of_cell[in_ref_stage]):
        ref_idx = np.flatnonzero(in_ref_stage & (replicate_of_cell == r))
        # n_neighbors must be passed by keyword: it is keyword-only in
        # current scikit-learn releases.
        nn_stage = NearestNeighbors(n_neighbors=k, metric=m).fit(datapca[ref_idx, :])
        _, ind = nn_stage.kneighbors(datapca[query_idx, :], n_neighbors=k)
        # `ind` indexes into the reference subset; map it back to global
        # cell indices via ref_idx.
        s.append(np.repeat(query_idx, ind.shape[1]))
        t.append(ref_idx[ind].flatten())
        w.append(np.tile(rank_weights, query_idx.size))

# Index arrays from np.flatnonzero are already integer typed, so no cast is
# needed (the former np.int alias no longer exists in recent NumPy).
n_cells = datapca.shape[0]
g = csr_matrix(
    (np.concatenate(w), (np.concatenate(s), np.concatenate(t))),
    shape=(n_cells, n_cells),
)


# [notebook output, cell 1] warning residue: from pandas.core.index import RangeIndex


In [2]:
# Run GraphDR on the PCA coordinates using the custom stage/replicate graph
# `g`, with regularization `l`, no refinement iterations and rescaling on.
Zt = graphdr(
    datapca,
    custom_graph=g,
    _lambda=l,
    refine_iter=0,
    rescale=True,
)

# Save the result as a tab-separated table: one row per cell (labelled with
# the column names of `adata`), one column per output dimension D0, D1, ...
# index_label=False writes no header field for the index column.
dim_names = ['D' + str(i) for i in range(Zt.shape[1])]
embedding = pd.DataFrame(Zt, index=adata.columns, columns=dim_names)
embedding.to_csv(
    './figures/xenopus_briggs2018.graphdr.txt',
    index_label=False,
    sep='\t',
)
