In [1]:
import pandas as pd
import numpy as np
import scanpy.api as sc
from sklearn.decomposition import PCA
from quasildr.graphdr import graphdr

#Planarian comparisons

data1 =  pd.read_csv('./data/other/fincher_2018.data.gz',sep='\t',index_col=0)
data2  = pd.read_csv('./data/other/plass_2018.full.data.gz',sep='\t')


inddict = dict(zip(data1.index.str.replace('dd_Smed_v4_','').str.replace('_[0-9]+$',''),range(data1.shape[0])))
temp = []
for i in  data2.index.str.replace('dd_Smed_v6_',''):
    if i in inddict:
        temp.append(inddict[i])
    else:
        temp.append(-1)
temp = np.asarray(temp)

data1 = data1.iloc[temp,:]
data1 = data1.iloc[temp!=-1,:]
data2 = data2.iloc[temp!=-1,:]


#Use a common linear dimensionality reduction for both datasets
adata = sc.AnnData(np.hstack([data1,data2]).T, np.concatenate([data1.columns.values,data2.columns.values]))
adata.var_names_make_unique()
adata.obs_names_make_unique()

sc.pp.recipe_zheng17(adata)
sc.tl.pca(adata)

pca50 = adata.obsm['X_pca']
pca50 = pca50/pca50[:,0].std()

pd.DataFrame(pca50[:50562,:],index=data1.columns, columns=['D'+str(i) for i in range(pca50.shape[1])]).to_csv(
 './figures/fincher_2018.pca.txt',index_label=False,sep='\t')
pd.DataFrame(pca50[50562:,:],index=data2.columns, columns=['D'+str(i) for i in range(pca50.shape[1])]).to_csv(
 './figures/plass_2018.pca.txt',index_label=False,sep='\t')


#Run GraphDR on each dataset
dr1 = graphdr(pca50[:50562,:], n_neighbors=15,_lambda=100,refine_iter=0,no_rotation=True,rescale=False)
dr2 = graphdr(pca50[50562:,:], n_neighbors=15,_lambda=100,refine_iter=0,no_rotation=True,rescale=False)
pd.DataFrame(dr1,index=data1.columns, columns=['D'+str(i) for i in range(dr1.shape[1])]).to_csv(
 './figures/fincher_2018.graphdr.txt',index_label=False,sep='\t')
pd.DataFrame(dr2,index=data2.columns, columns=['D'+str(i) for i in range(dr2.shape[1])]).to_csv(
 './figures/plass_2018.graphdr.txt',index_label=False,sep='\t')


#Run t-SNE on each dataset
adata1 = sc.AnnData(pca50[:50562,:])
sc.tl.tsne(adata1)
adata2 = sc.AnnData(pca50[50562:,:])
sc.tl.tsne(adata2)

pd.DataFrame(adata1.obsm['X_tsne'],index=data1.columns, columns=['D'+str(i) for i in range(2)]).to_csv(
 './figures/fincher_2018.tsne.txt',index_label=False,sep='\t')
pd.DataFrame(adata2.obsm['X_tsne'],index=data2.columns, columns=['D'+str(i) for i in range(2)]).to_csv(
 './figures/plass_2018.tsne.txt',index_label=False,sep='\t')

  from pandas.core.index import RangeIndex


In [4]:
#GraphDR with dataset alignment 
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from quasildr.graphdr import knn_graph
m='euclidean'
adata.obs['batch'] = np.concatenate([np.repeat('Fincher_et_al',50562),np.repeat('Plass_et_al',21612)])

k=50

nn_batch={}
batchs = np.unique(adata.obs['batch'])
for s in batchs:
    nn_batch[s]=NearestNeighbors(k,metric=m).fit(pca50[adata.obs['batch'].values==s,:])

s=[]
t=[]
w=[]
for i in range(len(batchs)-1):
    dis, ind = nn_batch[batchs[i]].kneighbors(pca50[adata.obs['batch'].values==batchs[i+1],:], k)
    s.append(np.repeat(np.argwhere(adata.obs['batch'].values==batchs[i+1]), ind.shape[1]))
    t.append(np.argwhere(adata.obs['batch'].values==batchs[i])[ind].flatten())
    w.append(np.ones(k*np.sum(adata.obs['batch'].values==batchs[i+1])))

for i in range(1, len(batchs)):
    dis, ind = nn_batch[batchs[i]].kneighbors(pca50[adata.obs['batch'].values==batchs[i-1],:], k)
    s.append(np.repeat(np.argwhere(adata.obs['batch'].values==batchs[i-1]), ind.shape[1]))
    t.append(np.argwhere(adata.obs['batch'].values==batchs[i])[ind].flatten())
    w.append(np.ones(k*np.sum(adata.obs['batch'].values==batchs[i-1])))

g = csr_matrix((np.concatenate(w),(np.concatenate(s).astype(np.int),np.concatenate(t).astype(np.int))),(adata.obsm['X_pca'].shape[0],adata.obsm['X_pca'].shape[0]))
g0 = knn_graph(pca50)


In [5]:
dr3 = graphdr(pca50, custom_graph=g.multiply(g.T)+g0,_lambda=100,refine_iter=0,no_rotation=True,rescale=False)
dr31 = dr3[:50562,:]
dr32 = dr3[50562:,:]

In [6]:
pd.DataFrame(dr31,index=data1.columns, columns=['D'+str(i) for i in range(dr31.shape[1])]).to_csv(
 './figures/fincher_2018.aligned.graphdr.txt',index_label=False,sep='\t')
pd.DataFrame(dr32,index=data0.columns, columns=['D'+str(i) for i in range(dr32.shape[1])]).to_csv(
 './figures/plass_2018.aligned.graphdr.txt',index_label=False,sep='\t')

NameError: name 'data0' is not defined