In [1]:
%pylab inline

import os,sys
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mpl
import matplotlib.gridspec as gridspec

from collections import defaultdict, Counter, OrderedDict

import cytograph as cg
import loompy
import palettable

sys.path.append(os.path.realpath(os.path.join(os.getcwd(), '..', '..')))
from scbeta_scrnaseq import utils
from scbeta_scrnaseq import vis
import scbeta_scrnaseq.cytograph_inmem_utils as cgm
import scbeta_scrnaseq.cytograph_analyses as cga



Populating the interactive namespace from numpy and matplotlib


In [2]:
# Open the fully processed stage-6 loom file read-only, then cache its main
# (unnamed) layer on the connection as an in-memory CSR sparse matrix.
full_ds = loompy.connect(f'../data/complete_processing/stage6.processed.loom', 'r')
_full_main_layer = full_ds.layers[""]
full_ds.vals = sp.sparse.csr_matrix(_full_main_layer[:, :])


### Make a copy of loom file, keeping only the SC-beta cells

In [3]:
# Write a new loom file that contains only the cells labelled 'scbeta'.
scbeta_loom_path = '../data/pseudotime/stage6.scbeta_pseudotime.loom'

# Create the output directory if needed; unlike `%mkdir`, this does not
# complain when the directory already exists, so the cell can be re-run.
os.makedirs(os.path.dirname(scbeta_loom_path), exist_ok=True)

attributes_to_copy = ['CellID', 'CellBatch', 'CellWeek', 'CellFlask', 'DetailedLabels', 'Labels', '_TrainFilter', '_Valid']

f_cells = np.where(full_ds.ca.Labels == 'scbeta')[0]
min_cells_per_valid_gene = 10

# Remove any stale copy of the output file. The previous `%rm f'...'` line was
# handed to the shell verbatim (the f-string prefix is not evaluated inside a
# shell alias), so it targeted a non-existent path and never removed the file.
if os.path.exists(scbeta_loom_path):
    os.remove(scbeta_loom_path)

# Slice the scbeta columns once and derive the per-gene mask once.
# NOTE(review): the mask thresholds the row *sum* of the matrix values, not the
# number of cells with non-zero expression, despite the threshold's name -
# confirm this is the intended filter.
scbeta_vals = full_ds.vals[:, f_cells]
gene_passes_threshold = scbeta_vals.sum(1).A.ravel() > min_cells_per_valid_gene

loompy.create(scbeta_loom_path,
              {'': scbeta_vals},
              {'Gene': full_ds.ra.Gene,
               '_Valid': gene_passes_threshold,
               '_Regress': gene_passes_threshold.copy(),
               },
              {a: full_ds.ca[a][f_cells] for a in attributes_to_copy},
              )
full_ds.close()

### Computation of pseudotime

In [4]:
# Close the connection to the full dataset.
# NOTE(review): the previous cell already ends with full_ds.close(), so this
# call looks redundant - confirm before removing.
full_ds.close()

In [5]:
# Connect to the scbeta-only loom file (loompy's default mode) and cache its
# main (unnamed) layer on the connection as an in-memory CSR sparse matrix.
ds = loompy.connect(f'../data/pseudotime/stage6.scbeta_pseudotime.loom')
_ds_main_layer = ds.layers[""]
ds.vals = sp.sparse.csr_matrix(_ds_main_layer[:, :])

In [6]:
# Run the high-variance-gene PCA on each pseudotime dataset (currently only
# 'scbeta_pseudotime'), keeping the open connections in `tds` for later cells.
seed = 91829211
tds = {}
for tp in ['scbeta_pseudotime']:
    # Open the dataset and cache its main layer as an in-memory CSR matrix.
    _tds = loompy.connect(f'../data/pseudotime/stage6.{tp}.loom')
    _tds.vals = sp.sparse.csr_matrix(_tds.layers[""][:, :])
    tds[tp] = _tds

    # Read the `_Valid` gene mask and write it back unchanged. The round trip
    # is only meaningful if the cell-cycle exclusion below is re-enabled.
    _valid_genes = _tds.ra._Valid
#     _valid_genes[np.where(np.isin(_tds.ra.Gene, list(cell_cycle_set)))[0]] = 0
    _tds.ra._Valid = _valid_genes

    # Only cells whose `_TrainFilter` attribute is positive are passed as
    # training cells.
    _train_cells = np.where(_tds.ca['_TrainFilter'] > 0)[0]
    cga.highvar_pca(
        _tds,
        _tds.vals,
        namespace='',
        seed=seed,
        train_cells=_train_cells,
        n_highvar_pcs=50,
        n_highvar_genes=2000,
    )

#     tds.ca["HighVarTSNE"] = cg.TSNE(perplexity=100).layout(tds.ca[r"HighVarPCA"][:, :10])

In [7]:
# NOTE(review): `scanpy.api` is the import path this notebook was written
# against; newer scanpy releases may no longer provide it - confirm against the
# installed version.
import scanpy.api as sc

# Per-dataset reference used to orient pseudotime: CellWeek indexed by CellID.
time_alignment_vector = {
    'scbeta_pseudotime': pd.Series(tds['scbeta_pseudotime'].ca.CellWeek, tds['scbeta_pseudotime'].ca.CellID),
}

for tp in tds.keys():
    _tds = tds[tp]

    # Build an AnnData object from the first 10 HighVarPCA columns and expose
    # the same columns as `X_pca` for the neighbour graph.
    adata = sc.AnnData(_tds.ca.HighVarPCA[:, :10])
    adata.obs_names = _tds.ca.CellID
    adata.obsm['X_pca'] = _tds.ca.HighVarPCA[:, :10]
    sc.pp.neighbors(adata, n_neighbors = 100, n_pcs = adata.obsm['X_pca'].shape[1])
    sc.tl.diffmap(adata)

    _diffmap = adata.obsm['X_diffmap']
    _tds.ca['DiffMap'] = _diffmap

    # Choose the root cell from column 1 of the diffusion map: when that column
    # correlates positively with CellWeek the root is its minimum, otherwise
    # (including a NaN correlation) its maximum.
    _diffmap_col1 = _diffmap[:, 1]
    diffmap1_time_corr = pd.Series(_diffmap_col1, _tds.ca.CellID).corr(time_alignment_vector[tp])
    if diffmap1_time_corr > 0:
        adata.uns['iroot'] = _diffmap_col1.argmin()
    else:
        adata.uns['iroot'] = _diffmap_col1.argmax()
    sc.tl.dpt(adata, n_branchings=0, n_dcs=4,  min_group_size=1000)

    # Store pseudotime and its rank as column attributes, aligned to the loom
    # file's CellID order.
    _dpt = adata.obs['dpt_pseudotime']
    _tds.ca['Pseudotime'] = _dpt[_tds.ca.CellID].values
    _tds.ca['PseudotimeRank'] = _dpt.rank()[_tds.ca.CellID].values


  return np.sqrt(row)


In [None]:
# (Not sure why we get that error, but it doesn't seem to affect downstream results.)

### VGAM regression

In [1]:
## Copy file to cluster
# %cp -v ../data/pseudotime/stage6.scbeta_pseudotime.loom /Volumes/adrianveres/scbeta_scrnaseq__data/ 


In [2]:
# Execute "python scbeta_scrnaseq/scripts/vgam_regression_wrapper.py scbeta_scrnaseq__data/stage6.scbeta_pseudotime.loom"

# sbatch -p test -n 4 -N 1 --job-name scbeta --mem 12000 --time 06:00:00 \
#         --wrap """source activate py36; module load gcc/7.1.0-fasrc01 R/3.5.0-fasrc02;export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER;\
#             cd /n/home15/adrianveres; python scbeta_scrnaseq/scripts/vgam_regression_wrapper.py scbeta_scrnaseq__data/stage6.scbeta_pseudotime.loom"""

# Copy the file back

In [None]:
## Copy file back
# %cp -v /Volumes/adrianveres/scbeta_scrnaseq__data/stage6.scbeta_pseudotime.loom ../data/pseudotime/ 

In [2]:
# Reconnect to the scbeta pseudotime loom file (see the commented-out steps
# above for running the VGAM regression and copying the file back) and cache
# its main (unnamed) layer on the connection as an in-memory CSR sparse matrix.
ds = loompy.connect(f'../data/pseudotime/stage6.scbeta_pseudotime.loom')
_ds_main_layer = ds.layers[""]
ds.vals = sp.sparse.csr_matrix(_ds_main_layer[:, :])

In [3]:
from scbeta_scrnaseq.pseudotime import annotate_vgam_ds

In [4]:
# Apply the project helper `annotate_vgam_ds` to the open pseudotime dataset.
# NOTE(review): presumably this attaches the VGAM regression results to `ds`
# (e.g. the 'Pred__pseudotime__log2fc__start_end' row attribute read by the
# export cell) - confirm against scbeta_scrnaseq.pseudotime.
annotate_vgam_ds(ds)

In [None]:
# Export the 'Pred__pseudotime__log2fc__start_end' row attribute, indexed by
# gene name, as a tab-separated .rnk file (missing values written as 0.0).
# The original referenced `scb_ds`, which is never defined in this notebook;
# the open, annotated dataset is `ds`.
pseudotime_log2fc = pd.Series(ds.ra['Pred__pseudotime__log2fc__start_end'], ds.ra.Gene).fillna(0.0)
# header=False keeps the file free of a header line regardless of the pandas
# version's default for Series.to_csv.
pseudotime_log2fc.to_csv('12_18.scbeta_pseudotime_fc.rnk', sep='\t', header=False)

