In [1]:
import scvelo as scv
import scanpy as sc
import numpy as np

In [2]:
def pp(adata):
    sc.pp.filter_cells(adata, min_genes=200) #get rid of cells with fewer than 200 genes
    sc.pp.filter_genes(adata, min_cells=3) #get rid of genes that are found in fewer than 3 cells
    adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
    upper_lim = np.quantile(adata.obs.n_genes_by_counts.values, .98)
    lower_lim = np.quantile(adata.obs.n_genes_by_counts.values, .02)
    adata = adata[(adata.obs.n_genes_by_counts < upper_lim) & (adata.obs.n_genes_by_counts > lower_lim)]
    adata = adata[adata.obs.pct_counts_mt < 20]
    sc.pp.normalize_total(adata, target_sum=1e4) #normalize every cell to 10,000 UMI
    sc.pp.log1p(adata) #change to log counts
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5) #these are default values
    adata.raw = adata #save raw data before processing values and further filtering
    adata = adata[:, adata.var.highly_variable] #filter highly variable
    sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt']) #Regress out effects of total counts per cell and the percentage of mitochondrial genes expressed
    sc.pp.scale(adata, max_value=10) #scale each gene to unit variance
    sc.tl.pca(adata, svd_solver='arpack')
    sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)
    sc.tl.leiden(adata, resolution = 0.25)
    sc.tl.umap(adata)
    return adata


In [3]:
adata = sc.read("../dataSaveOriginal/rawDataset.h5ad")
adata.var_names_make_unique()
adata.X = adata.X.astype('float64')

In [4]:
adataCON_DS2U =  adata[adata.obs["sample"] == "CON_DS2U"]
adataCON_H9 =  adata[adata.obs["sample"] == "CON_H9"]
adataCON_IMR =  adata[adata.obs["sample"] == "CON_IMR"]
adataCON_ihtc =  adata[adata.obs["sample"] == "CON_ihtc"]

In [5]:
adataCON_DS2U = pp(adataCON_DS2U)
adataCON_H9 = pp(adataCON_H9)
adataCON_IMR = pp(adataCON_IMR)
adataCON_ihtc = pp(adataCON_ihtc)

In [6]:
ldata = scv.read('./data/CON1.loom')

In [20]:
ldata = pp(ldata)

IndexError: cannot do a non-empty take from an empty axes.

In [15]:
adataCON_DS2U

AnnData object with n_obs × n_vars = 8432 × 5038
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'group', 'sample', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
    var: 'features', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'leiden', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'

In [19]:
test = scv.utils.merge(adataCON_ihtc, ldata)

UnboundLocalError: local variable 'id_length' referenced before assignment

In [None]:
scv.pp.filter_and_normalize(test)
scv.pp.moments(test)
scv.tl.velocity(test, mode="stochastic")

In [None]:
scv.tl.velocity_graph(test)

In [None]:
scv.pl.velocity_embedding_stream(test, basis="umap", color="clusters", save="velocityEmbedding.pdf")

In [None]:
scv.tl.velocity_pseudotime(test)
scv.pl.scatter(test, color="velocity_pseudotime")

In [None]:
scv.tl.recover_dynamics(test, n_jobs=20)
scv.tl.velocity(test, mode = "dynamical")
scv.tl.velocity_graph(test)

In [None]:
scv.pl.velocity_embedding_stream(test, basis="umap", color = "clusters")

In [None]:
scv.tl.latent_time(test)
scv.pl.scatter(test, color="latent_time", color_map="gnuplot", size=80)