In [1]:
import anndata
import holoviews as hv
import pandas as pd
import numpy as np
import scplot as sp
# Load the bokeh extension for HoloViews before any plots are created.
hv.extension('bokeh')

Read in data. The data consists of 3K PBMCs from a healthy donor from 10x Genomics.

In [2]:
# Load the dataset from its .h5ad file. read_h5ad is the explicit reader for
# this format; the generic anndata.read alias is deprecated.
adata = anndata.read_h5ad('3K_PBMC.h5ad')

Violin plot of QC metrics

In [3]:
# Distribution of each QC metric across all cells.
sp.violin(
    adata,
    ['n_genes', 'n_counts', 'percent_mito'],
    cols=3,
)

Scatter plot matrix of QC metrics. You can optionally color the plot by cluster assignment.

In [4]:
# Pairwise scatter plots of the QC metrics, colored by the 'louvain' annotation.
sp.scatter_matrix(
    adata,
    ['n_genes', 'n_counts', 'percent_mito'],
    color='louvain',
)

Violin plot of QC metrics by cluster assignment

In [5]:
# Same QC metrics as before, now split by the 'louvain' annotation.
sp.violin(
    adata,
    ['n_genes', 'n_counts', 'percent_mito'],
    by='louvain',
    cols=2,
    width=450,
)

Violin plot of gene expression by cluster

In [6]:
# Expression of two genes, split by the 'louvain' annotation.
sp.violin(
    adata,
    ['CST3', 'NKG7'],
    by='louvain',
    cols=2,
    width=450,
)

Embedding of gene expression and cluster assignments

In [7]:
# UMAP embedding drawn once per key: three genes plus the 'louvain' annotation.
sp.embedding(
    adata,
    basis='umap',
    keys=['CST3', 'NKG7', 'PPBP', 'louvain'],
)

Heatmap of mean gene expression

In [8]:
# Marker genes shared by the heatmap here and the dot plot in the next cell.
marker_genes = [
    'IL7R', 'CD79A', 'MS4A1', 'CD8A', 'CD8B', 'LYZ',
    'CD14', 'LGALS3', 'S100A8', 'GNLY', 'NKG7', 'KLRB1',
    'FCGR3A', 'MS4A7', 'FCER1A', 'CST3', 'PPBP',
]
sp.heatmap(
    adata,
    keys=marker_genes,
    cmap='Reds',
    by='louvain',
)

Dotplot of gene expression

In [9]:
# Dot plot of the marker genes, grouped by the 'louvain' annotation.
sp.dotplot(
    adata,
    keys=marker_genes,
    by='louvain',
)

Scatter plot of FCGR3A versus MS4A7, colored by expression of CD14

In [10]:
# Two genes on the axes, a third gene as the point color.
sp.scatter(
    adata,
    x='FCGR3A',
    y='MS4A7',
    color='CD14',
)

Use the box select tool to select cells. After the selection is complete, you can get the selected range from the `bounds` stream.

In [11]:
# Keep a handle on the plot so a selection stream can be attached to it.
p = sp.embedding(
    adata,
    basis='umap',
    keys=['CST3'],
)

# Attach a BoundsXY stream to the element at position [0, 0] of the plot.
umap_panel = p[0, 0]
bounds = hv.streams.BoundsXY(source=umap_panel)

# Display the plot as the cell output.
p

In [12]:
# Show the stream's current state; it reports bounds=None when no box
# selection has been recorded.
bounds

BoundsXY(bounds=None)

In [13]:
# Optional: uncomment the next line to save the plot as a PDF using the matplotlib backend.
# hv.save(p, 'test.pdf', fmt='pdf', backend='matplotlib')

Duplicate cells to create a dataset with 5 million cells. We include only 3 genes to conserve memory. You can also open a large AnnData file in `backed` mode to load data on demand.

In [14]:
# Only these genes are carried into the upsampled dataset.
genes_to_include = ['CST3', 'NKG7', 'PPBP']
# Number of times each cell is repeated.
n_copies = 2000

# Build a slim AnnData from the raw values of the selected genes, reusing a
# copy of the per-cell annotations and the existing UMAP coordinates.
upsampled_adata = anndata.AnnData(
    X=adata.raw[:, genes_to_include].X,
    obs=adata.obs.copy(),
    var=pd.DataFrame(index=genes_to_include),
)
upsampled_adata.obsm['X_umap'] = adata.obsm['X_umap']

# Repeat every row index n_copies times and index the dataset with the result.
n_original_cells = upsampled_adata.shape[0]
repeated_rows = np.repeat(np.arange(n_original_cells), n_copies)
upsampled_adata = upsampled_adata[repeated_rows]

# Report the resulting cell count with thousands separators.
n_upsampled_cells = upsampled_adata.shape[0]
"{:,} cells".format(n_upsampled_cells)

'5,276,000 cells'

In [15]:
# nbins must be supplied for a dataset this large: will crash without nbins.
sp.embedding(
    upsampled_adata,
    basis='umap',
    keys=['CST3', 'count'],
    nbins=200,
)

In [16]:
# nbins must be supplied for a dataset this large: will crash without nbins.
sp.scatter(
    upsampled_adata,
    x='CST3',
    y='NKG7',
    color='PPBP',
    nbins=200,
)