# Example Notebook for Interactive Plotting in Scanpy

In this notebook, we demonstrate several interactive plotting functions that integrate with the single-cell analysis framework [Scanpy](https://scanpy.readthedocs.io).

In [1]:
import warnings
# Show each distinct warning only once. Placed before the remaining imports,
# presumably so that their import-time warnings are throttled as well.
warnings.filterwarnings(action='once')
import numpy as np
import re
# NOTE(review): `scanpy.api` is the legacy entry point; newer scanpy releases
# recommend `import scanpy as sc` -- confirm before upgrading scanpy.
import scanpy.api as sc

# Print the versions of scanpy and its main dependencies for provenance.
sc.logging.print_versions()
# Lowest scanpy verbosity level (presumably errors only -- TODO confirm).
sc.settings.verbosity = 0

  return f(*args, **kwds)
  return f(*args, **kwds)


scanpy==1.4.3 anndata==0.6.18 umap==0.3.8 numpy==1.15.4 scipy==1.2.0 pandas==0.23.4 scikit-learn==0.21.0 statsmodels==0.9.0 python-igraph==0.7.1 louvain==0.6.1 


  self._config = SafeConfigParser()
  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
import interactive_plotting as ipl  # interactive plotting helpers demonstrated in this notebook
from bokeh.io import output_notebook
# Configure Bokeh to render its output inline in the notebook cells
# (the ipl plots are presumably Bokeh figures -- confirm).
output_notebook()

In [3]:
# Fix the seed of NumPy's global RNG so that the random annotations drawn
# with np.random.choice further down are reproducible.
np.random.seed(seed=42)

## Import Data

In [4]:
# Load the paul15 example dataset that ships with scanpy.
adata = sc.datasets.paul15()
# Make gene names unique so label-based lookups on adata.var are unambiguous.
adata.var_names_make_unique()
# Bare expression: show the AnnData summary as the cell output.
adata

... storing 'paul15_clusters' as categorical


AnnData object with n_obs × n_vars = 2730 × 3451 
    obs: 'paul15_clusters'
    uns: 'iroot'

## Additional Annotations

In [5]:
# Flag every gene whose name starts with "mt" (case-insensitive) as 'mito'.
# NOTE(review): the prefix has no trailing dash, so genes that merely begin
# with "mt" may be included as well -- confirm this is the intended pattern.
regex = re.compile('^(mt).*', re.IGNORECASE)
mito_genes = [gene for gene in adata.var_names if regex.search(gene)]
# Boolean per-gene column: True exactly for the matched gene names.
adata.var['mito'] = adata.var_names.isin(mito_genes)
print('Found {} mito genes and annotated.'.format(len(mito_genes)))

# Compute QC metrics in place; qc_vars=['mito'] additionally yields the
# mito-specific columns (e.g. 'pct_counts_mito') used later in the notebook.
sc.pp.calculate_qc_metrics(adata, qc_vars=['mito'], inplace=True)

Found 15 mito genes and annotated.


In [6]:
# Attach three random, purely illustrative per-cell annotations.
# The np.random.choice calls stay in the order batch -> group -> plate so the
# values drawn from the seeded global RNG are the same as before.
adata.obs['batch'] = np.random.choice(['batch_1', 'batch_2'], adata.n_obs)
adata.obs['group'] = np.random.choice(['group_1', 'group_2'], adata.n_obs)
adata.obs['plate'] = np.random.choice(['plate_1', 'plate_2', 'plate_3'], adata.n_obs)

# Store all three columns as categoricals. 'batch' is converted explicitly as
# well; left as plain strings it gets coerced implicitly by a later scanpy call,
# which logs "... storing 'batch' as categorical" in an unrelated cell.
for obs_key in ('batch', 'group', 'plate'):
    adata.obs[obs_key] = adata.obs[obs_key].astype('category')

In [7]:
# Histograms of three QC metrics computed by sc.pp.calculate_qc_metrics,
# presumably drawn separately for each value of the 'plate' annotation.
ipl.interactive_hist(
    adata,
    groups=['plate'],
    keys=['n_genes_by_counts', 'total_counts', 'pct_counts_mito'],
    fill_alpha=0.3,
    plot_width=400,
    plot_height=400,
)

## Filtering

In [8]:
# Drop cells with fewer than 200 detected genes (modifies `adata` in place).
sc.pp.filter_cells(adata, min_genes=200)

# Keep only cells below both QC thresholds. The combined mask is applied once
# and followed by .copy(), so `adata` ends up as a standalone AnnData object
# rather than a view; later cells assign to it (adata.raw, preprocessing),
# which on a view would force an implicit copy.
passes_qc = (adata.obs['total_counts'] < 8000) & (adata.obs['pct_counts_mito'] < 2)
adata = adata[passes_qc].copy()

## Normalization, Clustering and Embedding

In [9]:
# Keep a copy of the filtered, not yet normalized data in adata.raw before
# the recipe below modifies `adata`.
adata.raw = adata.copy()
# Zheng et al. (2017) preprocessing recipe from scanpy, with plotting disabled.
sc.pp.recipe_zheng17(adata, plot=False)

# Neighbor graph -> Louvain clustering -> UMAP embedding, each with a fixed
# random_state so results are reproducible.
# NOTE(review): n_neighbors=30, n_pcs=7 and resolution=0.45 are unexplained;
# presumably tuned for this dataset -- confirm.
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=7, random_state=42)
sc.tl.louvain(adata, resolution=0.45, random_state=42)
sc.tl.umap(adata, random_state=42)

## Interactive histogram with groups

In [10]:
# Histogram of the per-cell 'n_counts' values with two named intervals;
# presumably the interval bounds can be adjusted interactively -- TODO confirm.
ipl.thresholding_hist(
    adata,
    key='n_counts',
    categories={'cat_1': [0, 700], 'cat_2': [700, 1200]},
)

## Highlighting differentially expressed genes

In [11]:
# Rank genes for each Louvain cluster; the result is stored in
# adata.uns['rank_genes_groups'] and reused by the plots further down.
sc.tl.rank_genes_groups(adata, groupby='louvain')
# Presumably visualizes the ranked genes together with the 'batch' annotation
# of the cells -- TODO confirm against the interactive_plotting documentation.
ipl.highlight_de(adata, cell_keys='batch', legend_loc='top_right')

... storing 'batch' as categorical
Defaulting to column, but this will raise an ambiguity error in a future version
  xs, ys, ks = zip(*conv_hulls.groupby(key).apply(lambda df: list(map(list, (df['x'], df['y'], df[key])))))


## Selecting 'iroot'

In [12]:
# Compute the diffusion map; it provides the 'diffmap' basis plotted below.
sc.tl.diffmap(adata)
# Plot diffusion components 1 and 3 with the 'group' annotation; presumably
# used to look up cell indices (e.g. a root cell) interactively -- TODO confirm.
ipl.highlight_indices(adata, key='group', basis='diffmap', components=[1, 3])

In [13]:
# Root cell for diffusion pseudotime, given as an index into `adata`.
# NOTE(review): 840 is unexplained here -- presumably picked from the
# highlight_indices plot above; re-check it whenever the filtering changes.
adata.uns['iroot'] = 840
# Diffusion pseudotime, computed relative to the root cell set above.
sc.tl.dpt(adata)

In [None]:
# Linked diffmap / UMAP plots. `genes` holds the first-column entry of the
# first ten rows of the rank_genes_groups result, i.e. the ten top-ranked
# genes of the first group.
ipl.link_plot(
    adata,
    bases=['diffmap', 'umap'],
    components=[[1, 2], [1, 2]],
    genes=[record[0] for record in adata.uns['rank_genes_groups']['names'][:10]],
    cutoff=True,
    key='louvain',
    distance='dpt',
    legend_loc='top_right',
)

## Velocity plot

In [None]:
# Velocity plot for the single top-ranked gene of the first group.
# `paths` presumably lists sequences of Louvain cluster labels to follow
# -- TODO confirm against the interactive_plotting documentation.
ipl.velocity_plot(
    adata,
    genes=[record[0] for record in adata.uns['rank_genes_groups']['names'][:1]],
    paths=[['0', '1'], ['0', '2']],
)