In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import scanpy.logging as logg
import scvelo as scv
import matplotlib.pyplot as plt
import seaborn as sb
import gseapy as gp
import plotly.graph_objects as go
import scipy
# Notebook-wide display and logging configuration (applies to every later cell).
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # apply scVelo's 'scvelo' figure style preset
plt.rcParams['figure.figsize']=(8,8) # default matplotlib figure size (overrides the preset above)
sc.settings.verbosity = 3  # raise scanpy's logging verbosity to level 3
# Matplotlib backwards compatibility hack: re-create `matplotlib.cbook.iterable`
# as an alias of `np.iterable`. Presumably one of the plotting dependencies still
# references the old name, which newer matplotlib no longer provides - TODO confirm which.
import matplotlib
matplotlib.cbook.iterable = np.iterable

from IPython.display import display
import vdom.helpers as vh

import cellicium.develop as cdev
import cellicium.sharedata as cdata
import cellicium.scrna as crna

## Dataset


Source: 
- url: https://www.embopress.org/doi/full/10.15252/msb.20209946
- title: The transcriptome dynamics of single cells during the cell cycle
- authors: Daniel Schwabe, Sara Formichetti, Jan Philipp Junker, Martin Falcke, Nikolaus Rajewsky

Data: GSE142277
- Location (exon counts): GSE142277/GSM4224315/GSM4224315_out_gene_exon_tagged.dge_exonssf002_WT.txt
- Location (intron counts): GSE142277/GSM4224315/GSM4224315_out_gene_exon_tagged.dge_intronssf002_WT.txt


## Analysis

### Convert the data to AnnData

In [None]:
# Development aid, currently disabled. Presumably `reload_user_libs` re-imports the
# given local cellicium module after it was edited, so the kernel does not need a
# restart - confirm in cellicium.develop. Not required for a fresh-kernel run.
# cdev.reload_user_libs(cdata)
# cdev.reload_user_libs(crna)
# cdev.reload_user_libs(crna.tools)
# cdev.reload_user_libs(crna.qc)

In [None]:
# Resolve the exon and intron count tables of sample GSM4224315 through the
# shared dataset manager (exons first, then introns).
data_manager = cdata.dataset_manager()
exons_file, introns_file = [
    data_manager.get_file(relative_path)
    for relative_path in (
        "GSE142277/GSM4224315/GSM4224315_out_gene_exon_tagged.dge_exonssf002_WT.txt",
        "GSE142277/GSM4224315/GSM4224315_out_gene_exon_tagged.dge_intronssf002_WT.txt",
    )
]

# Read both tab-separated tables and transpose each one right after loading
# (presumably genes x cells on disk -> cells x genes in memory; TODO confirm).
exons, introns = [
    sc.read_csv(table_path, delimiter="\t").transpose()
    for table_path in (exons_file, introns_file)
]

In [None]:
# Combine the exon and intron tables into a single object and persist it so the
# preprocessing section can start from the saved file.
# NOTE(review): what exactly add_intron_data stores (e.g. which layers) is defined
# in cellicium.scrna - confirm there.
adata = crna.tl.add_intron_data(exons, introns)
h5ad_path = '/home/jovyan/external/GSE142277/GSM4224315/GSM4224315.h5ad'
adata.write(h5ad_path)

### Preprocess

In [None]:
# Start preprocessing from the saved h5ad file and show a summary of the loaded object.
h5ad_path = "/home/jovyan/external/GSE142277/GSM4224315/GSM4224315.h5ad"
adata = sc.read_h5ad(h5ad_path)
adata

In [None]:
# QC plots on the data as loaded, before any cell filtering is applied.
crna.qc.qc_plots(adata)

In [None]:
# Filter cells according to identified QC thresholds.
# The cell count is printed after every step so the effect of each threshold is visible.
print('Total number of cells: {:d}'.format(adata.n_obs))

# Drop cells with too few total counts.
sc.pp.filter_cells(adata, min_counts = 1500)
print('Number of cells after min count filter: {:d}'.format(adata.n_obs))

# Drop cells with too many total counts.
sc.pp.filter_cells(adata, max_counts = 40000)
print('Number of cells after max count filter: {:d}'.format(adata.n_obs))

# Keep cells whose `mt_frac` is below 0.2.
# Boolean indexing returns a *view* of the parent AnnData; `.copy()` turns it into
# an independent object so that the in-place filter below does not modify a view
# (which would trigger an implicit copy and an ImplicitModificationWarning).
# NOTE(review): `mt_frac` is expected to already be in `adata.obs` - presumably
# added by an earlier cellicium step; confirm.
adata = adata[adata.obs['mt_frac'] < 0.2].copy()
print('Number of cells after MT filter: {:d}'.format(adata.n_obs))

# Drop cells with too few detected genes.
sc.pp.filter_cells(adata, min_genes = 700)
print('Number of cells after gene filter: {:d}'.format(adata.n_obs))


In [None]:
# Same QC plots again, now on the cells that passed the filters.
crna.qc.qc_plots(adata)

In [None]:
# Remove rarely observed genes: first by total counts, then by number of cells.
sc.pp.filter_genes(adata, min_counts = 20)
sc.pp.filter_genes(adata, min_cells = 5)

# Library-size normalisation followed by log(1 + x).
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Keep an all-genes snapshot before restricting to highly variable genes; the
# later gene-level plots are drawn from `adata_all`.
adata_all = adata.copy()

# Restrict the working object to the 3000 most variable genes.
# Column indexing returns a view; `.copy()` makes `adata` an independent object so
# that PCA, neighbours and velocity steps write into real storage instead of
# forcing an implicit copy of a view.
sc.pp.highly_variable_genes(adata, n_top_genes = 3000)
adata = adata[:, adata.var['highly_variable']].copy()

In [None]:
# PCA with scanpy's default settings, then a 10-nearest-neighbour graph built on
# 50 principal components.
sc.pp.pca(adata)
sc.pp.neighbors(
    adata,
    n_neighbors=10,
    n_pcs=50,
)
# Clustering is currently disabled:
#sc.tl.leiden(adata, resolution = 1.0, key_added = 'leiden_1')

In [None]:
# Two further embeddings of the same data: a 3-component UMAP and a diffusion map.
# Both are used as plotting bases in the cell-cycle score panels.
sc.tl.umap(adata, n_components = 3)
sc.tl.diffmap(adata)

In [None]:
# Score the cell-cycle phases; the 'S_score' and 'G2M_score' colour gradients used
# below are expected to be written to `adata.obs` by this call.
scv.tl.score_genes_cell_cycle(adata)

# One panel per embedding, all drawn with identical scatter settings so that the
# panels are directly comparable.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
embedding_panels = [('pca', "PCA"), ('umap', "UMAP"), ('diffmap', "DiffMap")]
for ax, (basis, title) in zip(axes, embedding_panels):
    scv.pl.scatter(
        adata,
        color_gradients=['S_score', 'G2M_score'],
        basis=basis,
        smooth=True,
        perc=[5, 95],
        ax=ax,
        show=False,  # draw into the shared figure instead of showing each panel
    )
    ax.set_title(title)

In [None]:
# Interactive 3D scatter coloured by the two cell-cycle scores.
# NOTE(review): which embedding plot_scatter_3d uses is decided inside cellicium - confirm.
crna.pl.plot_scatter_3d(adata, color_gradients = ['S_score', 'G2M_score'])

## Computing velocity

In [None]:
# scVelo pipeline, in order:
#   1. moments over a 30-neighbour graph built on 30 PCs,
#   2. fit the per-gene dynamics (8 parallel jobs),
#   3. velocities from the dynamical model,
#   4. the velocity graph used by the embedding plots.
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
scv.tl.recover_dynamics(adata, n_jobs=8)
scv.tl.velocity(
    adata,
    mode='dynamical',
)
scv.tl.velocity_graph(adata)

In [None]:
# Velocity arrows on the PCA embedding, coloured by the cell-cycle scores.
scv.pl.velocity_embedding(
    adata,
    color_gradients=['S_score', 'G2M_score'],
    basis='pca',
    arrow_length=2,
    arrow_size=1,
    dpi=100,
)

In [None]:
# 3D view of the PCA-space velocity arrows, coloured by the cell-cycle scores.
crna.pl.plot_arrows_3d(
    adata,
    arrows='velocity_pca',
    color_gradients=['S_score', 'G2M_score'],
)

In [None]:
# NOTE(review): development leftover - presumably re-imports crna.tl after local
# edits; it should be unnecessary on a fresh kernel. Confirm before removing.
cdev.reload_user_libs(crna.tl)
# Relate PCA positions to PCA-space velocities using the first 10 components.
# Returns a matrix W and a vector b, both visualised in a later cell.
# Presumably a linear fit of the form velocity ~ W @ x + b - TODO confirm in crna.tl.
# Expects adata.obsm['velocity_pca'] to exist already (presumably created by the
# earlier velocity_embedding call on the 'pca' basis - confirm).
W, b = crna.tl.find_rotational_plane(adata.obsm['X_pca'], adata.obsm['velocity_pca'], n_comp = 10)

In [None]:
# Eigen-decompose W and derive a mutually orthogonal frame in the first three PCA
# components: `axis_dir` plus two directions `dc1_dir` / `dc2_dir` orthogonal to it.
e_val, e_vec = np.linalg.eig(W)
# Inspect the eigenvalues and their magnitudes.
display(e_val)
display(np.abs(e_val))
# Index of the eigenvector inspected in the displays below.
e_ind = 0
display(e_vec[:, e_ind])
display(np.abs(e_vec[:, e_ind]))
display(np.dot(W, np.abs(e_vec[:, e_ind])))

# `axis_dir` spans the null space of the 2x3 matrix built from the first three
# entries of eigenvectors 0 and 1.
# NOTE(review): indices 0 and 1 are hard-coded here (not `e_ind`); presumably they
# are the complex-conjugate pair describing the rotation - confirm against e_val.
# `.flatten()` assumes the null space is one-dimensional (the two rows are
# linearly independent); otherwise the result has more than 3 entries.
axis_dir = scipy.linalg.null_space([e_vec[:3, 0], e_vec[:3, 1]]).flatten()
print(axis_dir)
# The eigenvectors may be complex; only the real part of the direction is kept.
axis_dir = np.real(axis_dir)
print(axis_dir)
# (0, a2, -a1) is orthogonal to (a0, a1, a2) by construction.
# NOTE(review): this is the zero vector if axis_dir points along the first
# component only, which would make the normalisation below divide by zero.
dc1_dir = np.array([0, axis_dir[2], -axis_dir[1]])
# Third direction: orthogonal to both axis_dir and dc1_dir.
dc2_dir = scipy.linalg.null_space([axis_dir, dc1_dir]).flatten()
# Normalise all three directions to unit length (done after the null-space step).
axis_dir = axis_dir / np.linalg.norm(axis_dir)
dc1_dir = dc1_dir / np.linalg.norm(dc1_dir)
dc2_dir = dc2_dir / np.linalg.norm(dc2_dir)
print(dc1_dir)
print(dc2_dir)

In [None]:
# Four heatmaps side by side, each with its own colour bar:
#   0: W, 1: b (as a single row), 2: |eigenvalues| (as a single row),
#   3: |first three eigenvectors| next to the three derived directions.
fig, axes = plt.subplots(1, 4, figsize=(15, 5))

# Embed the three 3-vectors as columns of a zero-padded matrix that has as many
# rows as the eigenvectors, so both can be stacked horizontally.
dir_img = np.zeros((e_vec.shape[0], 3))
dir_img[:3, :] = np.column_stack([axis_dir, dc1_dir, dc2_dir])
eigvec_and_dirs = np.abs(np.hstack([e_vec[:, 0:3], dir_img]))

hm0 = axes[0].imshow(W)
hm1 = axes[1].imshow([b])
hm2 = axes[2].imshow([np.abs(e_val)])
hm3 = axes[3].imshow(eigvec_and_dirs)

fig.colorbar(hm0, ax=axes[0])
fig.colorbar(hm1, ax=axes[1])
fig.colorbar(hm2, ax=axes[2])
fig.colorbar(hm3, ax=axes[3])

In [None]:
# Same 3D arrow plot as before, with the three derived unit directions overlaid;
# each direction is scaled by 10 before being passed in.
crna.pl.plot_arrows_3d(
    adata,
    arrows='velocity_pca',
    color_gradients=['S_score', 'G2M_score'],
    directions=[10 * axis_dir, 10 * dc1_dir, 10 * dc2_dir],
)

In [None]:
# Angular pseudotime in the plane spanned by dc1_dir / dc2_dir.
# Each cell's first three PCA coordinates are scaled to unit length and then
# expressed in the (dc1_dir, dc2_dir, axis_dir) frame.
pca3 = adata.obsm['X_pca'][:, :3]
unit_pca3 = pca3 / np.linalg.norm(pca3, axis = 1).reshape(-1, 1)
X_DC = np.dot(unit_pca3, np.array([dc1_dir, dc2_dir, axis_dir]).T)
# Here the transformation to pseudotime should be adjusted:
# the angle in the dc1/dc2 plane is mapped to a fraction of a full turn, its
# direction is reversed, and a 0.4 offset is applied before wrapping into [0, 1).
adata.obs['pseudo_t'] = np.mod(0.4 - np.arctan2(X_DC[:, 1], X_DC[:, 0]) / np.pi / 2, 1)
# Show the per-cell annotations ordered by pseudotime.
adata.obs.sort_values(['pseudo_t'])

In [None]:
# 3D scatter coloured by the 'pseudo_t' column, as a visual check of the pseudotime.
crna.pl.plot_scatter_3d(adata, color = ['pseudo_t'])

In [None]:
# Carry the pseudotime over to the all-genes object, score the cell cycle there
# as well, and plot both scores against pseudotime.
adata_all.obs['pseudo_t'] = adata.obs['pseudo_t']
scv.tl.score_genes_cell_cycle(adata_all)
scv.pl.scatter(
    adata_all,
    x='pseudo_t',
    y=['S_score', 'G2M_score'],
)

In [None]:
# Expression of selected cyclin / CDK genes along pseudotime, coloured by the
# cell-cycle scores.
cyclin_cdk_genes = ['CCND3', 'CDK4', 'CDK6', 'CCNE2', 'CDK2', 'CCNA2', 'CDK1', 'CCNB1', 'CDC20']
scv.pl.scatter(
    adata_all,
    x='pseudo_t',
    y=cyclin_cdk_genes,
    color_gradients=['S_score', 'G2M_score'],
)

In [None]:
# Alternative with the larger gene panel (disabled):
#scv.pl.heatmap(adata_all, var_names = ['CCND3', 'CDK4', 'CDK6', 'CCNE2', 'CDK2', 'CCNA2', 'CDK1', 'CCNB1', 'CDC20'], sortby = 'pseudo_t', n_convolve = 30, colorbar = True)

# Two heatmaps of the four cyclins, cells ordered by pseudotime and genes kept in
# the listed order (sort=False): first with the default xkey, then with xkey='unspliced'.
cyclin_heatmap_kwargs = dict(
    var_names=['CCND3', 'CCNE2', 'CCNA2', 'CCNB1'],
    sortby='pseudo_t',
    n_convolve=30,
    colorbar=True,
    sort=False,
)
scv.pl.heatmap(adata_all, **cyclin_heatmap_kwargs)
scv.pl.heatmap(adata_all, **cyclin_heatmap_kwargs, xkey='unspliced')

In [None]:
# Per-gene scatter plots for the four cyclins on the working object, coloured by pseudotime.
cyclin_genes = ['CCND3', 'CCNE2', 'CCNA2', 'CCNB1']
scv.pl.scatter(adata, cyclin_genes, color=['pseudo_t'])

In [None]:
# Display the per-cell annotation table of the working object.
adata.obs