# Gastrulation erythroid lineage E8.25–E8.5 - scVelo

# Imports and settings

In [None]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import scvelo as scv
import cellrank as cr
import anndata as ad
from cellrank.kernels import ConnectivityKernel, VelocityKernel
from cellrank.estimators import GPCCA

In [None]:
# Print package version information through CellRank's logging helper so the
# software environment is recorded alongside the notebook output.
cr.logging.print_versions()

# Data loading and preprocessing

In [None]:
# Read the full gastrulation benchmark dataset from the shared workspace.
org_adata = sc.read(
    "/lustre/groups/ml01/workspace/monge_velo/data/benchmarks/gastrulation/adata_gastrulation.h5ad"
)

# Keep only cells that (a) are not from one of the excluded stages and
# (b) belong to one of the selected blood/erythroid cell types.
leave_out_stage = ['E6.5', 'E6.75', 'E7.0', 'E7.25', 'E7.5', 'E7.75', 'E8.0']
leave_in_cell = ['Blood progenitors 2', 'Erythroid1', 'Erythroid2', 'Erythroid3']
stage_is_kept = ~org_adata.obs["stage"].isin(leave_out_stage)
celltype_is_kept = org_adata.obs["celltype"].isin(leave_in_cell)
adata = org_adata[stage_is_kept & celltype_is_kept].copy()

# Turn the stage labels ("E8.25", ...) into numeric embryonic days.
map_dict = {
    "E6.5": 6.5,
    "E6.75": 6.75,
    "E7.0": 7.0,
    "E7.25": 7.25,
    "E7.5": 7.5,
    "E7.75": 7.75,
    "E8.0": 8.0,
    "E8.25": 8.25,
    "E8.5": 8.5,
}
adata.obs["stage"] = adata.obs["stage"].map(map_dict)

# Strip the annotations down to what the analysis uses: two obs columns and
# no var columns at all (the var index is retained).
adata.obs = adata.obs[['stage', 'celltype']]
adata.var = adata.var[[]]

# Log-transform the expression matrix in place.
sc.pp.log1p(adata)

In [None]:
# Split the filtered data into one independent AnnData per stage; the stage
# column holds numeric values at this point.
stage_values = adata.obs['stage']
adata_825 = adata[stage_values == 8.25].copy()
adata_85 = adata[stage_values == 8.5].copy()

In [None]:
# Velocity fit for the E8.25 cells only. Every call below updates the
# AnnData object in place: PCA -> moments -> dynamical model -> velocities.
stage_adata = adata_825
sc.pp.pca(stage_adata, n_comps=50)
scv.pp.moments(stage_adata)
scv.tl.recover_dynamics(stage_adata, n_jobs=80)
scv.tl.velocity(stage_adata, mode="dynamical")

In [None]:
# Velocity fit for the E8.5 cells only. Every call below updates the
# AnnData object in place: PCA -> moments -> dynamical model -> velocities.
stage_adata = adata_85
sc.pp.pca(stage_adata, n_comps=50)
scv.pp.moments(stage_adata)
scv.tl.recover_dynamics(stage_adata, n_jobs=80)
scv.tl.velocity(stage_adata, mode="dynamical")

In [None]:
# Stack the two per-stage objects back into a single AnnData (E8.25 first).
per_stage_adatas = [adata_825, adata_85]
adata = ad.concat(per_stage_adatas)

# Build the neighbour graph and UMAP embedding on the combined object.
# NOTE(review): PCA was fitted separately on each stage before concatenation,
# so a stacked X_pca would mix two different bases — confirm that the
# neighbour graph is meant to be computed on that representation.
sc.pp.neighbors(adata)
scv.tl.umap(adata)
adata

# Precomputed data

In [None]:
# Load the stored, precomputed object; this rebinds `adata`, replacing
# whatever the preprocessing cells above produced in this session.
precomputed_path = "/home/icb/jonas.flor/precomputed_data/scvelo_branch_gast_pre.h5ad"
adata = sc.read(precomputed_path)
adata

# Transition probabilities

In [None]:
# Build a velocity kernel on `adata` and keep the object returned by the
# transition-matrix computation as `vk` for the cells below.
velocity_kernel = VelocityKernel(adata)
vk = velocity_kernel.compute_transition_matrix()

# Macrostate calculation

In [None]:
# GPCCA estimator on top of the velocity kernel.
g = GPCCA(vk)

# Schur decomposition with 20 components, then show the real part of the
# spectrum to help choose the number of macrostates.
g.compute_schur(n_components=20)
g.plot_spectrum(real_only=True)

In [None]:
# Coarse-grain into 3 macrostates, named after the "celltype" annotation.
g.compute_macrostates(n_states=3, cluster_key="celltype")

# The style name must be exactly 'scvelo' (as in the other plotting cells);
# the previous value 'scvel o' was a typo that matches no known style, so the
# scVelo font size / colour map settings were not applied.
scv.set_figure_params('scvelo', transparent=True, fontsize=20, color_map='viridis')
g.plot_macrostates(
    discrete=True,
    basis="umap",
    legend_loc="right",
    legend_fontweight='normal',
    legend_fontsize='12',
    dpi=250,
)

In [None]:
# Coarse-grain into 4 macrostates, named after the "celltype" annotation.
# This replaces the macrostates computed with a different n_states earlier.
g.compute_macrostates(n_states=4, cluster_key="celltype")

# The style name must be exactly 'scvelo' (as in the other plotting cells);
# the previous value 'scvel o' was a typo that matches no known style, so the
# scVelo font size / colour map settings were not applied.
scv.set_figure_params('scvelo', transparent=True, fontsize=20, color_map='viridis')
g.plot_macrostates(
    discrete=True,
    basis="umap",
    legend_loc="right",
    legend_fontweight='normal',
    legend_fontsize='12',
    dpi=250,
)

# Fate probabilities

In [None]:
# Promote a macrostate to terminal state by name. Macrostates are named after
# the "celltype" labels, and the fate-probability cells below request the
# lineage 'Erythroid3', so that exact name is used here (the former value
# 'Erythroid' is not one of the cell-type labels).
# NOTE(review): confirm 'Erythroid3' appears in the macrostate plot above; if
# the estimator split it into suffixed states, list those names instead.
g.set_terminal_states_from_macrostates(["Erythroid3"])

# Solve for absorption (fate) probabilities towards the terminal state, also
# requesting time-to-absorption for all terminal states.
g.compute_absorption_probabilities(
    solver="gmres",
    use_petsc=True,
    tol=1e-12,
    preconditioner='ilu',
    time_to_absorption='all',
)
g.plot_absorption_probabilities(same_plot=False, basis="umap", perc=[0, 99], ncols=2)

In [None]:
# Heatmap of absorption probabilities towards 'Erythroid3', aggregated over
# the four cell-type clusters.
scv.set_figure_params('scvelo', transparent=True, fontsize=20, color_map='viridis')

heatmap_clusters = ['Blood progenitors 2', 'Erythroid1', 'Erythroid2', 'Erythroid3']
cr.pl.aggregate_absorption_probabilities(
    adata,
    mode='heatmap',
    lineages=['Erythroid3'],
    cluster_key='celltype',
    clusters=heatmap_clusters,
    figsize=(17, 6),
    title="",
    #save="transprobs_pancreas1415_scvelo.png"
)

In [None]:
# Manually aggregate the absorption probabilities per cell-type cluster.
# The result `df` has one row per terminal cluster and one column per source
# cluster.
cluster = ['Blood progenitors 2', 'Erythroid1', 'Erythroid2', 'Erythroid3']
terminal_cluster = ['Erythroid3']

# assumes `data` is (n_cells, n_terminal_states) with rows aligned to
# adata.obs — TODO confirm against the estimator that produced it.
data = g.absorption_probabilities.X
tmp = adata.obs['celltype']

aggregated_absorption_prob = np.empty((len(terminal_cluster), len(cluster)))
for col, cluster_name in enumerate(cluster):
    # 0/1 membership vector; its dot product with `data` sums the
    # probabilities of all cells belonging to this cluster.
    membership = np.array((tmp.array == cluster_name).astype(int))
    aggregated_absorption_prob[:, col] = np.dot(membership, data)

# Normalise every column so it sums to one over the terminal clusters.
# NOTE(review): with a single terminal cluster each column holds exactly one
# entry, so this division turns every non-empty cluster's value into 1.0 —
# confirm that this is the intended metric.
aggregated_absorption_prob /= aggregated_absorption_prob.sum(axis=0)

df = pd.DataFrame(aggregated_absorption_prob, index=terminal_cluster, columns=cluster)

In [None]:
# (terminal cluster, source cluster) pairs regarded as correct transitions.
# The second element must be a column of `df`; the earlier labels
# 'Erythroid2 1' / 'Erythroid2 2' are not columns and made `df.loc` raise a
# KeyError, so they are replaced by the actual labels 'Erythroid1' and
# 'Erythroid2'.
correct_transitions = [
    ('Erythroid3', 'Blood progenitors 2'),
    ('Erythroid3', 'Erythroid1'),
    ('Erythroid3', 'Erythroid2'),
    ('Erythroid3', 'Erythroid3'),
]

# Average the aggregated probability over the listed transitions; dividing by
# the number of summed entries keeps the mean correct if the list changes.
sum_prob = sum(df.loc[row, col] for row, col in correct_transitions)
mean_prob = sum_prob / len(correct_transitions)
mean_prob

# Velocity stream plot

In [None]:
# Store the kernel's transition matrix under the "velocity_graph" key of
# `adata.uns` before drawing the stream plot (presumably so the plot is based
# on the same transition matrix as the analysis above).
adata.uns["velocity_graph"] = vk.transition_matrix

scv.set_figure_params(
    'scvelo',
    transparent=True,
    fontsize=10,
    dpi_save=400,
    color_map='viridis',
)
# NOTE(review): this figure is not passed to the plotting call below —
# confirm it is needed.
fig = plt.figure()
ax = scv.pl.velocity_embedding_stream(
    adata,
    basis="umap",
    smooth=0.5,
    title="",
)  #, legend_loc="none", show=False)
#legend =ax.legend(bbox_to_anchor=[1.25, 1.5], loc='upper center', ncol=6,frameon=True, prop={'size': 18})

In [None]:
# Run scVelo's velocity-confidence computation on `adata`; the following cell
# colours the embedding by the 'velocity_confidence' field.
scv.tl.velocity_confidence(adata)

In [None]:
# Scatter plot of the embedding coloured by per-cell velocity confidence.
scv.set_figure_params('scvelo', fontsize=20)
scv.settings.presenter_view = False

# Colour scaling and marker options for the confidence plot.
confidence_plot_options = dict(
    perc=[2, 98],
    cmap='gnuplot',
    vmid=0.75,
    rescale_color=[0.5, 1.0],
    size=15,
    title="",
    #save="velconf_pancreas1415_scvelo.png"
)
scv.pl.scatter(adata, color='velocity_confidence', **confidence_plot_options)