In [1]:
import gc
from itertools import chain
from itertools import product

import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
from scipy import io
from scipy import sparse

# import harmonypy as harmony
# import scvelo as scv
# import cellrank as cr

Link to the `sc-toolbox` GitHub repository: https://github.com/schillerlab/sc-toolbox

In [2]:
# import sc_toolbox as sctb # not updated for python 3.10

ModuleNotFoundError: No module named 'sc_toolbox'

In [3]:
import os

# Change the working directory before the next cell runs; presumably this is
# what lets the `scripts.py.*` modules imported there be found.
os.chdir("/local/workdir/dwm269/scCardiacOrganoid/")

In [4]:
from scripts.py.utils import *
from scripts.py.plots import *

# Plots

In [5]:
# Adapted from the scanpy GitHub discussion: https://github.com/scverse/scanpy/issues/955
def facet_embedding(adata, clust_key, basis, size=60, frameon=False, legend_loc=None, **kwargs):
    """Plot one embedding panel per cluster with only that cluster highlighted.

    For every category of ``adata.obs[clust_key]`` a boolean membership column
    is added to a copy of ``adata`` and colored with light grey (non-members)
    and the cluster's own color taken from ``adata.uns[clust_key + '_colors']``.
    All membership columns are then passed to ``sc.pl.embedding`` at once, so
    each cluster gets its own panel. ``adata`` itself is not modified.

    Parameters
    ----------
    adata : AnnData
        Object holding the embedding and the cluster annotation.
    clust_key : str
        Categorical column of ``adata.obs`` to facet on.
    basis : str
        Embedding name forwarded to ``sc.pl.embedding``.
    size, frameon, legend_loc
        Forwarded unchanged to ``sc.pl.embedding``.
    **kwargs
        Any further keyword arguments for ``sc.pl.embedding``.

    Returns
    -------
    Whatever ``sc.pl.embedding`` returns (e.g. the axes when ``show=False``).

    Raises
    ------
    ValueError
        If ``clust_key`` has no categories.
    KeyError
        If ``adata.uns`` has no ``clust_key + '_colors'`` entry.

    Notes
    -----
    The membership columns are named after the cluster labels; a label equal to
    an existing ``obs`` column name replaces that column in the temporary copy.
    """
    clusters = adata.obs[clust_key].cat.categories
    if len(clusters) == 0:
        raise ValueError(f"adata.obs[{clust_key!r}] has no categories to plot")

    colors_key = f"{clust_key}_colors"
    if colors_key not in adata.uns:
        raise KeyError(
            f"adata.uns has no {colors_key!r} entry; plot {clust_key!r} once or set the colors manually"
        )
    cluster_colors = adata.uns[colors_key]

    panels = adata.copy()
    panel_keys = []
    for i, clust in enumerate(clusters):
        # str() keeps the '<label>_colors' key valid for non-string labels.
        panel_key = str(clust)
        # Pin both categories so the [grey, cluster color] pair always lines up
        # with [False, True], even for an empty or all-inclusive cluster.
        panels.obs[panel_key] = pd.Categorical(
            adata.obs[clust_key].isin([clust]),
            categories=[False, True],
        )
        panels.uns[panel_key + '_colors'] = ['#d3d3d3', cluster_colors[i]]
        panel_keys.append(panel_key)

    return sc.pl.embedding(
        panels,
        # Only the member cells (category True) are treated as a group.
        groups=[True],
        color=panel_keys,
        basis=basis,
        size=size, frameon=frameon, legend_loc=legend_loc,
        **kwargs
    )

In [None]:
# One panel per `leiden_harmony_sp_1.0` category on the `phate_harmony_sp` embedding.
facet_embedding(adata, clust_key="leiden_harmony_sp_1.0", basis="phate_harmony_sp")

In [None]:
# Both scanpy calls draw onto the same axes; nothing is shown until plt.show().
ax = plt.subplot()

# First layer: all cells on the `phate_harmony_sp` embedding, small and
# translucent, colored by `leiden_harmony_sp_1.0`.
sc.pl.embedding(
    adata,
    basis="phate_harmony_sp",
    color=["leiden_harmony_sp_1.0"],
    size=0.1, alpha=0.4, ncols=2,
    ax=ax, show=False,
)

# Second layer: UMAP of the cells whose `timepoint` is "a", colored by `louvain`.
# NOTE(review): `louvain` and timepoint "a" are not referenced by any other
# visible cell -- confirm these keys/values exist in `adata`.
sc.pl.umap(
    adata[adata.obs["timepoint"].isin(["a"])], color="louvain", ax=ax, show=False
)

plt.show()

# Cell type distributions

Reference gist for the cluster-proportion helpers below: https://gist.github.com/wflynny/79c5266cc39a4a884958d696f84f85df

In [None]:
# import scanpy.api as sc
import matplotlib.pyplot as plt
import seaborn as sns

def get_cluster_proportions(adata,
                            cluster_key="cluster_final",
                            sample_key="replicate",
                            drop_values=None):
    """
    Compute, for every sample, the percentage of its cells found in each cluster.

    Input
    =====
    adata : AnnData object; only `adata.obs` is read and nothing is modified
    cluster_key : key of `adata.obs` storing cluster info
    sample_key : key of `adata.obs` storing sample/replicate info
    drop_values : list/iterable of values of `sample_key` that you don't want
        in the result; a value that is not in the result raises KeyError

    Returns
    =======
    pd.DataFrame with samples as the index (named `sample_key`), clusters as the
    columns (named `cluster_key`) and 0-100 floats as values. Each row sums to
    100; a sample category that has no cells yields a row of zeros.
    """
    # Count cells per (sample, cluster). observed=False keeps every declared
    # category of a categorical key, so unused categories show up as zeros.
    counts = (
        adata.obs
        .groupby([sample_key, cluster_key], observed=False)
        .size()
        .unstack(level=cluster_key, fill_value=0)
    )

    # Normalise each sample row to percentages; 0/0 (empty sample) becomes 0.
    totals = counts.sum(axis=1)
    props = (100 * counts).div(totals, axis=0).fillna(0)

    if drop_values is not None:
        props = props.drop(index=list(drop_values))
    return props


def plot_cluster_proportions(cluster_props, 
                             cluster_palette=None,
                             xlabel_rotation=0): 
    """
    Draw a stacked bar chart of per-sample cluster proportions.

    Input
    =====
    cluster_props : pd.DataFrame with one row per sample (bar) and one column
        per cluster (stacked segment), e.g. from `get_cluster_proportions`
    cluster_palette : anything `sns.color_palette` accepts (a palette name such
        as "tab20b" or a list of colors); one color per cluster column is taken
        from it, cycling if it is shorter. None uses the pandas default colors
    xlabel_rotation : rotation of the x tick labels in degrees

    Returns
    =======
    The matplotlib Figure holding the plot
    """
    fig, ax = plt.subplots(dpi=300)
    fig.patch.set_facecolor("white")

    bar_kwargs = dict(kind="bar", stacked=True, ax=ax, legend=None)
    if cluster_palette is not None:
        # Resolve to exactly one discrete color per cluster column instead of
        # sampling a blended continuous colormap.
        n_clusters = cluster_props.shape[1]
        bar_kwargs["color"] = list(
            sns.color_palette(cluster_palette, n_colors=n_clusters)
        )

    cluster_props.plot(**bar_kwargs)

    # Legend sits just outside the axes so it never covers the bars.
    ax.legend(bbox_to_anchor=(1.01, 1), frameon=False, title="Cluster")
    sns.despine(fig, ax)
    ax.tick_params(axis="x", rotation=xlabel_rotation)

    # An unnamed index has no label to capitalize; fall back to an empty label.
    index_name = cluster_props.index.name
    ax.set_xlabel("" if index_name is None else str(index_name).capitalize())
    ax.set_ylabel("Proportion")
    fig.tight_layout()

    return fig

In [None]:
# `leiden_harmony_sp_10_types` composition per `pattern`, restricted to cells
# with time_int == 4.
plot_cluster_proportions(
    cluster_props=get_cluster_proportions(
        adata=adata[adata.obs["time_int"] == 4],
        cluster_key="leiden_harmony_sp_10_types",
        sample_key="pattern",
    ),
    # cluster_palette="tab20b",
    xlabel_rotation=90,
)
gc.collect()

In [None]:
# `leiden_harmony_sp_10_types` composition per `pattern`, restricted to cells
# with time_int == 21.
plot_cluster_proportions(
    cluster_props=get_cluster_proportions(
        adata=adata[adata.obs["time_int"] == 21],
        cluster_key="leiden_harmony_sp_10_types",
        sample_key="pattern",
    ),
    xlabel_rotation=90,
)

In [None]:
# `leiden_harmony_sp_10_types` composition per `timepoint`, restricted to cells
# whose `pattern` is "600um".
plot_cluster_proportions(
    cluster_props=get_cluster_proportions(
        adata=adata[adata.obs["pattern"] == "600um"],
        cluster_key="leiden_harmony_sp_10_types",
        sample_key="timepoint",
    ),
    xlabel_rotation=90,
)