In [1]:
import matplotlib.pyplot as plt

from ALLCools.clustering import tsne
from ALLCools.plot import *
from ALLCools.integration import SeuratIntegration
from wmb import brain

import scanpy as sc
import anndata
from harmonypy import run_harmony

CEMBA_SNM3C_3C_CHROM100K_RAW_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/CEMBA_3C/mcds/CEMBA.snm3C.chrom100k_raw.zarr
CEMBA_SNM3C_3C_COMPARTMENT_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/CEMBA_3C/mcds/CEMBA.snm3C.3C.mcds/chrom100k
CEMBA_SNM3C_3C_DOMAIN_INSULATION_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/CEMBA_3C/mcds/CEMBA.snm3C.3C.mcds/chrom25k
CEMBA_SNM3C_CELL_TYPE_ANNOTATION_PATH do not exist: /gale/netapp/cemba3c/BICCN/wmb/cemba/CEMBA.snm3C.Annotations.zarr
CEMBA_SNM3C_GENE_CHUNK_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/share/gene_mcds_for_plot/GeneChunks/CEMBA.snm3C
CEMBA_SNMC_GENE_CHUNK_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/share/gene_mcds_for_plot/GeneChunks/CEMBA.snmC
AIBS_SMART_GENE_CHUNK_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/share/gene_mcds_for_plot/GeneChunks/AIBS.SMART
AIBS_TENX_GENE_CHUNK_ZARR_PATH do not exist: /gale/netapp/cemba3c/BICCN/share/gene_mcds_for_plot/GeneChunks/AIBS.TENX
BROAD_TENX_GENE_CHUNK_ZARR_P

In [3]:
# Load the merged AnnData object; the path is relative to the notebook's working directory.
adata_merge = anndata.read_h5ad('final_with_coords.h5ad')

In [4]:
# Show the AnnData summary (dimensions and the obs / var / uns / obsm / obsp keys).
adata_merge

AnnData object with n_obs × n_vars = 1542029 × 5549
    obs: 'count', 'umi_count', 'n_counts', 'Modality', 'L2', 'L3', 'DissectionRegion', 'L2_transfer', 'L3_transfer', 'DissectionRegion_transfer', 'leiden'
    var: 'chrom', 'end-RNA', 'start-RNA', 'name-RNA', 'mean-RNA', 'std-RNA', 'cov_mean-mC', 'end-mC', 'start-mC', 'cef-mC'
    uns: 'leiden', 'neighbors'
    obsm: 'X_harmony', 'X_pca_integrate', 'X_tsne'
    obsp: 'connectivities', 'distances'

# Neighbors, UMAP embedding and Leiden clustering

In [None]:
# sc.pp.neighbors is called without `use_rep`, so the Harmony embedding is
# temporarily aliased as 'X_pca'; the alias is deleted again after sc.tl.umap.
# NOTE(review): presumably this makes scanpy pick up the Harmony embedding as its
# default representation -- passing use_rep='X_harmony' may be cleaner; confirm.
adata_merge.obsm['X_pca'] = adata_merge.obsm['X_harmony']
sc.pp.neighbors(adata_merge)

In [None]:
# min_dist decreases linearly with the number of cells and is floored at 0.1,
# so any dataset with 54,000 or more cells ends up with min_dist = 0.1.
min_dist = max(0.1, 1 - adata_merge.shape[0] / 60000)
sc.tl.umap(adata_merge, min_dist=min_dist)
# Remove the temporary 'X_pca' alias of 'X_harmony' that was set before sc.pp.neighbors.
del adata_merge.obsm['X_pca']

In [None]:
# Leiden clustering at resolution 0.3.
# NOTE(review): the loaded object already lists a 'leiden' column in obs; this call
# presumably overwrites it -- confirm that is intended.
sc.tl.leiden(adata_merge, resolution=0.3)

# Plot reference / query co-embedding panels

In [None]:
def _notebook_setting(name, value):
    """Return ``value``, or the notebook-level variable called ``name`` when ``value`` is None.

    Keeps ``plot(coord_base)`` working for notebooks that define the setting as a
    global, while raising a descriptive NameError when it is defined nowhere.
    """
    if value is not None:
        return value
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            f"{name!r} was not passed to plot() and is not defined in the notebook"
        ) from None


def _overlay_panel(ax, background, foreground, coord_base, **foreground_kws):
    """Draw ``background`` cells in light grey, then ``foreground`` cells coloured on top."""
    categorical_scatter(data=background,
                        coord_base=coord_base,
                        max_points=None,
                        hue=None,
                        scatter_kws=dict(color='lightgrey'),
                        ax=ax)
    categorical_scatter(data=foreground,
                        ax=ax,
                        coord_base=coord_base,
                        **foreground_kws)


def plot(coord_base,
         plot_key=None,
         ref_region_type=None,
         query_region_type=None):
    """Draw a 2 x 3 overview of the integrated embedding of ``adata_merge``.

    The top row colours the reference cells (``Modality == 'mC'``) over a grey
    background of the query cells (``Modality == 'RNA'``); the bottom row does the
    opposite. The columns show, from left to right, the 'leiden' co-clusters, the
    cell-type annotation and the dissection region.

    Parameters
    ----------
    coord_base
        Forwarded to ``categorical_scatter`` as ``coord_base`` (e.g. ``'umap'``).
    plot_key
        obs column used to colour the reference cells; the query cells are coloured
        by ``f'{plot_key}_transfer'``.
    ref_region_type, query_region_type
        Passed to ``brain.get_dissection_region_palette`` to obtain the region
        palettes of the reference and the query row respectively.

    Each of the three optional arguments falls back to the notebook-level variable
    of the same name when left as None; a NameError is raised, before any figure is
    created, if neither is available.

    Returns
    -------
    None
    """
    # Resolve the settings up front so a missing one fails before the figure is built.
    plot_key = _notebook_setting('plot_key', plot_key)
    ref_region_type = _notebook_setting('ref_region_type', ref_region_type)
    query_region_type = _notebook_setting('query_region_type', query_region_type)

    fig, axes = plt.subplots(nrows=2,
                             ncols=3,
                             figsize=(12, 8),
                             dpi=300,
                             constrained_layout=True)

    mc_data = adata_merge[adata_merge.obs['Modality'] == 'mC']
    rna_data = adata_merge[adata_merge.obs['Modality'] == 'RNA']

    # One entry per row: (coloured cells, grey cells, cell-type column, region type).
    rows = [
        (mc_data, rna_data, f'{plot_key}', ref_region_type),
        (rna_data, mc_data, f'{plot_key}_transfer', query_region_type),
    ]
    for row_axes, (foreground, background, celltype_hue, region_type) in zip(axes, rows):
        _overlay_panel(row_axes[0], background, foreground, coord_base,
                       hue='leiden',
                       text_anno='leiden',
                       palette='tab20',
                       max_points=None)
        _overlay_panel(row_axes[1], background, foreground, coord_base,
                       hue=celltype_hue,
                       palette='tab20',
                       max_points=None)
        # No max_points is passed for the coloured region layer, so
        # categorical_scatter's default applies here, exactly as before.
        # NOTE(review): confirm whether that difference from the other panels is intended.
        _overlay_panel(row_axes[2], background, foreground, coord_base,
                       hue='DissectionRegion',
                       palette=brain.get_dissection_region_palette(region_type))

    titles = [
        'Ref Co-cluster', 'Ref CellType', 'Ref Region',
        'Query Co-cluster', 'Query CellType Transfer', 'Query Region'
    ]
    for ax, title in zip(axes.flatten(), titles):
        ax.set_title(title, fontsize=15)

In [None]:
# Call left disabled: plot() reads plot_key, ref_region_type and query_region_type,
# none of which is defined in the visible cells of this notebook.
#plot('umap')

# Save the object with UMAP coordinates

In [None]:
# Write to a new file name, so the input 'final_with_coords.h5ad' is left untouched.
adata_merge.write_h5ad('final_with_umap_coords.h5ad')

In [None]:
# Show the summary of the object that was just written.
adata_merge