In [None]:
import numpy as np
import pandas as pd
import anndata
import scanpy as sc
import seaborn as sns
import harmonypy as hm
import scrublet as scr
from matplotlib import rcParams
import umap

# Notebook-wide scanpy configuration.
sc.settings.verbosity = 1  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Figure defaults for every plot below: 120 dpi and the 'viridis' colour map.
sc.settings.set_figure_params(dpi=120, color_map='viridis')
# Record the package versions used for this run in the cell output.
sc.logging.print_versions()

In [None]:
def DEG_table(adata):
    """Flatten the stored ``rank_genes_groups`` result into one wide table.

    Parameters
    ----------
    adata
        Object whose ``uns['rank_genes_groups']`` holds the entries
        ``'names'``, ``'pvals_adj'`` and ``'logfoldchanges'``. The group
        labels are taken from the field names of the ``'names'`` entry's
        structured dtype, and every entry is indexed by those labels.

    Returns
    -------
    pandas.DataFrame
        Three columns per group, in group order: ``<group>_n`` (names),
        ``<group>_p`` (pvals_adj) and ``<group>_l`` (logfoldchanges).
    """
    ranked = adata.uns['rank_genes_groups']
    columns = {}
    for cluster in ranked['names'].dtype.names:
        for field in ('names', 'pvals_adj', 'logfoldchanges'):
            # Column suffix is the first letter of the field: n / p / l.
            columns[cluster + '_' + field[0]] = ranked[field][cluster]
    return pd.DataFrame(columns)

In [None]:
def filtered_DEGs(adata, q_value = 1E-5, fold_change = 1):
    """Per-group table of up-regulated DEGs that pass both thresholds.

    For every group in ``adata.uns['rank_genes_groups']`` the rows of
    ``DEG_table(adata)`` are kept where the adjusted p-value (``<group>_p``)
    is ``<= q_value`` and the log fold change (``<group>_l``) is
    ``>= fold_change``.

    Parameters
    ----------
    adata
        Object carrying a ``rank_genes_groups`` result in ``.uns``.
    q_value : float
        Upper bound (inclusive) on the adjusted p-value.
    fold_change : float
        Lower bound (inclusive) on the log fold change.

    Returns
    -------
    pandas.DataFrame
        Columns ``<group>_n``, ``<group>_p``, ``<group>_l`` for each group.
        Each group's hits are re-indexed from 0 independently, so groups with
        fewer hits are padded with NaN at the bottom. An empty frame is
        returned when there are no groups.
    """
    markers = DEG_table(adata)
    groups = adata.uns['rank_genes_groups']['names'].dtype.names
    per_group = []
    for group in groups:
        name_col, pval_col, lfc_col = group + '_n', group + '_p', group + '_l'
        keep = (markers[pval_col] <= q_value) & (markers[lfc_col] >= fold_change)
        # Select the three columns by name instead of a label slice so the
        # result does not depend on the column layout of the marker table;
        # reset_index returns a fresh frame (no in-place edit of a slice).
        hits = markers.loc[keep, [name_col, pval_col, lfc_col]].reset_index(drop=True)
        per_group.append(hits)
    if not per_group:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # One column-wise outer concat instead of one outer join per group.
    return pd.concat(per_group, axis=1)

In [None]:
def filtered_DEG_names(adata, q_value = 1E-5, fold_change = 1):
    """Per-group lists of up-regulated DEG names that pass both thresholds.

    Applies the filter ``<group>_p <= q_value`` and
    ``<group>_l >= fold_change`` to ``DEG_table(adata)`` and keeps only the
    name column (``<group>_n``) of each group.

    Parameters
    ----------
    adata
        Object carrying a ``rank_genes_groups`` result in ``.uns``.
    q_value : float
        Upper bound (inclusive) on the adjusted p-value.
    fold_change : float
        Lower bound (inclusive) on the log fold change.

    Returns
    -------
    pandas.DataFrame
        One ``<group>_n`` column per group. Each column is re-indexed from 0
        independently, so shorter columns are padded with NaN. An empty frame
        is returned when there are no groups.
    """
    markers = DEG_table(adata)
    groups = adata.uns['rank_genes_groups']['names'].dtype.names
    per_group = []
    for group in groups:
        keep = (markers[group + '_p'] <= q_value) & (markers[group + '_l'] >= fold_change)
        # A named Series per group; its name becomes the output column label.
        # reset_index returns a new Series (no in-place edit of a slice).
        per_group.append(markers.loc[keep, group + '_n'].reset_index(drop=True))
    if not per_group:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # One column-wise outer concat instead of one outer join per group.
    return pd.concat(per_group, axis=1)

In [None]:
def filtered_down_reg_DEGs(adata, q_value = 1E-5, fold_change = -1):
    """Per-group table of down-regulated DEGs that pass both thresholds.

    For every group in ``adata.uns['rank_genes_groups']`` the rows of
    ``DEG_table(adata)`` are kept where the adjusted p-value (``<group>_p``)
    is ``<= q_value`` and the log fold change (``<group>_l``) is
    ``<= fold_change``.

    Parameters
    ----------
    adata
        Object carrying a ``rank_genes_groups`` result in ``.uns``.
    q_value : float
        Upper bound (inclusive) on the adjusted p-value.
    fold_change : float
        Upper bound (inclusive) on the log fold change; the default of -1
        selects genes at or below that value.

    Returns
    -------
    pandas.DataFrame
        Columns ``<group>_n``, ``<group>_p``, ``<group>_l`` for each group.
        Each group's hits are re-indexed from 0 independently, so groups with
        fewer hits are padded with NaN at the bottom. An empty frame is
        returned when there are no groups.
    """
    markers = DEG_table(adata)
    groups = adata.uns['rank_genes_groups']['names'].dtype.names
    per_group = []
    for group in groups:
        name_col, pval_col, lfc_col = group + '_n', group + '_p', group + '_l'
        keep = (markers[pval_col] <= q_value) & (markers[lfc_col] <= fold_change)
        # Select the three columns by name instead of a label slice so the
        # result does not depend on the column layout of the marker table;
        # reset_index returns a fresh frame (no in-place edit of a slice).
        hits = markers.loc[keep, [name_col, pval_col, lfc_col]].reset_index(drop=True)
        per_group.append(hits)
    if not per_group:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # One column-wise outer concat instead of one outer join per group.
    return pd.concat(per_group, axis=1)

In [None]:
# Load the dataset from the h5ad file in the working directory; every subset
# and plot below is derived from `adata_orig`.
adata_orig = sc.read_h5ad('./QC_global_zfish_ven.h5ad')

In [None]:
# Show the notebook repr of the loaded object as a quick sanity check.
adata_orig

In [None]:
# Dimensions of the loaded object.
adata_orig.shape

In [None]:
# Print the maximum of the main matrix and of the stored `.raw` matrix.
# NOTE(review): presumably a check of which one is normalised vs. unprocessed
# values - confirm. Requires `adata_orig.raw` to be set.
print(adata_orig.X.max())
print(adata_orig.raw.X.max())

In [None]:
# UMAP coloured by the `cell_type` annotation, before the labels are renamed.
# NOTE(review): `cell_type` looks categorical (it is accessed via `.cat` further
# down); scanpy documents `color_map` as applying to continuous values, so
# 'tab10' may have no effect here - confirm, and consider `palette=` if a
# categorical colour scheme was intended. `use_raw=True` likewise presumably
# only matters when colouring by gene expression - confirm.
sc.pl.umap(adata_orig, color = ['cell_type',
                              ], size = 6, legend_fontsize = 10, frameon = False, use_raw=True, color_map='tab10' )

In [None]:
# Keep only the cells labelled CM1, CM2 or CM4; `.copy()` makes the selection
# its own object, separate from `adata_orig`.
adata_CMs = adata_orig[adata_orig.obs['cell_type'].isin(['CM1','CM2','CM4'])].copy()

In [None]:
# Keep only the two endothelial clusters; `.copy()` makes the selection its
# own object, separate from `adata_orig`.
adata_ECs = adata_orig[adata_orig.obs['cell_type'].isin(['Endothelial Cells','Endothelial Cells 2'])].copy()

In [None]:
# Same selection as `adata_ECs` plus the 'Vasculature Cells' cluster, again
# copied into its own object.
adata_EC2s = adata_orig[adata_orig.obs['cell_type'].isin(['Endothelial Cells','Endothelial Cells 2','Vasculature Cells'])].copy()

In [None]:
# Export the CM subset: build a new AnnData from the stored raw matrix and raw
# var table (`.raw.X`, `.raw.var`), keeping the subset's obs and obsm, and
# write it to disk. `adata_out` is a scratch name reused by each export cell.
adata_out = anndata.AnnData(X=adata_CMs.raw.X, obs=adata_CMs.obs, var=adata_CMs.raw.var, obsm=adata_CMs.obsm).copy()
adata_out.write('./CMs_zfish_ven_RAW.h5ad')

In [None]:
# Export the EC subset: build a new AnnData from the stored raw matrix and raw
# var table (`.raw.X`, `.raw.var`), keeping the subset's obs and obsm, and
# write it to disk. `adata_out` is a scratch name reused by each export cell.
adata_out = anndata.AnnData(X=adata_ECs.raw.X, obs=adata_ECs.obs, var=adata_ECs.raw.var, obsm=adata_ECs.obsm).copy()
adata_out.write('./ECs_zfish_ven_RAW.h5ad')

In [None]:
# Export the EC2 subset (endothelial + vasculature): build a new AnnData from
# the stored raw matrix and raw var table (`.raw.X`, `.raw.var`), keeping the
# subset's obs and obsm, and write it to disk. After this cell `adata_out`
# refers to this last export.
adata_out = anndata.AnnData(X=adata_EC2s.raw.X, obs=adata_EC2s.obs, var=adata_EC2s.raw.var, obsm=adata_EC2s.obsm).copy()
adata_out.write('./EC2s_zfish_ven_RAW.h5ad')

In [None]:
# List the current `cell_type` category labels (presumably the source of the
# `old_names` list used for renaming - confirm they still match).
adata_orig.obs['cell_type'].cat.categories

In [None]:
# Rename the cardiomyocyte clusters (CM1/CM2/CM4) to descriptive labels; every
# other label maps to itself. `old_names` and `new_names` are paired by position.
old_names = ['AV/ Ventricular Conduction Cells', 'CM1', 'CM2', 'CM4',
       'Endothelial Cells', 'Endothelial Cells 2', 'Erythrocytes',
       'Fibroblasts', 'Immune Cells', 'Pericytes', 'Vasculature Cells']
new_names = ['AV/ Ventricular Conduction Cells', 'Cardiac Myocyte 1', 'Cardiac Myocyte 2', 'Cardiac Myocyte 3',
       'Endothelial Cells', 'Endothelial Cells 2', 'Erythrocytes',
       'Fibroblasts', 'Immune Cells', 'Pericytes', 'Vasculature Cells']
# Rename at the category level rather than with `.map(dict)`: `.map` turns any
# label missing from `old_names` into NaN, so re-running this cell after the
# rename would wipe the three renamed labels. `rename_categories` with a dict
# passes unknown categories through and ignores unused keys, which makes the
# cell safe to re-run and keeps the existing category order.
adata_orig.obs['cell_type'] = (
    adata_orig.obs['cell_type']
    .astype('category')
    .cat.rename_categories(dict(zip(old_names, new_names)))
)

In [None]:
# UMAP coloured by `cell_type` again, now showing the renamed labels.
# NOTE(review): scanpy documents `color_map` as applying to continuous values,
# so 'tab10' may have no effect on this categorical annotation - confirm, and
# consider `palette=` if a categorical colour scheme was intended.
sc.pl.umap(adata_orig, color = ['cell_type',
                              ], size = 6, legend_fontsize = 10, frameon = False, use_raw=True, color_map='tab10' )

In [None]:
# Same UMAP with the legend placed on the data (legend_loc='on data') instead
# of beside the axes, using a smaller legend font.
sc.pl.umap(adata_orig, color='cell_type', legend_loc='on data',
           frameon=False, legend_fontsize=6)