In [1]:
import scanpy as sc
import anndata as ad
import numpy as np
import random
import matplotlib.pyplot as plt
import scanpy.external as sce
import nsforest as ns
# ---------------------------------------------------------------------------
# Global settings
# ---------------------------------------------------------------------------
sc.settings.verbosity = 3
sc.set_figure_params(dpi=100)
# Seed both NumPy's global RNG and the standard-library RNG (`random` is
# imported above) so that steps drawing from either generator are repeatable
# after Restart & Run All.
np.random.seed(66)
random.seed(66)

# ---------------------------------------------------------------------------
# Load the count matrix and the per-class AnnData files
# ---------------------------------------------------------------------------
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
data_path = "E:/Mouse_V1/P28NR/"
adata_counts = sc.read_csv(data_path + "GSE190940_P28NR.csv")

# One "<sample>.h5ad" file per entry. The order matters: it must line up with
# the `keys` passed to `ad.concat` below, and later cells index `accum_adata`
# by position.
sample_IDs = ["P28_glut", "P28_gaba", "P28_non"]

accum_adata, cell_count = [], 0
for sample in sample_IDs:
    pre_path = data_path + sample + ".h5ad"
    temp_adata = sc.read_h5ad(pre_path)
    temp_adata.var_names_make_unique()
    # The first 29 characters of each obs name are used as the lookup key into
    # the CSV-derived matrix.
    # NOTE(review): presumably the .h5ad names carry a suffix that the CSV
    # names lack -- confirm.
    obs_names = [name[:29] for name in temp_adata.obs_names]
    # Replace X with the matching cells/genes taken from `adata_counts`.
    X_counts = adata_counts[obs_names, temp_adata.var_names].X
    temp_adata.X = X_counts
    accum_adata.append(temp_adata)
    cell_count += temp_adata.n_obs

# Stack the three objects along obs; the `class` column records which input
# each cell came from (keys are given in the same order as `sample_IDs`).
adata = ad.concat(
    accum_adata,
    join="outer",
    label="class",
    keys=["glutamatergic", "GABAergic", "non-neuronal"],
)
adata

AnnData object with n_obs × n_vars = 23930 × 30869
    obs: 'batch', 'n_genes', 'percent_mito', 'n_counts', 'leiden', 'Doublet', 'Doublet Score', 'cluster', 'Class_broad', 'sample', 'Age', 'subclass', 'sample_new', 'layer', 'Type', 'Subclass', 'class'
    obsm: 'X_harmony', 'X_pca', 'X_umap'

In [2]:
# Keep only the first 25 characters of every cell (observation) name.
# NOTE(review): truncation could map distinct names onto the same string;
# uniqueness of the result is not checked here.
adata.obs_names = list(map(lambda cell_name: cell_name[:25], adata.obs_names))

In [3]:
# (source column, target column) pairs. Values are *copied* under the target
# name; the source columns stay in `adata.obs`. Where the target already
# exists ("batch", "subclass") it is overwritten by the source column.
renames = (
    ("n_counts", "n_UMIs"),
    ("Doublet", "doublet"),
    ("Doublet Score", "doublet_score"),
    ("sample", "batch"),
    ("Subclass", "subclass"),
    ("Type", "type"),
)

for orig, new in renames:
    adata.obs[new] = adata.obs[orig].copy()

# Multiply `percent_mito` by 100 exactly once. Without the guard, every
# re-execution of this cell would multiply the column by 100 again and
# silently corrupt it. The flag lives in `adata.uns`, so it is reset whenever
# `adata` itself is rebuilt.
if not adata.uns.get("percent_mito_scaled", False):
    adata.obs["percent_mito"] = adata.obs["percent_mito"] * 100
    adata.uns["percent_mito_scaled"] = True

Create Glutamatergic AnnData

In [4]:
# The glutamatergic input object is the first one loaded into `accum_adata`;
# its gene-level and graph-level annotations are transferred onto the subset.
glut_src = accum_adata[0]

# --- Subset cells and genes ------------------------------------------------
is_glut = adata.obs["class"] == "glutamatergic"
adata_glut = adata[is_glut, glut_src.var_names].copy()

kept_obs_columns = [
    "batch", "n_genes", "n_UMIs", "percent_mito", "doublet_score",
    "doublet", "class", "subclass", "type", "leiden",
]
adata_glut.obs = adata_glut.obs[kept_obs_columns].copy()

# --- Carry over annotations from the source object --------------------------
adata_glut.var = glut_src.var.copy()
adata_glut.varm = glut_src.varm.copy()
adata_glut.obsp = glut_src.obsp.copy()
adata_glut.uns = glut_src.uns.copy()

# Store the colour palettes under the new obs column names, then remove the
# palettes whose keys are no longer wanted.
for old_key, new_key in (("Type_colors", "type_colors"), ("sample_colors", "batch_colors")):
    adata_glut.uns[new_key] = adata_glut.uns[old_key].copy()
for unwanted_key in ("Type_colors", "sample_colors", "cluster_colors", "layer_colors"):
    del adata_glut.uns[unwanted_key]

# --- Normalisation; each stage is snapshotted into its own layer ------------
adata_glut.raw = None
adata_glut.layers["counts"] = adata_glut.X.copy()

# inplace=False leaves the "counts" layer untouched and returns the normalised
# matrix ("X") together with the per-cell factors ("norm_factor").
CPTs = sc.pp.normalize_total(adata_glut, layer="counts", target_sum=1e4, inplace=False)
adata_glut.obs["norm_factor"] = CPTs["norm_factor"].copy()
adata_glut.layers["CPT"] = CPTs["X"].copy()
adata_glut.X = CPTs["X"].copy()

# log1p of the normalised values; `.raw` stays None (set above) and the
# log-transformed matrix is kept as a layer instead.
sc.pp.log1p(adata_glut)
adata_glut.layers["logCPT"] = adata_glut.X.copy()

# Per-gene scaling with values clipped at 10; X ends up holding this matrix.
sc.pp.scale(adata_glut, max_value=10)
adata_glut.layers["z-score"] = adata_glut.X.copy()

# --- Persist and display -----------------------------------------------------
adata_glut.write_h5ad(data_path + "Mouse_V1_P28NR_Glut.h5ad")
adata_glut

normalizing counts per cell
    finished (0:00:00)


AnnData object with n_obs × n_vars = 15315 × 30869
    obs: 'batch', 'n_genes', 'n_UMIs', 'percent_mito', 'doublet_score', 'doublet', 'class', 'subclass', 'type', 'norm_factor'
    var: 'gene_ids', 'feature_types', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: "dendrogram_['leiden']", 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap', 'type_colors', 'batch_colors', 'log1p'
    obsm: 'X_harmony', 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'CPT', 'logCPT', 'z-score'
    obsp: 'connectivities', 'distances'

Create GABAergic AnnData

In [6]:
# Sanity check: dimensions of the "X_harmony" embedding stored in obsm.
np.shape(adata_glut.obsm["X_harmony"])

(15315, 50)

In [None]:
# Reduce the combined object's per-cell table to the harmonised columns.
# `.copy()` hands AnnData an independent DataFrame rather than the result of a
# column selection, matching how `adata_glut.obs` is built in the
# glutamatergic cell.
# NOTE(review): "leiden" and the original source columns ("n_counts", "Type",
# ...) are dropped here, so re-running the column-copy cell or the
# glutamatergic cell after this one raises KeyError -- confirm whether
# "leiden" should be kept.
adata.obs = adata.obs[
    ["batch", "n_genes", "n_UMIs", "percent_mito", "doublet_score", "doublet", "class", "subclass", "type"]
].copy()