In [1]:
import pandas as pd
import anndata
from anndata import AnnData
import scanpy as sc
import matplotlib.pyplot as plt
from os.path import join

In [2]:
# Directory holding the downloaded input file(s); the load cell joins it with the
# expression file name via os.path.join. Relative path, so it resolves against the
# notebook's working directory.
raw_dir = "raw/zeisel_2015/"

In [3]:
# One tab-separated file supplies both the per-cell annotation header and the
# gene-by-cell count table, so build its path once.
expr_path = join(raw_dir, "expression_mRNA_17-Aug-2014.txt")

# ---- per-cell annotations ---------------------------------------------------
# Read only the first 10 rows, discard the leading column, and transpose so
# that every row describes one cell.
df_meta = pd.read_csv(expr_path, sep='\t', header=None, nrows=10).iloc[:, 1:].T
# After the transpose the first row carries the annotation names; promote it
# to the column header and drop it from the data.
columns = df_meta.iloc[0, :]
df_meta = df_meta.iloc[1:, :]
df_meta.columns = columns
df_meta = df_meta.set_index("cell_id")
df_meta.columns.name = None
# Everything was parsed as text (header=None); only this column is cast to a number.
df_meta["total mRNA mol"] = df_meta["total mRNA mol"].astype(float)

# ---- expression table -------------------------------------------------------
# Skip the first 11 lines of the file; column 0 of what remains becomes the
# gene index.
expr_table = pd.read_csv(expr_path, sep='\t', header=None, skiprows=11).set_index(0)
expr_table.index.name = "gene"

# First remaining column -> per-gene 'BackSPIN' annotation (kept as a one-column frame).
df_gene_backspin = expr_table.iloc[:, [0]]
df_gene_backspin.columns = ['BackSPIN']

# All other columns are labelled positionally with the cell ids from df_meta,
# then transposed to cells x genes.
df_expr = expr_table.iloc[:, 1:]
df_expr.columns = df_meta.index
df_expr = df_expr.T

# Bundle matrix, cell annotations and gene annotations, and persist the result.
adata = AnnData(df_expr, obs=df_meta, var=df_gene_backspin)
adata.write_h5ad("processed/zeisel_2015.raw.h5ad")

... storing 'tissue' as categorical
... storing 'group #' as categorical
... storing 'well' as categorical
... storing 'sex' as categorical
... storing 'age' as categorical
... storing 'diameter' as categorical
... storing 'level1class' as categorical
... storing 'level2class' as categorical


In [None]:
# Start from the AnnData file written to disk by the loading cell.
adata = sc.read_h5ad("processed/zeisel_2015.raw.h5ad")

# min_genes=0 is not expected to drop any cell; the call also records a
# per-cell `n_genes` annotation in `adata.obs`, which the covariate cell reads.
sc.pp.filter_cells(adata, min_genes=0)
# Keep only genes detected in at least 30 cells.
sc.pp.filter_genes(adata, min_cells=30)

# Snapshot taken BEFORE normalisation: `adata.raw` keeps the filtered but
# otherwise untransformed matrix, which is what gets exported at the end.
adata.raw = adata

# Library-size normalisation, log1p transform, then highly-variable-gene flagging.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highest_expr_genes(adata, n_top=20, )

# Slicing an AnnData returns a view. sc.pp.scale below modifies the matrix in
# place, so materialise the highly-variable subset explicitly instead of
# relying on an implicit view-to-copy conversion.
adata = adata[:, adata.var.highly_variable].copy()

# Scale (clipped at 10), PCA, neighbour graph, UMAP embedding, Leiden clustering.
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
sc.set_figure_params(dpi=150)
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata)
sc.tl.leiden(adata)

# Export the pre-normalisation matrix from `adata.raw` and attach the pairwise
# (obsp) matrices computed above on the processed subset.
raw_adata = adata.raw.to_adata()
raw_adata.obsp = adata.obsp
raw_adata.write_h5ad("processed/zeisel_2015.processed.h5ad")

In [9]:
# Covariate table indexed by cell: a constant column of ones plus the per-cell
# `n_genes` annotation taken from adata.obs, written as a tab-separated file.
df_cov = pd.DataFrame(index=adata.obs.index).assign(
    const=1,
    n_genes=adata.obs['n_genes'],
)
df_cov.to_csv("processed/zeisel_2015.cov.tsv", sep='\t')

In [None]:
# UMAP panels: two annotation columns from the input metadata and one gene.
umap_color_keys = ["level1class", "level2class", "Gad1"]
sc.pl.umap(
    adata,
    color=umap_color_keys,
    size=40,
    legend_loc="on data",
)