In [16]:
import scanpy.api as sc
import pandas as pd
import loompy
import numpy as np
import anndata as ad
import ntpath

In [17]:
def pandas_into_adata(path):
    """Read a delimited expression table and wrap it in an AnnData object.

    The table is read with ``pd.read_table`` and transposed: the table's
    columns become observations (``obs``) and its row index becomes the
    variables (``var``). Presumably the file is a genes x cells matrix --
    confirm against the input files.

    Parameters
    ----------
    path
        Location of the table, passed straight to ``pd.read_table``.

    Returns
    -------
    AnnData
        Object whose ``X`` is the transposed table values and whose
        ``obs['n_counts']`` holds the per-observation sum of ``X``.
    """
    table = pd.read_table(path)
    adata = ad.AnnData(
        table.values.T,                       # observations along axis 0
        obs=pd.DataFrame(index=table.columns),
        var=pd.DataFrame(index=table.index),
    )
    # Row-wise total of X, i.e. one value per observation.
    adata.obs['n_counts'] = adata.X.sum(axis=1)
    return adata

In [18]:
def preprocess(adata):
    """Filter and depth-normalise ``adata``, storing a log1p copy in ``.raw``.

    The scanpy calls below are used for their in-place effect on ``adata``
    (their return values are ignored), in this order:

    1. ``filter_cells`` with ``min_genes=200``
    2. ``filter_genes`` with ``min_cells=3``
    3. ``normalize_per_cell`` with ``counts_per_cell_after=1e4``
    4. a log1p-transformed *copy* is assigned to ``adata.raw``

    Parameters
    ----------
    adata
        AnnData object to preprocess; it is modified by this function.

    Returns
    -------
    AnnData
        The same ``adata`` object that was passed in.
    """
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)
    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
    # NOTE(review): an earlier version computed
    # sc.pp.filter_genes_dispersion(adata.X, min_mean=0.0125, max_mean=3,
    # min_disp=0.5) here and discarded the result, so no gene was ever
    # removed by it. The dead computation has been dropped; if subsetting to
    # highly variable genes was intended, it has to be applied explicitly.
    #
    # NOTE(review): log1p runs with copy=True, so only adata.raw receives the
    # log-transformed values; adata.X presumably stays normalised but
    # un-logged -- confirm this is the intended layout.
    adata.raw = sc.pp.log1p(adata, copy=True)
    return adata


In [19]:
def generate_loom_filename(path):
    """Derive a ``.loom`` file name from ``path``.

    Only the final path component is kept (directories are dropped), and
    everything after its last ``"."`` is replaced by ``"loom"``. A name
    without any ``"."`` simply gets ``".loom"`` appended.

    Parameters
    ----------
    path : str
        Path of the input file.

    Returns
    -------
    str
        Bare file name ending in ``.loom`` (no directory part).
    """
    base = ntpath.basename(path)
    stem, dot, _ = base.rpartition(".")
    if not dot:
        # No extension separator present: keep the whole name as the stem.
        stem = base
    return stem + ".loom"
    

In [20]:
def load_preprocess_store(path):
    """Load an expression table, preprocess it, save it as loom and reload it.

    The loom file name comes from ``generate_loom_filename``, which keeps
    only the base name of ``path``; the file is therefore written relative to
    the current working directory, not next to the input.

    Parameters
    ----------
    path
        Path of the expression table to load.

    Returns
    -------
    AnnData
        The object obtained by reading the freshly written loom file back
        with ``sc.read_loom`` (not the in-memory preprocessed object).
    """
    processed = preprocess(pandas_into_adata(path))
    loom_file = generate_loom_filename(path)
    # Write the object returned by preprocess() rather than the pre-call
    # reference, so the result stays correct even if preprocess() ever
    # returns a new object instead of mutating its argument.
    processed.write_loom(loom_file)
    return sc.read_loom(loom_file)

In [21]:
# Preprocess the SeqWell expression matrix and keep the AnnData reloaded from its loom file.
seqwell = load_preprocess_store("./pbmc_SeqWell.expressionMatrix.txt")


In [23]:
# Preprocess the 10X expression matrix and keep the AnnData reloaded from its loom file.
tenx = load_preprocess_store("./pbmc_10X.expressionMatrix.txt")

In [24]:
# Display the summary repr of the reloaded 10X object (dimensions, obs/var columns).
tenx

AnnData object with n_obs × n_vars = 2638 × 13656 
    obs: 'n_counts', 'n_genes'
    var: 'n_cells'