# Process scRNA-seq Data with msmu


In [1]:
from pathlib import Path
import pandas as pd
import requests
import scanpy as sc
import tarfile

In [2]:
# Release assets: the tarball with the count tables and the sample sheet (CSV).
url = "https://github.com/bertis-informatics/msmu/releases/download/0.2.2/GSE201575.tar.gz"
meta = "https://github.com/bertis-informatics/msmu/releases/download/0.2.2/meta.csv"

# Local file name of the archive, e.g. "GSE201575.tar.gz".
archive_name = Path(url).name
# Everything before the first "." -> "GSE201575".
# NOTE(review): assumes the archive unpacks into a directory of this name -- confirm.
base_dir = archive_name.split(".")[0]

# A timeout keeps a stalled connection from hanging the notebook forever.
r = requests.get(url, timeout=60)
r.raise_for_status()

with open(archive_name, "wb") as f:
    f.write(r.content)

with tarfile.open(archive_name, "r:gz") as tar:
    # Skip "._*" entries (AppleDouble resource-fork files added by macOS tar).
    members = [m for m in tar.getmembers() if not Path(m.name).name.startswith("._")]
    if hasattr(tarfile, "data_filter"):
        # The "data" filter rejects absolute paths, "..", and links that point
        # outside the destination; it also silences the Python 3.12+
        # DeprecationWarning about calling extractall() without a filter.
        tar.extractall(members=members, filter="data")
    else:
        # Interpreters that predate extraction filters: keep the old behaviour.
        tar.extractall(members=members)

  tar.extractall(members=members)


## Read count matrix


In [3]:
# Gather the gzipped text tables under base_dir in a stable, sorted order.
path_list = sorted(Path(base_dir).glob("*.txt.gz"))
adata_list = []

for count_file in path_list:
    # Path.stem strips only the final suffix (".gz"), so ".txt" is still present.
    base_name = count_file.stem

    # Read the tab-separated table and transpose it; the single-element obs
    # index assigned below means each transposed table must hold exactly one
    # observation.
    sample_adata = sc.read_text(count_file, delimiter="\t", first_column_names=True).T

    # Observation name = second "_"-separated token of the name before the first ".".
    sample_adata.obs.index = [base_name.split(".")[0].split("_")[1]]
    sample_adata.obs["filename"] = [base_name]
    adata_list.append(sample_adata)

# Stack all per-file objects along the observation axis.
adata = sc.concat(adata_list)

## Add metadata and keep only the samples in use


In [4]:
# Load the sample sheet, discard rows with any missing value, and index it by
# the "sample_rna" column so it lines up with the observation names in adata.
meta_df = pd.read_csv(meta).dropna()
meta_df.index = meta_df["sample_rna"].values

# Left-merge on the index: every observation is kept, and observations with no
# matching metadata row receive NaN in the metadata columns.
obs_with_meta = adata.obs.merge(meta_df, left_index=True, right_index=True, how="left")
adata.obs = obs_with_meta

# Keep only observations whose obs row has no missing values (i.e. a metadata match).
annotated_names = adata.obs.dropna().index.to_list()
adata = adata[annotated_names].copy()

# Re-label the observations with the "sample_id" values from the sample sheet.
adata.obs.index = adata.obs["sample_id"].values

adata

AnnData object with n_obs × n_vars = 70 × 40207
    obs: 'filename', 'set', 'sample_id', 'sample', 'cell', 'condition', 'sample_rna'

## Filtering and normalization


In [5]:
# Snapshot the current matrix in a "counts" layer before X is transformed below.
adata.layers["counts"] = adata.X.copy()

# Both filters modify adata in place (scanpy's default, inplace=True).
# Genes first: keep those detected in at least 3 observations ...
sc.pp.filter_genes(adata, min_cells=3)
# ... then observations: keep those with at least 200 detected genes.
sc.pp.filter_cells(adata, min_genes=200)

# Scale each observation's total, then apply log(1 + x) to X.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

adata

AnnData object with n_obs × n_vars = 70 × 13451
    obs: 'filename', 'set', 'sample_id', 'sample', 'cell', 'condition', 'sample_rna', 'n_genes'
    var: 'n_cells'
    uns: 'log1p'
    layers: 'counts'

## Save AnnData object


In [6]:
# Write the processed object to an .h5ad file in the current working directory.
output_path = "GSE201575.h5ad"
adata.write_h5ad(output_path)