Skip to content

Commit

Permalink
Merge pull request #44 from klarman-cell-observatory/boli
Browse files Browse the repository at this point in the history
Three updates. 1. do not recalculate n_genes, n_counts and percent_mi…
  • Loading branch information
bli25 committed Nov 15, 2020
2 parents 9030916 + 0ef4638 commit c46a539
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 13 deletions.
16 changes: 13 additions & 3 deletions pegasusio/multimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,16 @@ def _convert_attributes_to_categorical(self, attributes: Set[str]) -> None:
unidata._convert_attributes_to_categorical(attributes)


def _clean_tmp(self) -> dict:
    """Run ``_clean_tmp`` on every object in ``self.data`` and collect what each one returns.

    Returns
    -------
    dict
        Maps each key of ``self.data`` to the value returned by that object's
        ``_clean_tmp()``. Objects that return ``None`` are left out, so the
        result is an empty dict when no object reported anything.
    """
    _tmp_multi = {}
    for key, unidata in self.data.items():
        _tmp_dict = unidata._clean_tmp()
        # None is the per-object signal for "nothing to report"; skip it so that
        # only keys with something to hand back appear in the result.
        if _tmp_dict is not None:
            _tmp_multi[key] = _tmp_dict
    return _tmp_multi

def _addback_tmp(self, _tmp_multi) -> None:
    """Forward each stored value in ``_tmp_multi`` to the matching object in ``self.data``.

    Parameters
    ----------
    _tmp_multi
        Mapping whose keys are looked up in ``self.data``; each value is passed
        to ``_addback_tmp`` of the object found under that key.
    """
    for modality_key in _tmp_multi:
        stashed = _tmp_multi[modality_key]
        self.data[modality_key]._addback_tmp(stashed)


24 changes: 17 additions & 7 deletions pegasusio/qc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ def calc_qc_filters(
min_cond = min_genes is not None
max_cond = max_genes is not None
if min_cond or max_cond:
unidata.obs["n_genes"] = unidata.X.getnnz(axis=1)
if "n_genes" not in unidata.obs:
unidata.obs["n_genes"] = unidata.X.getnnz(axis=1)
if min_cond:
filters.append(unidata.obs["n_genes"] >= min_genes)
if max_cond:
Expand All @@ -126,18 +127,20 @@ def calc_qc_filters(
max_cond = max_umis is not None
calc_mito = (mito_prefix is not None) and (percent_mito is not None)
if min_cond or max_cond or calc_mito:
unidata.obs["n_counts"] = unidata.X.sum(axis=1).A1
if "n_counts" not in unidata.obs:
unidata.obs["n_counts"] = unidata.X.sum(axis=1).A1
if min_cond:
filters.append(unidata.obs["n_counts"] >= min_umis)
if max_cond:
filters.append(unidata.obs["n_counts"] < max_umis)
if calc_mito:
mito_genes = unidata.var_names.map(lambda x: x.startswith(mito_prefix)).values.nonzero()[0]

unidata.obs["percent_mito"] = (
unidata.X[:, mito_genes].sum(axis=1).A1
/ np.maximum(unidata.obs["n_counts"].values, 1.0)
) * 100
if "percent_mito" not in unidata.obs:
unidata.obs["percent_mito"] = (
unidata.X[:, mito_genes].sum(axis=1).A1
/ np.maximum(unidata.obs["n_counts"].values, 1.0)
) * 100

filters.append(unidata.obs["percent_mito"] < percent_mito)

Expand All @@ -155,7 +158,14 @@ def apply_qc_filters(unidata: UnimodalData):
cols = ["passed_qc"]
if unidata.uns.get("__del_demux_type", False):
cols.append("demux_type")
del unidata.uns["__del_demux_type"]
# del unidata.uns["__del_demux_type"]

unidata.obs.drop(columns=cols, inplace=True)
if len(unidata.obsm) > 0:
unidata.obsm.clear()
if len(unidata.varm) > 0:
unidata.varm.clear()
for key in list(unidata.uns):
if key not in {'genome', 'modality', 'norm_count'}:
del unidata.uns[key]
logger.info(f"After filtration, {unidata.shape[0]} out of {prior_n} cell barcodes are kept in UnimodalData object {unidata.get_uid()}.")
3 changes: 2 additions & 1 deletion pegasusio/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def write_output(
"""
if isinstance(data, UnimodalData):
data = MultimodalData(data)
data._clean_tmp() # for each unidata, remove uns keys starting with '_tmp'
_tmp_multi = data._clean_tmp() # for each unidata, remove uns keys starting with '_tmp' and store these values to _tmp_multi

output_file = os.path.expanduser(os.path.expandvars(output_file))

Expand Down Expand Up @@ -258,4 +258,5 @@ def _infer_output_file_type(output_File: str) -> str:
else:
raise ValueError(f"Unknown file type '{file_type}'!")

data._addback_tmp(_tmp_multi)
logger.info(f"{file_type} file '{output_file}' is written.")
10 changes: 8 additions & 2 deletions pegasusio/unimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,13 @@ def _convert_attributes_to_categorical(self, attributes: Set[str]) -> None:
self.barcode_metadata[attr] = pd.Categorical(values, categories=natsorted(np.unique(values)))


def _clean_tmp(self) -> dict:
    """Remove every ``self.metadata`` entry whose key starts with ``"_tmp"`` and return them.

    Returns
    -------
    dict or None
        The removed key/value pairs, or ``None`` when no key matched.
    """
    _tmp_dict = {}
    # Iterate over a snapshot of the keys because entries are removed inside the loop.
    for key in list(self.metadata):
        if key.startswith("_tmp"):
            # pop() removes the entry and hands back its value in one step; a
            # separate `del` beforehand would make this lookup raise KeyError.
            _tmp_dict[key] = self.metadata.pop(key)
    return _tmp_dict if len(_tmp_dict) > 0 else None

def _addback_tmp(self, _tmp_dict: dict) -> None:
    """Merge the entries of ``_tmp_dict`` back into ``self.metadata``.

    Parameters
    ----------
    _tmp_dict
        Key/value pairs to put back. ``None`` or an empty dict is accepted and
        leaves ``self.metadata`` untouched.
    """
    # _clean_tmp returns None when it removed nothing, and dict.update(None)
    # raises TypeError, so skip the merge when there is nothing to restore.
    if _tmp_dict:
        self.metadata.update(_tmp_dict)

0 comments on commit c46a539

Please sign in to comment.