Skip to content

Commit

Permalink
Merge pull request #47 from klarman-cell-observatory/boli
Browse files Browse the repository at this point in the history
Remove zero categories when using hashing or genetic pooling, modifie…
  • Loading branch information
bli25 committed Dec 4, 2020
2 parents be7f04d + ceb92f3 commit 51559be
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 9 deletions.
3 changes: 1 addition & 2 deletions ext_modules/fast_funcs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ cpdef tuple split_barcode_channel(str[:] arr):
for i in range(size):
res = arr[i].rsplit(sep = '-', maxsplit = 1)
bview[i] = res[0]
if len(res) > 1:
cview[i] = res[1]
cview[i] = res[1]

return (barcodes, channels)

Expand Down
5 changes: 5 additions & 0 deletions pegasusio/qc_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pandas as pd

from pegasusio import UnimodalData

Expand Down Expand Up @@ -158,6 +159,10 @@ def apply_qc_filters(unidata: UnimodalData):
cols = ["passed_qc"]
if unidata.uns.get("__del_demux_type", False):
cols.append("demux_type")
if "assignment" in unidata.obs:
# remove categories that contain no elements
series = unidata.obs["assignment"].value_counts(sort = False)
unidata.obs["assignment"] = pd.Categorical(unidata.obs["assignment"], categories = series[series > 0].index.astype(str))
# del unidata.uns["__del_demux_type"]

unidata.obs.drop(columns=cols, inplace=True)
Expand Down
11 changes: 4 additions & 7 deletions pegasusio/unimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,10 @@ def _inplace_subset_var(self, index: List[bool]) -> None:


def separate_channels(self) -> None:
""" Separate channel information from barcodekeys, only used for 10x v2, v3 h5 and mtx.
""" Separate channel information from barcodekeys, used for 10x v2, v3 h5 and mtx as well as Optimus loom.
"""
if self.barcode_metadata.shape[0] == 0:
return None # no data
if self.barcode_metadata.shape[0] == 0 or self.barcode_metadata.index[0].find("-") < 0:
return None # no data or no dash to remove

try:
from pegasusio.cylib.funcs import split_barcode_channel
Expand All @@ -357,10 +357,7 @@ def separate_channels(self) -> None:

barcodes, channels = split_barcode_channel(self.barcode_metadata.index.values)

if channels[0] is None:
return None # no need to separate channel information and the file should not be generated by cellranger

if (channels != "1").sum() > 0:
if np.unique(channels).size > 1:
# we have multiple channels
self.barcode_metadata["Channel"] = channels
barcodes = np.array([x + "-" + y for x, y in zip(channels, barcodes)], dtype = object)
Expand Down

0 comments on commit 51559be

Please sign in to comment.