Skip to content

Commit

Permalink
Merge pull request #24 from klarman-cell-observatory/boli
Browse files Browse the repository at this point in the history
Updated mito
  • Loading branch information
bli25 committed Jun 10, 2020
2 parents c9ca38c + b22942e commit a550aa3
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 37 deletions.
4 changes: 2 additions & 2 deletions pegasusio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from .vdj_data import VDJData
from .citeseq_data import CITESeqData
from .cyto_data import CytoData
from .qc_utils import calc_qc_filters, apply_qc_filters
from .qc_utils import calc_qc_filters, apply_qc_filters, DictWithDefault
from .multimodal_data import MultimodalData
from .aggr_data import AggrData
from .aggr_data import AggrData, _get_fillna_dict
from .readwrite import infer_file_type, read_input, write_output
from .data_aggregation import aggregate_matrices

Expand Down
40 changes: 20 additions & 20 deletions pegasusio/aggr_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@
from pegasusio import UnimodalData, CITESeqData, CytoData, VDJData, MultimodalData


def _get_fillna_dict(df: pd.DataFrame) -> dict:
    """Build a column -> fill value mapping for the columns of ``df``.

    The fill value is chosen from the one-character ``dtype.kind`` code of
    each column:

    * ``"b"`` -> ``False``
    * ``"i"``, ``"u"``, ``"f"``, ``"c"`` -> ``0``
    * ``"S"`` -> ``b""``
    * ``"O"``, ``"U"`` -> ``""``

    Raises:
        ValueError: if a column has any other dtype kind.
    """
    # One entry per supported dtype kind code.
    fill_by_kind = {
        "b": False,
        "i": 0,
        "u": 0,
        "f": 0,
        "c": 0,
        "S": b"",
        "O": "",
        "U": "",
    }

    result = {}
    for name in df:
        kind = df[name].dtype.kind
        if kind not in fill_by_kind:
            raise ValueError(f"{name} has unsupported dtype {df[name].dtype}!")
        result[name] = fill_by_kind[kind]

    return result


class AggrData:
def __init__(self):
self.aggr = defaultdict(list)
Expand All @@ -21,24 +39,6 @@ def add_data(self, data: MultimodalData) -> None:
self.aggr[key].append(data.get_data(key))


def _get_fillna_dict(self, df: pd.DataFrame) -> dict:
    """Return a dict mapping every column of ``df`` to its fill value.

    The value depends on the column's ``dtype.kind`` code: ``False`` for
    boolean, ``0`` for integer / unsigned / float / complex, ``b""`` for
    byte strings and ``""`` for object or unicode columns.

    Raises:
        ValueError: if a column's dtype kind is none of the above.
    """
    numeric_kinds = {"i", "u", "f", "c"}
    text_kinds = {"O", "U"}

    replacements = {}
    for col in df:
        col_dtype = df[col].dtype
        kind = col_dtype.kind
        if kind == "b":
            filler = False
        elif kind in numeric_kinds:
            filler = 0
        elif kind == "S":
            filler = b""
        elif kind in text_kinds:
            filler = ""
        else:
            raise ValueError(f"{col} has unsupported dtype {col_dtype}!")
        replacements[col] = filler

    return replacements


@run_gc
def _merge_matrices(self, feature_metadata: pd.DataFrame, unilist: List[UnimodalData], modality: str) -> Dict[str, csr_matrix]:
""" Merge all matrices together """
Expand Down Expand Up @@ -134,7 +134,7 @@ def _aggregate_unidata(self, unilist: List[UnimodalData]) -> UnimodalData:

barcode_metadata_dfs = [unidata.barcode_metadata for unidata in unilist]
barcode_metadata = pd.concat(barcode_metadata_dfs, axis=0, sort=False, copy=False)
fillna_dict = self._get_fillna_dict(barcode_metadata)
fillna_dict = _get_fillna_dict(barcode_metadata)
barcode_metadata.fillna(value=fillna_dict, inplace=True)


Expand All @@ -154,7 +154,7 @@ def _aggregate_unidata(self, unilist: List[UnimodalData]) -> UnimodalData:
for other in unilist[1:]:
keys = ["featurekey"] + feature_metadata.columns.intersection(other.feature_metadata.columns).values.tolist()
feature_metadata = feature_metadata.merge(other.feature_metadata, on=keys, how="outer", sort=False, copy=False) # If sort is True, feature keys will be changed even if all channels share the same feature keys.
fillna_dict = self._get_fillna_dict(feature_metadata)
fillna_dict = _get_fillna_dict(feature_metadata)
feature_metadata.fillna(value=fillna_dict, inplace=True)


Expand Down
26 changes: 12 additions & 14 deletions pegasusio/multimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import anndata

from pegasusio import UnimodalData, VDJData, CITESeqData, CytoData
from pegasusio import calc_qc_filters, apply_qc_filters
from pegasusio import calc_qc_filters, apply_qc_filters, DictWithDefault
from .views import INDEX, UnimodalDataView
from .datadict import MultiDataDict
from .vdj_data import VDJDataView
Expand Down Expand Up @@ -340,23 +340,21 @@ def filter_data(self,
focus_list = [self._selected]
focus_set = set(focus_list)

mito_dict = {}
default_mito = None
if mito_prefix is not None:
fields = mito_prefix.split(',')
if len(fields) == 1 and fields[0].find(':') < 0:
default_mito = fields[0]
else:
for field in fields:
genome, mito_pref = field.split(':')
mito_dict[genome] = mito_pref

unselected = []
mito_dict = DictWithDefault(mito_prefix)
for key, unidata in self.data.items():
if (key in focus_set) and (unidata.get_modality() == "rna"):
mito_pref = mito_dict.get(unidata.get_genome(), default_mito)
if "passed_qc" not in unidata.obs:
calc_qc_filters(unidata, select_singlets = select_singlets, remap_string = remap_string, subset_string = subset_string, min_genes = min_genes, max_genes = max_genes, min_umis = min_umis, max_umis = max_umis, mito_prefix = mito_pref, percent_mito = percent_mito)
calc_qc_filters(unidata,
select_singlets = select_singlets,
remap_string = remap_string,
subset_string = subset_string,
min_genes = min_genes,
max_genes = max_genes,
min_umis = min_umis,
max_umis = max_umis,
mito_prefix = mito_dict.get(unidata.get_genome()),
percent_mito = percent_mito)
apply_qc_filters(unidata)
selected_barcodes = unidata.obs_names if selected_barcodes is None else selected_barcodes.union(unidata.obs_names)
else:
Expand Down
20 changes: 20 additions & 0 deletions pegasusio/qc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,26 @@



class DictWithDefault:
    """Key -> value lookup with a fallback, parsed from a comma-separated string.

    Used for parsing the mito prefix. Each comma-separated field is either
    ``key:value`` (stored in ``mapping``) or a bare ``value`` (stored as
    ``default``). For example ``"GRCh38:MT-,mm10:mt-"`` defines two keyed
    entries, while ``"MT-"`` defines only a default.
    """

    def __init__(self, string: str):
        """Parse ``string`` into ``self.mapping`` and ``self.default``.

        Args:
            string: Comma-separated specification, or ``None`` for an empty
                mapping with no default. If several bare fields are given,
                the last one becomes the default.
        """
        self.mapping = {}
        self.default = None

        if string is not None:
            for field in string.split(','):
                # Split on the first colon only, so a value that itself
                # contains ':' is kept intact instead of raising on unpacking.
                key, sep, value = field.partition(':')
                if sep:
                    self.mapping[key] = value
                else:
                    self.default = field

    def get(self, key: str) -> str:
        """Return the value stored for ``key``, or the default if ``key`` is absent.

        The result is ``None`` when ``key`` is absent and no default was parsed.
        """
        return self.mapping.get(key, self.default)



def calc_qc_filters(
unidata: UnimodalData,
select_singlets: bool = False,
Expand Down
5 changes: 4 additions & 1 deletion pegasusio/unimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,10 @@ def to_anndata(self) -> anndata.AnnData:
"""
raw = None
if "raw.X" in self.matrices:
raw = anndata.AnnData(X = self.matrices["raw.X"])
var_cols = []
if "featureid" in self.feature_metadata:
var_cols.append("featureid")
raw = anndata.AnnData(X = self.matrices["raw.X"], var = self.feature_metadata[var_cols])

layers = {}
for key, value in self.matrices.items():
Expand Down

0 comments on commit a550aa3

Please sign in to comment.