Skip to content

Commit

Permalink
Merge pull request #27 from klarman-cell-observatory/boli
Browse files Browse the repository at this point in the history
Convert added attributes into categorical and recognize nan for Refer…
  • Loading branch information
bli25 committed Jun 24, 2020
2 parents 1a39d90 + f318222 commit e6ea082
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pegasusio/data_aggregation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import numpy as np
import pandas as pd
from subprocess import check_call
from typing import List, Tuple, Dict, Set
Expand Down Expand Up @@ -43,7 +44,7 @@ def _parse_restriction_string(rstr: str) -> Tuple[str, bool, Set[str]]:

def _parse_genome_string(genome_str: str) -> Tuple[str, Dict[str, str]]:
genome = genome_dict = None
if genome_str is not None:
if (genome_str is not None) and (not np.isnan(genome_str)):
if genome_str.find(":") < 0:
genome = genome_str
else:
Expand Down Expand Up @@ -200,6 +201,8 @@ def aggregate_matrices(

# Merge data
aggregated_data = aggrData.aggregate()
if len(attributes) > 0:
aggregated_data._convert_attributes_to_categorical(attributes)
logger.info(f"Aggregated {tot} files.")

# Delete temporary file
Expand Down
5 changes: 5 additions & 0 deletions pegasusio/multimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,3 +510,8 @@ def _propogate_genome(self) -> None:
unidata = self.data.pop(key)
unidata.uns["genome"] = genome
self.data[unidata.get_uid()] = unidata


def _convert_attributes_to_categorical(self, attributes: Set[str]) -> None:
    """Forward the categorical conversion of ``attributes`` to every stored modality.

    Each value held in ``self.data`` has its own
    ``_convert_attributes_to_categorical`` invoked with the same attribute set.
    """
    for modality in self.data.values():
        modality._convert_attributes_to_categorical(attributes)
7 changes: 7 additions & 0 deletions pegasusio/unimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from scipy.sparse import csr_matrix
from collections.abc import MutableMapping
from copy import deepcopy
from natsort import natsorted
from typing import List, Dict, Union, Set, Tuple

import logging
Expand Down Expand Up @@ -527,3 +528,9 @@ def _update_barcode_metadata_info(
self.barcode_metadata[attr] = np.repeat(row[attr], nsample)

self.metadata["_sample"] = sample_name # record sample_name for merging


def _convert_attributes_to_categorical(self, attributes: Set[str]) -> None:
    """Convert the given barcode-metadata columns to categorical, in place.

    For every name in ``attributes`` the matching ``self.barcode_metadata``
    column is replaced by a ``pd.Categorical`` whose categories are the
    column's distinct non-missing values in natural sort order. Missing
    entries (``None`` / NaN) are excluded from the category list, so they
    remain missing in the converted column.
    """
    for attr in attributes:
        values = self.barcode_metadata[attr].values
        # Missing values must not reach the category list: pandas rejects null
        # categories, and np.unique cannot order NaN against strings.
        present = values[~pd.isnull(values)]
        self.barcode_metadata[attr] = pd.Categorical(
            values, categories=natsorted(np.unique(present))
        )

0 comments on commit e6ea082

Please sign in to comment.