Skip to content

Commit

Permalink
Merge pull request #74 from klarman-cell-observatory/boli
Browse files Browse the repository at this point in the history
Added obsp and varp
  • Loading branch information
yihming committed Oct 18, 2021
2 parents 8ff99b9 + e6174b8 commit d793ba7
Show file tree
Hide file tree
Showing 9 changed files with 321 additions and 94 deletions.
4 changes: 3 additions & 1 deletion pegasusio/citeseq_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ def __init__(
metadata: dict,
barcode_multiarrays: Dict[str, np.ndarray] = None,
feature_multiarrays: Dict[str, np.ndarray] = None,
barcode_multigraphs: Dict[str, csr_matrix] = None,
feature_multigraphs: Dict[str, csr_matrix] = None,
cur_matrix: str = "raw.count",
) -> None:
assert metadata["modality"] == "citeseq"
super().__init__(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, cur_matrix)
super().__init__(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, barcode_multigraphs, feature_multigraphs, cur_matrix)


def from_anndata(self, data: anndata.AnnData, genome: str = None, modality: str = None) -> None:
Expand Down
5 changes: 4 additions & 1 deletion pegasusio/cyto_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from typing import List, Dict, Union

import logging
Expand All @@ -23,10 +24,12 @@ def __init__(
metadata: dict,
barcode_multiarrays: Dict[str, np.ndarray] = None,
feature_multiarrays: Dict[str, np.ndarray] = None,
barcode_multigraphs: Dict[str, csr_matrix] = None,
feature_multigraphs: Dict[str, csr_matrix] = None,
cur_matrix: str = "raw.data",
) -> None:
assert metadata["modality"] == "cyto"
super().__init__(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, cur_matrix)
super().__init__(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, barcode_multigraphs, feature_multigraphs, cur_matrix)


def from_anndata(self, data: anndata.AnnData, genome: str = None, modality: str = None) -> None:
Expand Down
29 changes: 25 additions & 4 deletions pegasusio/hdf5_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,13 @@ def load_loom_file(input_loom: str, genome: str = None, modality: str = None) ->
else:
raise ValueError(f"Detected row attribute '{key}' has ndim = {arr.ndim}!")

barcode_multigraphs = {}
for key, graph in ds.col_graphs.items():
barcode_multigraphs[key] = csr_matrix(graph)
feature_multigraphs = {}
for key, graph in ds.row_graphs.items():
feature_multigraphs[key] = csr_matrix(graph)

matrices = {}
for key, mat in ds.layers.items():
key = "X" if key == "" else key
Expand All @@ -220,7 +227,7 @@ def load_loom_file(input_loom: str, genome: str = None, modality: str = None) ->
else:
metadata["modality"] = "rna"

unidata = UnimodalData(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays)
unidata = UnimodalData(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, barcode_multigraphs, feature_multigraphs)
unidata.separate_channels()

data = MultimodalData(unidata)
Expand All @@ -239,13 +246,20 @@ def write_loom_file(data: MultimodalData, output_file: str) -> None:
if len(matrices) == 0:
raise ValueError("Could not write empty matrix to a loom file!")

def _replace_slash(name: str) -> str:
    """Return ``name`` with every '/' replaced by '|'.

    Applied to attribute, file-attribute and graph names before they are
    written to the loom file. Names that contain no slash are returned
    unchanged; ``str.replace`` already covers that case, so no prior search
    is needed.

    NOTE(review): presumably needed because '/' acts as a path separator in
    the underlying file format -- confirm against the loom specification.
    """
    return name.replace('/', '|')

def _process_attrs(key_name: str, attrs: pd.DataFrame, attrs_multi: dict) -> Dict[str, object]:
res_dict = {key_name: attrs.index.values}
for key in attrs.columns:
res_dict[key] = np.array(attrs[key].values)
res_dict[_replace_slash(key)] = np.array(attrs[key].values)
for key, value in attrs_multi.items():
if value.ndim > 1: # value.ndim == 1 refers to np.recarray, which will not be written to a loom file.
res_dict[key] = value if value.shape[1] > 1 else value[:, 0]
res_dict[_replace_slash(key)] = value if value.shape[1] > 1 else value[:, 0]
return res_dict

row_attrs = _process_attrs("Gene", data.var, data.varm)
Expand All @@ -262,9 +276,16 @@ def _process_attrs(key_name: str, attrs: pd.DataFrame, attrs_multi: dict) -> Dic
file_attrs = {}
for key, value in data.uns.items():
if isinstance(value, str):
file_attrs[key] = value
file_attrs[_replace_slash(key)] = value

import loompy
loompy.create(output_file, layers, row_attrs, col_attrs, file_attrs = file_attrs)

if len(data.varp) > 0 or len(data.obsp) > 0:
with loompy.connect(output_file) as ds:
for key, value in data.varp.items():
ds.row_graphs[_replace_slash(key)] = value
for key, value in data.obsp.items():
ds.col_graphs[_replace_slash(key)] = value

logger.info(f"{output_file} is written.")
32 changes: 31 additions & 1 deletion pegasusio/multimodal_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,24 @@ def varm(self, varm: Dict[str, np.ndarray]):
assert self._unidata is not None
self._unidata.varm = varm

@property
def obsp(self) -> Union[Dict[str, csr_matrix], None]:
    """Surrogate accessor for ``obsp`` of the currently selected unimodal data.

    Returns ``None`` when no unimodal data is selected.
    """
    if self._unidata is None:
        return None
    return self._unidata.obsp

@obsp.setter
def obsp(self, obsp: Dict[str, csr_matrix]):
    """Replace ``obsp`` on the currently selected unimodal data.

    A unimodal data object must be selected; otherwise the assertion fails.
    """
    selected = self._unidata
    assert selected is not None
    selected.obsp = obsp

@property
def varp(self) -> Union[Dict[str, csr_matrix], None]:
    """Surrogate accessor for ``varp`` of the currently selected unimodal data.

    Returns ``None`` when no unimodal data is selected, which is why the
    return annotation includes ``None``.
    """
    return self._unidata.varp if self._unidata is not None else None

@varp.setter
def varp(self, varp: Dict[str, csr_matrix]):
    """Replace ``varp`` on the currently selected unimodal data.

    A unimodal data object must be selected; otherwise the assertion fails.
    """
    assert self._unidata is not None
    self._unidata.varp = varp

@property
def uns(self) -> Union[dict, None]:
return self._unidata.uns if self._unidata is not None else None
Expand All @@ -128,6 +146,18 @@ def shape(self, _shape: Tuple[int, int]):
assert self._unidata is not None
self._unidata.shape = _shape

def get_attr_type(self, attr:str) -> str:
    """ Surrogate function to return registered type for an attribute

    Delegates to ``get_attr_type`` of the currently selected unimodal data
    and hands its result back to the caller. A unimodal data object must be
    selected; otherwise the assertion fails.
    """
    assert self._unidata is not None
    # The result must be returned explicitly; without `return` this
    # surrogate would always yield None regardless of the registered type.
    return self._unidata.get_attr_type(attr)

def register_attr(self, attr: str, attr_type: str = None) -> None:
    """ Surrogate function: register ``attr`` (either in obs or obsm) under ``attr_type`` (e.g. signature, cluster, basis)

    The call is forwarded to the currently selected unimodal data, which
    must exist; otherwise the assertion fails.
    """
    selected = self._unidata
    assert selected is not None
    selected.register_attr(attr, attr_type)

def as_float(self, matkey: str = None) -> None:
""" Surrogate function to convert matrix to float """
assert self._unidata is not None
Expand Down Expand Up @@ -393,7 +423,7 @@ def filter_data(self,


def concat_data(self, modality: str = "rna"):
""" Used for raw data, Ignore multiarrays and only consider one matrix per unidata """
""" Used for raw data, Ignore multiarrays/multigraphs and only consider one matrix per unidata """
genomes = []
unidata_arr = []

Expand Down
5 changes: 4 additions & 1 deletion pegasusio/nanostring_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from typing import List, Dict, Union

import logging
Expand All @@ -21,10 +22,12 @@ def __init__(
metadata: dict,
barcode_multiarrays: Dict[str, np.ndarray] = None,
feature_multiarrays: Dict[str, np.ndarray] = None,
barcode_multigraphs: Dict[str, csr_matrix] = None,
feature_multigraphs: Dict[str, csr_matrix] = None,
cur_matrix: str = "Q3Norm",
) -> None:
assert metadata["modality"] == "nanostring"
super().__init__(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, cur_matrix)
super().__init__(barcode_metadata, feature_metadata, matrices, metadata, barcode_multiarrays, feature_multiarrays, barcode_multigraphs, feature_multigraphs, cur_matrix)


def from_anndata(self, data: anndata.AnnData, genome: str = None, modality: str = None) -> None:
Expand Down

0 comments on commit d793ba7

Please sign in to comment.