Skip to content

Commit

Permalink
Merge pull request #84 from klarman-cell-observatory/yiming
Browse files (browse the repository at this point in the history)
Improve 10X Visium data loading
  • Loading branch information
yihming committed Jan 9, 2022
2 parents 8e2e597 + 2331c10 commit 6c82c8c
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 21 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ext_modules/*.so
__pycache__
build/
dist/
.eggs/
pegasusio.egg-info/
pegasusio/cylib/*.so
.ipynb_checkpoints
Expand Down
8 changes: 4 additions & 4 deletions pegasusio/spatial_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(
barcode_multigraphs: Optional[Dict[str, csr_matrix]] = None,
feature_multigraphs: Optional[Dict[str, csr_matrix]] = None,
cur_matrix: str = "X",
img: Optional[pd.DataFrame] = None,
image_metadata: Optional[pd.DataFrame] = None,
) -> None:
assert metadata["modality"] == "visium"
super().__init__(
Expand All @@ -43,15 +43,15 @@ def __init__(
feature_multigraphs,
cur_matrix,
)
self._img = img
self.image_metadata = image_metadata

@property
def img(self) -> Optional[pd.DataFrame]:
return self._img
return self.image_metadata

@img.setter
def img(self, img: pd.DataFrame):
self._img = img
self.image_metadata = img

def __repr__(self) -> str:
repr_str = super().__repr__()
Expand Down
29 changes: 14 additions & 15 deletions pegasusio/spatial_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def load_visium_folder(input_path) -> MultimodalData:
file_list = os.listdir(input_path)
sample_id = input_path.split("/")[-1]
# Load count matrix.
hdf5_filename = "raw_feature_bc_matrix.h5"
hdf5_filename = "filtered_feature_bc_matrix.h5"
assert hdf5_filename in file_list, "Raw count hdf5 file is missing!"
rna_data = load_10x_h5_file(f"{input_path}/{hdf5_filename}")

Expand All @@ -44,35 +44,33 @@ def load_visium_folder(input_path) -> MultimodalData:
)
process_spatial_metadata(spatial_metadata)

barcode_metadata = pd.concat([rna_data.obs, spatial_metadata], axis=1)
barcode_metadata = rna_data.obs.join(spatial_metadata, how='left')
feature_metadata = rna_data.var

matrices = {"X": rna_data.X}
metadata = {"genome": rna_data.get_genome(), "modality": "visium"}

# Store “pxl_col_in_fullres” and “pxl_row_in_fullres” as a 2D array,
# which is the spatial location info of each cell in the dataset.
obsm = spatial_metadata[["pxl_col_in_fullres", "pxl_row_in_fullres"]]
barcode_multiarrays = {"spatial_coordinates": obsm.to_numpy()}

# Store all the other spatial info of cells, i.e. “in_tissue”, “array_row”, and “array_col”
obs = spatial_metadata[["in_tissue", "array_row", "array_col"]]
barcode_metadata = obs
spatial_coords = barcode_metadata[['pxl_row_in_fullres', 'pxl_col_in_fullres']]
barcode_multiarrays = {"X_spatial": spatial_coords.to_numpy()}
barcode_metadata.drop(columns=['pxl_row_in_fullres', 'pxl_col_in_fullres'], inplace=True)

# Store image metadata as a Pandas DataFrame, one row per image, with the fields built in get_image_data below:
img = pd.DataFrame()
image_metadata = pd.DataFrame()
spatial_path = f"{input_path}/spatial"

with open(f"{spatial_path}/scalefactors_json.json") as fp:
scale_factors = json.load(fp)

def get_image_data(filepath, sample_id, image_id, scaleFactor):
def get_image_data(filepath, sample_id, image_id, scaleFactor, spot_diameter_fullres):
data = Image.open(filepath)
dict = {
"sample_id": sample_id,
"image_id": image_id,
"data": data,
"scaleFactor": scaleFactor,
"scale_factor": scaleFactor,
"spot_diameter": spot_diameter_fullres * scaleFactor,
}
return dict

Expand All @@ -84,18 +82,19 @@ def get_image_data(filepath, sample_id, image_id, scaleFactor):
filepath,
sample_id,
res_tag,
scale_factors[f"tissue_{res_tag}_scalef"]
scale_factors[f"tissue_{res_tag}_scalef"],
scale_factors["spot_diameter_fullres"]
)
img = img.append(image_item, ignore_index=True)
image_metadata = image_metadata.append(image_item, ignore_index=True)

assert not img.empty, "the image data frame is empty"
assert not image_metadata.empty, "the image data frame is empty"
spdata = SpatialData(
barcode_metadata,
feature_metadata,
matrices,
metadata,
barcode_multiarrays=barcode_multiarrays,
img=img,
image_metadata=image_metadata,
)
data = MultimodalData(spdata)

Expand Down
4 changes: 2 additions & 2 deletions pegasusio/zarr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def read_unimodal_data(self, group: zarr.Group) -> UnimodalData:
else dict(),
)
if isinstance (unidata, SpatialData):
unidata.img = self.read_dataframe(group["img"]) if "img" in group else dict()
unidata.image_metadata = self.read_dataframe(group["image_metadata"]) if "image_metadata" in group else dict()

if group.attrs.get("_cur_matrix", None) is not None:
unidata.select_matrix(group.attrs["_cur_matrix"])
Expand Down Expand Up @@ -443,7 +443,7 @@ def write_unimodal_data(self, parent: zarr.Group, name: str, data: UnimodalData,
self.write_dataframe(group, 'feature_metadata', data.feature_metadata)

if hasattr(data, 'img'):
self.write_dataframe(group, 'img', data.img)
self.write_dataframe(group, 'image_metadata', data.image_metadata)

if overwrite or data.matrices.is_dirty():
self.write_mapping(group, 'matrices', data.matrices, overwrite = overwrite)
Expand Down

0 comments on commit 6c82c8c

Please sign in to comment.