Skip to content

Commit

Permalink
Merge pull request #99 from lilab-bcb/spaceranger-2-0
Browse files Browse the repository at this point in the history
Fix issue on spatial coordinates
  • Loading branch information
yihming committed Sep 9, 2022
2 parents db791c2 + 410e3ed commit 7deb6ba
Showing 1 changed file with 32 additions and 21 deletions.
53 changes: 32 additions & 21 deletions pegasusio/spatial_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ def load_visium_folder(input_path) -> MultimodalData:
sample_id = input_path.split("/")[-1]
# Load count matrix.
hdf5_filename = "filtered_feature_bc_matrix.h5"
assert hdf5_filename in file_list, "Filtered feature-barcode matrix in HDF5 format is missing!"
assert (
hdf5_filename in file_list
), "Filtered feature-barcode matrix in HDF5 format is missing!"
rna_data = load_10x_h5_file(f"{input_path}/{hdf5_filename}")

# Load spatial metadata.
Expand All @@ -34,7 +36,7 @@ def load_visium_folder(input_path) -> MultimodalData:
tissue_pos_csv = f"{input_path}/spatial/tissue_positions.csv"
if os.path.exists(tissue_pos_csv):
spatial_metadata = pd.read_csv(tissue_pos_csv)
spatial_metadata.rename(columns={'barcode': 'barcodekey'}, inplace=True)
spatial_metadata.rename(columns={"barcode": "barcodekey"}, inplace=True)
else:
tissue_pos_csv = f"{input_path}/spatial/tissue_positions_list.csv"
assert os.path.exists(tissue_pos_csv), f"Cannot locate file {tissue_pos_csv}!"
Expand All @@ -45,41 +47,50 @@ def load_visium_folder(input_path) -> MultimodalData:
"in_tissue",
"array_row",
"array_col",
"pxl_col_in_fullres",
"pxl_row_in_fullres",
"pxl_col_in_fullres",
],
)
process_spatial_metadata(spatial_metadata)

barcode_metadata = rna_data.obs.join(spatial_metadata, how='left')
barcode_metadata = rna_data.obs.join(spatial_metadata, how="left")
feature_metadata = rna_data.var

matrices = {"X": rna_data.X}
metadata = {"genome": rna_data.get_genome(), "modality": "visium"}

# Store “pxl_col_in_fullres” and ”pxl_row_in_fullres” as a 2D array,
# which is the spatial location info of each cell in the dataset.
spatial_coords = barcode_metadata[['pxl_row_in_fullres', 'pxl_col_in_fullres']]
barcode_multiarrays = {"X_spatial": spatial_coords.to_numpy()}
barcode_metadata.drop(columns=['pxl_row_in_fullres', 'pxl_col_in_fullres'], inplace=True)
# Store “pxl_col_in_fullres” and ”pxl_row_in_fullres” as a 2D array, which is the spatial location info of each cell in the dataset.
barcode_multiarrays = {
"X_spatial": barcode_metadata[
["pxl_col_in_fullres", "pxl_row_in_fullres"]
].to_numpy()
}
barcode_metadata.drop(
columns=["pxl_row_in_fullres", "pxl_col_in_fullres"], inplace=True
)

# Store image metadata as a Pandas DataFrame, with the following structure:
image_metadata = pd.DataFrame()
image_metadata = pd.DataFrame(
columns=["sample_id", "image_id", "data", "scale_factor", "spot_diameter"]
)
spatial_path = f"{input_path}/spatial"

with open(f"{spatial_path}/scalefactors_json.json") as fp:
scale_factors = json.load(fp)

def get_image_data(filepath, sample_id, image_id, scaleFactor, spot_diameter_fullres):
def get_image_data(
filepath, sample_id, image_id, scaleFactor, spot_diameter_fullres
):
data = Image.open(filepath)
dict = {
"sample_id": sample_id,
"image_id": image_id,
"data": data,
"scale_factor": scaleFactor,
"spot_diameter": spot_diameter_fullres * scaleFactor,
}
return dict
return pd.DataFrame(
{
"sample_id": [sample_id],
"image_id": [image_id],
"data": [data],
"scale_factor": [scaleFactor],
"spot_diameter": [spot_diameter_fullres * scaleFactor],
}
)

for png in [f for f in os.listdir(spatial_path) if re.match(".*\.png", f)]:
if ("_hires_" in png) or ("_lowres_" in png):
Expand All @@ -90,9 +101,9 @@ def get_image_data(filepath, sample_id, image_id, scaleFactor, spot_diameter_ful
sample_id,
res_tag,
scale_factors[f"tissue_{res_tag}_scalef"],
scale_factors["spot_diameter_fullres"]
scale_factors["spot_diameter_fullres"],
)
image_metadata = image_metadata.append(image_item, ignore_index=True)
image_metadata = pd.concat([image_metadata, image_item], ignore_index=True)

assert not image_metadata.empty, "the image data frame is empty"
spdata = SpatialData(
Expand Down

0 comments on commit 7deb6ba

Please sign in to comment.