In [97]:
# Imports
import csv
import glob2
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import squidpy as sq
import warnings
from anndata import AnnData

In [1]:
# Globals
PRJ_DIR = "/scratch/gpfs/KANG/sereno/spatialstem"
SRC_DIR = f"{PRJ_DIR}/sourcefiles"
RAW_DIR = f"{SRC_DIR}/raw"
HAD_DIR = f"{SRC_DIR}/h5ad"
INT_DIR = f"{PRJ_DIR}/intermediates"
FIG_DIR = f"{PRJ_DIR}/figs"

In [24]:
# Dann: Mapping the developing human immune system across organs
dann_dir = f"{RAW_DIR}/pub1_dann"
dann_paths = glob2.glob(f"{dann_dir}/*")
# Already formatted...

In [53]:
# Niec: Lymphatics act as a signaling hub to regulate intestinal stem cell activity
niec_dir = f"{RAW_DIR}/pub2_niec"
niec_paths = glob2.glob(f"{niec_dir}/*")
niec_paths.sort()
niec_labels = ["largeintestine1", "largeintestine2", "smallintestine1", "smallintestine2"]
for niec_path, niec_label in zip(niec_paths, niec_labels):
    # Will warn you that your variable names aren't unique, fixed below.
    with warnings.catch_warnings(action="ignore"):
        visium_in = sc.read_visium(niec_path)
    visium_in.var_names_make_unique()
    h5_out_path = f"{HAD_DIR}/p2_{niec_label}.h5ad"
    visium_in.write_h5ad(h5_out_path)
    print(f"Written: {h5_out_path}")

Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p2_largeintestine1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p2_largeintestine2.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p2_smallintestine1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p2_smallintestine2.h5ad


In [49]:
# Calvanese: Mapping human hematopoietic stem cells from hemogenic endothelium to birth
calv_dir = f"{RAW_DIR}/pub3_calvanese"
calv_paths = glob2.glob(f"{calv_dir}/*")
calv_paths.sort()
for idx, calv_path in enumerate(calv_paths):
    with warnings.catch_warnings(action="ignore"):
        visium_in = sc.read_visium(calv_path)
    visium_in.var_names_make_unique()
    h5_out_path = f"{HAD_DIR}/p3_hsc{idx+1}.h5ad"
    visium_in.write_h5ad(h5_out_path)
    print(f"Written: {h5_out_path}")

['/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-1', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-2', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-4', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-5', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-6', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-7', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub3_calvanese/HM-8']


In [90]:
# Larouche: Spatiotemporal mapping of immune and stem cell dysregulation after volumetric muscle loss
laro_dir = f"{RAW_DIR}/pub4_larouche"
laro_paths_raw = glob2.glob(f"{laro_dir}/*")
# Filters out raw RDS objects
laro_paths = [path for path in laro_paths_raw if 'GSE205707' not in path]
laro_paths.sort()
for laro_path in laro_paths:
    laro_lab = laro_path.split("/")[-1]
    visium_in = sc.read_10x_mtx(laro_path)
    coords_in = np.genfromtxt(f"{laro_path}/coords.csv", delimiter=",", dtype="int64")
    visium_in.obsm["spatial"] = coords_in
    h5_out_path = f"{HAD_DIR}/p4_{laro_lab}.h5ad"
    visium_in.write_h5ad(h5_out_path)
    print(f"Written: {h5_out_path}")

Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD0IR1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD0IR2.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD14E1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD14M1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD14M2.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD7E1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD7M1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_caD7M2.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_mmD141.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_mmD142.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_mmITD1.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/h5ad/p4_mmITD2.h5ad
Written: /scratch/gpfs/KANG/sereno/spatialstem/

In [122]:
# Shani: The spatio-temporal program of liver zonal regeneration
shan_dir = f"{RAW_DIR}/pub5_shani"
shan_paths = glob2.glob(f"{shan_dir}/*.h5")
shan_paths.sort()
print(shan_paths)
# Read metadata to get visium coords
visium_meta_dict = {}
with open(f'{shan_dir}/Visium_Meta_data.txt', mode ='r')as meta_file:
    shan_meta = csv.reader(meta_file)
    next(shan_meta)
    for line in shan_meta:
        sample = line[1].replace("t_", "")
        x_coord = np.int64(line[4])
        y_coord = np.int64(line[5])
        coords = [x_coord, y_coord]
        if sample not in visium_meta_dict:
            visium_meta_dict[sample] = []
        visium_meta_dict[sample].append(coords)

['/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub5_shani/Visium_24h_m1_raw_feature_bc_matrix.h5', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub5_shani/Visium_24h_m2_raw_feature_bc_matrix.h5', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub5_shani/Visium_48h_m4_raw_feature_bc_matrix.h5', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub5_shani/Visium_48h_m5_raw_feature_bc_matrix.h5', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub5_shani/Visium_72h_m1_raw_feature_bc_matrix.h5', '/scratch/gpfs/KANG/sereno/spatialstem/sourcefiles/raw/pub5_shani/Visium_72h_m2_raw_feature_bc_matrix.h5']


In [134]:
shan_path = shan_paths[0]
# Extract mouse id
shan_lab = shan_path.split("/")[-1].replace("Visium_", "").replace("_raw_feature_bc_matrix.h5", "")
# Will warn you that your variable names aren't unique, fixed below.
with warnings.catch_warnings(action="ignore"):
    visium_in = sc.read_10x_h5(shan_path)
visium_in.var_names_make_unique()
coords = np.asarray(visium_meta_dict[shan_lab])
visium_in.obsm["spatial"] = coords
h5_out_path = f"{HAD_DIR}/p5_{shan_lab}.h5ad"
# visium_in.write_h5ad(h5_out_path)

ValueError: Value passed for key 'spatial' is of incorrect shape. Values of obsm must match dimensions ('obs',) of parent. Value had shape (3075,) while it should have had (4992,).

In [137]:
visium_in.obs_names

Index(['AAACAACGAATAGTTC-1', 'AAACAAGTATCTCCCA-1', 'AAACAATCTACTAGCA-1',
       'AAACACCAATAACTGC-1', 'AAACAGAGCGACTCCT-1', 'AAACAGCTTTCAGAAG-1',
       'AAACAGGGTCTATATT-1', 'AAACAGTGTTCCTGGG-1', 'AAACATGGTGAGAGGA-1',
       'AAACATTTCCCGGATT-1',
       ...
       'TTGTTAGCAAATTCGA-1', 'TTGTTCAGTGTGCTAC-1', 'TTGTTCTAGATACGCT-1',
       'TTGTTGGCAATGACTG-1', 'TTGTTGTGTGTCAAGA-1', 'TTGTTTCACATCCAGG-1',
       'TTGTTTCATTAGTCTA-1', 'TTGTTTCCATACAACT-1', 'TTGTTTGTATTACACG-1',
       'TTGTTTGTGTAAATTC-1'],
      dtype='object', length=4992)

In [141]:
line[0].replace(f"{line[1]}_", "").replace("_", "-")

'TTGTTTGTGTAAATTC-1'

In [142]:
barcodes = []
with open(f'{shan_dir}/Visium_Meta_data.txt', mode ='r')as meta_file:
    shan_meta = csv.reader(meta_file)
    next(shan_meta)
    for line in shan_meta:
        barcode = line[0].replace(f"{line[1]}_", "").replace("_", "-")
        barcodes.append(barcode)

In [143]:
in_obs = 0
for barcode in barcodes:
    if 

['AAACAATCTACTAGCA-1',
 'AAACACCAATAACTGC-1',
 'AAACAGTGTTCCTGGG-1',
 'AAACATTTCCCGGATT-1']