In [1]:
# Imports
import csv
import glob2
import pandas as pd
import matplotlib.pyplot as plt
import mygene
import numpy as np
import scanpy as sc
import squidpy as sq
import warnings
from anndata import AnnData
from gseapy import Biomart
from scipy import sparse

In [2]:
# Globals
PRJ_DIR = "/scratch/gpfs/KANG/sereno/spatialstem"
SRC_DIR = f"{PRJ_DIR}/sourcefiles"
RAW_DIR = f"{SRC_DIR}/raw"
HAD_DIR = f"{SRC_DIR}/h5ad"
CSV_DIR = f"{SRC_DIR}/csv"
EXP_DIR = f"{CSV_DIR}/exp"
COR_DIR = f"{CSV_DIR}/coords"
INT_DIR = f"{PRJ_DIR}/intermediates"
FIG_DIR = f"{PRJ_DIR}/figs"

In [13]:
# Get paths of H5 datasets.
h5ad_paths = glob2.glob(f"{HAD_DIR}/*.h5ad")
h5ad_paths.sort()

In [15]:
i = 0
for h5ad_path in h5ad_paths:
    ad = sc.read_h5ad(h5ad_path)
    h5ad_label = h5ad_path.replace(f"{HAD_DIR}/", "").replace(".h5ad", "")
    # Chance of lowest non-zero value being a perfect integer in log-normalized data is practically zero.
    test_count = ad.X[np.nonzero(ad.X)].min(axis=None)
    # Skips log-normalized data: cytotrace needs raw data!
    if not test_count.is_integer():
        print(f"{str(i)}: {h5ad_label} skipped: log-normalized data")
        i += 1
        continue
    csv_out_path = f"{EXP_DIR}/{h5ad_label}_exp.csv"
    spatial_out_path = f"{COR_DIR}/{h5ad_label}_coords.csv"
    # More memory efficient to call todense directly in the constructor
    # Also note that CytoTrace needs genes in rows and cells in columns
    gene_index = list(ad.var["human_symbol"])
    cell_col = range(1, ad.X.shape[0]+1)
    exp_mat_arr = np.array(ad.X.todense().transpose(), dtype='i4')
    exp_mat_df = pd.DataFrame(data=exp_mat_arr, index=gene_index, columns=cell_col)
    exp_mat_df.to_csv(csv_out_path)
    spatial_coords = ad.obsm["spatial"]
    np.savetxt(spatial_out_path, spatial_coords, delimiter=",", fmt='%f')
    print(f"{str(i)}: {h5ad_label} formatted for cytotrace")
    i += 1
    if h5ad_label[0:3]!="p01":
        break

0: p01_fetalliver_A1 formatted for cytotrace
1: p01_fetalliver_D1 formatted for cytotrace
2: p01_fetalspleen_A1 formatted for cytotrace
3: p01_fetalspleen_B1 formatted for cytotrace
4: p01_fetalspleen_C1 formatted for cytotrace
5: p01_fetalthymus_B1 formatted for cytotrace
6: p01_fetalthymus_C1 formatted for cytotrace
7: p02_largeintestine1 formatted for cytotrace
