In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import anndata as ad
import scipy as sp

In [2]:
# Root directory of the simulated datasets. Each dataset is read from
# <input_dir>/<dataset>/location.csv and <input_dir>/<dataset>/counts.csv,
# and its converted AnnData file is written to <input_dir>/<dataset>.h5ad.
# The path is relative, so it resolves against the kernel's working directory.
input_dir = '../../results/01_generate_simulate_data'

In [3]:
# # 10X visium
# dataset_list = [#'00_scDesign3_example_data',
#                 '01_10x_Visium_mouse_brain',
#                 '02_10x_Visium_Human_Heart_MI',
#                 '03_10x_Visium_Human_Colorectal_Cancer',
#                 '04_10x_Visium_Human_Heart',
#                 '05_10x_Visium_Mouse_Embryo',
#                 '06_10x_Visium_Mouse_Olfactory_Bulb',
#                 '07_10x_Visium_Human_Breast_Cancer',
#                 '08_10x_Visium_Human_Lymph_Node',
#                 '09_10x_Visium_Human_Prostate',
#                 '10_10x_Visium_Human_Prostate_Cancer',
#                 '11_10x_Visium_Human_Cerebellum',
#                 '12_10x_Visium_Mouse_Kidney',
#                 '13_10x_Visium_Human_Lung_Cancer',
#                 '14_10x_Visium_Human_Brain_Cancer',
#                 '15_10x_Visium_Human_Kidney',
#                 '16_10x_Visium_Human_Intestine_Cancer',
#                 '17_10x_Visium_Human_Skin_Melanoma',
#                 '18_10x_Visium_Human_Cervical_Cancer',
#                 '19_10x_Visium_Human_Breast_Cancer',
#                 '20_10x_Visium_Human_Heart_MI'
#                ]

In [4]:
# # Slide-seq V2
# dataset_list = ['21_slide_seqv2_Mouse_Olfactory_Bulb',
#                 '22_slide_seqv2_Mouse_Cortex',
#                 '23_slide_seqv2_Mouse_Cerebellum',
#                 '24_slide_seqv2_Mouse_Hippocampus',
#                 '25_slide_seqv2_Mouse_SomatosensoryCortex'
#                ]

In [5]:
# # DBiT-seq
# dataset_list = ['26_DBiT_seq_E10_brain',
#                 '27_DBiT_seq_E10_eye',
#                 '28_DBiT_seq_E10_whole',
#                 '29_DBiT_seq_E11_lower_body',
#                 '30_DBiT_seq_E11_FL_1L',
#                 '31_DBiT_seq_E11_FL_2L'
#                ]

In [6]:
# # Xenium
# dataset_list = ['32_10x_Xenium_Human_Colon_Cancer',
#                 '33_10x_Xenium_Mouse_Brain'
#                ]

In [7]:
# # MERFISH
# dataset_list = ['34_MERFISH_Human_cortex',
#                 '35_MERFISH_Human_cortex',
#                 '36_MERFISH_Human_cortex',
#                 '37_MERFISH_Human_cortex',
#                 '38_MERFISH_Mouse_cortex',
#                ]

In [8]:
# # seqFISH
# dataset_list = ['39_seqFISH_Mouse_Organogenesis',
#                ]

In [9]:
# # Slide-tag
# dataset_list = ['40_slide_tag_mouse_embryo',
#                 '41_slide_tag_human_cortex',
#                 '42_slide_tag_human_skin_melanoma',
#                 '43_slide_tag_human_tonsil'
#                ]

In [10]:
# Stereo-seq datasets (Drosophila embryo) converted by the loop below.
dataset_list = [
    '44_stereo_seq_Drosophila_embryo_E7',
    '45_stereo_seq_Drosophila_embryo_E9.1',
    '46_stereo_seq_Drosophila_embryo_E6.3',
    '47_stereo_seq_Drosophila_embryo_E10.5',
    '48_stereo_seq_Drosophila_embryo_E5.6',
]

In [11]:
# # STARMap
# dataset_list = ['49_STARmap_Wang2018_2D_zstep10_0',
#                 '50_STARmap_Wang2018_2D_zstep15_0'
#                ]

In [12]:
# Convert every simulated dataset (location.csv + counts.csv) into a single
# AnnData file stored at <input_dir>/<dataset>.h5ad.
for dataset in dataset_list:
    h5ad_path = f'{input_dir}/{dataset}.h5ad'

    # Already converted: skip, so re-running the cell only does missing work.
    if os.path.exists(h5ad_path):
        continue

    print(dataset)

    # location.csv: one row per observation; must contain the columns
    # 'spatial1' and 'spatial2', which become the spatial coordinates.
    df_loc = pd.read_csv(f"{input_dir}/{dataset}/location.csv", index_col=0)
    # counts.csv is transposed after loading so that rows are observations and
    # columns are features, the orientation AnnData expects.
    # NOTE(review): the rows of df_count are assumed to be in the same order as
    # the rows of df_loc; nothing here verifies that -- confirm upstream.
    df_count = pd.read_csv(f'{input_dir}/{dataset}/counts.csv',
                           index_col=0).transpose()

    # obtain spatial variability (alpha) for each gene
    # Feature names look like '<gene>_<spatial_var>'. Split on the LAST
    # underscore only: a plain split('_') produces more than two columns (and
    # the assignment below fails) as soon as a gene name itself contains '_'.
    df_var = pd.DataFrame(data={'feature_name': df_count.columns})
    df_var[['gene', 'spatial_var']] = (
        df_var['feature_name'].str.rsplit('_', n=1, expand=True)
    )
    df_var = df_var.set_index('feature_name', drop=True)

    # create AnnData object
    # Cast to float32 while building the sparse matrix instead of passing the
    # deprecated `dtype=` argument to the AnnData constructor.
    counts = sp.sparse.csr_matrix(df_count.values, dtype=np.float32)
    adata = ad.AnnData(counts, obs=df_loc,
                       obsm={"spatial": df_loc[['spatial1', 'spatial2']].values},
                       var=df_var)

    # inplace=True stores the QC metrics in adata.obs / adata.var; with the
    # default (inplace=False) they are only returned and were silently dropped.
    sc.pp.calculate_qc_metrics(adata, percent_top=[10], inplace=True)

    # Keep an untouched copy of the counts in a dedicated layer.
    adata.layers['counts'] = adata.X.copy()
    # Register an empty library entry under uns['spatial'] (no image attached).
    spatial_key = "spatial"
    library_id = "tissue"
    adata.uns[spatial_key] = {library_id: {}}

    #sc.pp.normalize_total(adata)
    #sc.pp.log1p(adata)

    adata.write_h5ad(h5ad_path)

44_stereo_seq_Drosophila_embryo_E7
45_stereo_seq_Drosophila_embryo_E9.1
46_stereo_seq_Drosophila_embryo_E6.3
47_stereo_seq_Drosophila_embryo_E10.5
48_stereo_seq_Drosophila_embryo_E5.6
