In [3]:
import os
import pandas as pd
import scanpy as sc
import numpy  as np

def generate_anndata_replicates(base_path, output_path, n_replicates=5):
    """
    Build one gene-expression and one protein-expression AnnData object per
    replicate and write them to `output_path` as .h5ad files.

    For every replicate index `i` in `range(n_replicates)` the following files
    are read:
      - `<base_path>/ST_sim/Wu_ST_{i}_data.csv`    (gene expression)
      - `<base_path>/ST_sim/Wu_ST_{i}_meta.csv`    (spot metadata with `spot_x`/`spot_y`)
      - `<base_path>/ST_sim/Wu_ST_{i}_prop.csv`    (cell type proportions)
      - `<base_path>/CITE_sim/Wu_CITE_{i}_data.csv` (protein expression)
    and the following files are written:
      - `<output_path>/Wu_rep_{i}_GEX.h5ad`
      - `<output_path>/Wu_rep_{i}_CITE.h5ad`

    Parameters:
    - base_path (str): Base path containing the directories `ST_sim` and `CITE_sim`.
    - output_path (str): Directory where the .h5ad files will be saved
      (created if it does not exist).
    - n_replicates (int): Number of replicates to process, indexed from 0.
      Defaults to 5.

    Raises:
    - FileNotFoundError: propagated from `pd.read_csv` when an expected CSV is missing.
    - KeyError: if the meta CSV lacks the `spot_x` / `spot_y` columns.
    """
    # Ensure output directory exists
    os.makedirs(output_path, exist_ok=True)

    # Define base directories for each dataset type
    st_gex_dir = os.path.join(base_path, 'ST_sim')
    cite_sim_dir = os.path.join(base_path, 'CITE_sim')

    for i in range(n_replicates):
        # Construct paths for each file type, adjusting for specific filenames
        gene_expression_path = os.path.join(st_gex_dir, f'Wu_ST_{i}_data.csv')
        protein_expression_path = os.path.join(cite_sim_dir, f'Wu_CITE_{i}_data.csv')
        spot_meta_path = os.path.join(st_gex_dir, f'Wu_ST_{i}_meta.csv')
        cell_type_proportions_path = os.path.join(st_gex_dir, f'Wu_ST_{i}_prop.csv')

        # Load replicate-specific data
        gene_expression = pd.read_csv(gene_expression_path, index_col=0)
        protein_expression = pd.read_csv(protein_expression_path, index_col=0)
        # The second CSV column is used as the row index of the spot metadata.
        spot_meta = pd.read_csv(spot_meta_path, index_col=1)
        cell_type_proportions = pd.read_csv(cell_type_proportions_path, index_col=0)

        # The CSVs are transposed so that their columns become observations
        # (rows of the AnnData matrix) and their rows become variables.
        adata_gex = sc.AnnData(gene_expression.T)
        adata_cite = sc.AnnData(protein_expression.T)

        # Add spatial coordinates and cell type proportions to both objects.
        # NOTE(review): coordinates are attached positionally; this assumes the
        # rows of the meta CSV are in the same order as the columns of the
        # expression CSVs - confirm against the simulator output.
        for adata in (adata_gex, adata_cite):
            adata.obsm['spatial'] = spot_meta[['spot_x', 'spot_y']].to_numpy()
            adata.obs = adata.obs.join(cell_type_proportions, how='left')

        # Prepare GEX adata: keep the matrix as loaded in a layer so that it is
        # actually written to disk (a plain Python attribute on the AnnData
        # object is not serialized), then replace X with expm1 of it, rounded.
        # Presumably the loaded values are log1p-normalised (inferred from the
        # layer name) - TODO confirm.
        norm_log1p = adata_gex.X.copy()
        adata_gex.layers['X_norm_log1p'] = norm_log1p
        adata_gex.X = np.expm1(norm_log1p).round()

        # Save each AnnData object with unique names per replicate
        adata_gex.write(os.path.join(output_path, f"Wu_rep_{i}_GEX.h5ad"))
        adata_cite.write(os.path.join(output_path, f"Wu_rep_{i}_CITE.h5ad"))

        print(f"Saved Wu_ST_{i} gene expression and Wu_CITE_{i} CITE data to {output_path}")

# Convert both simulation scenarios (high segregation, then mixed); each one
# writes its .h5ad files into an `h5ad_objects` folder inside its own base path.
for base_path in (
    "/bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/high_seg",
    "/bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/mixed",
):
    output_path = os.path.join(base_path, "h5ad_objects")
    generate_anndata_replicates(base_path, output_path)

Saved Wu_ST_0 gene expression and Wu_CITE_0 CITE data to /bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/high_seg/h5ad_objects
Saved Wu_ST_1 gene expression and Wu_CITE_1 CITE data to /bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/high_seg/h5ad_objects
Saved Wu_ST_2 gene expression and Wu_CITE_2 CITE data to /bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/high_seg/h5ad_objects
Saved Wu_ST_3 gene expression and Wu_CITE_3 CITE data to /bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/high_seg/h5ad_objects
Saved Wu_ST_4 gene expression and Wu_CITE_4 CITE data to /bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/scCube_12k/replicates/high_seg/h5ad_objects
Saved Wu_ST_0 gene expression and Wu_CITE_0 CITE data to /bgfs/alee/LO_LAB/Personal/Brent_Schlegel/Projects/Wu_Visium/Simulations/