In [3]:
# Suppress warnings before importing packages
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import dask
import seaborn as sns
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Configure dask before importing packages that use it
dask.config.set({"dataframe.query-planning": True})

# Now import packages


<dask.config.set at 0x7f3cebcf6350>

In [4]:
# 1. Load the data
print("Loading data...")
adata = sc.read_h5ad(
    "../../data/Kidney_ST/GSE211785_7_13_23_slide0_annotated_iPTsubclusters.h5ad")

# 2. CRITICAL FIX: Convert coordinates to Numpy Array
# (You must run this every time you load this file!)
if hasattr(adata.obsm['spatial'], 'to_numpy'):
    print("Fixing spatial coordinates format...")
    adata.obsm['spatial'] = adata.obsm['spatial'].to_numpy()
elif not isinstance(adata.obsm['spatial'], np.ndarray):
    adata.obsm['spatial'] = np.array(adata.obsm['spatial'])

# 3. Ensure spatial coordinates are ready for plotting
print("Spatial coordinates ready")
print(f"  Shape: {adata.obsm['spatial'].shape}")
print(f"  Type: {type(adata.obsm['spatial'])}")

print("Data loaded!")


Loading data...


Fixing spatial coordinates format...
Spatial coordinates ready
  Shape: (558601, 2)
  Type: <class 'numpy.ndarray'>
Data loaded!


In [5]:
import muspan as ms
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc

# ==========================================
# 1. DATA CONVERSION (Scanpy -> MuSpAn)
# ==========================================
def create_muspan_domain(adata, condition_name):
    # Filter for the specific condition (Healthy vs Disease)
    subset = adata[adata.obs['type'] == condition_name]
    
    # 1. Create the Domain (The Container)
    domain = ms.domain(f"Kidney_{condition_name}")
    
    # 2. Add Points (Cell Coordinates)
    # Ensure coordinates are standard Numpy array
    coords = np.array(subset.obsm['spatial'])
    domain.add_points(coords, 'Cells')
    
    # 3. Add Labels (Cell Types & Module Scores)
    # Categorical: Cell Types
    cell_types = subset.obs['cellType_CosMx_2'].astype(str).values
    domain.add_labels('CellType', cell_types)
    
    # Continuous: Module Scores (Hypoxia/Inflammation)
    # (Assuming you calculated these in the previous step)
    if 'Hypoxia' in subset.obs.columns:
        domain.add_labels('Hypoxia', subset.obs['Hypoxia'].values)
        
    print(f"Created domain for {condition_name} with {len(coords)} cells.")
    return domain

# Load your data
# adata = sc.read_h5ad("your_file.h5ad") 
domain_disease = create_muspan_domain(adata, 'Disease')
domain_healthy = create_muspan_domain(adata, 'Healthy')

# ==========================================
# 2. PERFORM SPATIAL STATISTICS
# ==========================================

# --- STATISTIC A: Ripley's L-Function (Global Clustering) ---
# "Are Immune cells clustered or random?"
# We use 'cross_l_function' (Besag's L) which is a normalized version of Ripley's K.
# L(r) - r > 0 means Clustering. L(r) - r = 0 means Random.

r = np.linspace(0, 500, 50) # Analyze from 0 to 500 microns

l_results = ms.spatial_statistics.cross_l_function(
    domain_disease, 
    population_A=ms.query.query(domain_disease, ('label', 'CellType'), 'is', 'Immune'),
    population_B=ms.query.query(domain_disease, ('label', 'CellType'), 'is', 'Immune'), # Auto-correlation
    radii=r
)

# --- STATISTIC B: Pair Correlation Function (PCF) (Interaction) ---
# "Do Injured TAL cells ATTRACT Immune cells?"
# This is the "Smoking Gun" for recruitment.
# Peak at 20-50um = Direct recruitment.
# Peak at 0um = Co-localization.

pcf_results = ms.spatial_statistics.cross_pair_correlation_function(
    domain_disease,
    population_A=ms.query.query(domain_disease, ('label', 'CellType'), 'is', 'Injured TAL'), # The Source
    population_B=ms.query.query(domain_disease, ('label', 'CellType'), 'is', 'Immune'),      # The Responder
    min_r=0, max_r=200, step=5
)

# --- STATISTIC C: Moran's I (Signal Autocorrelation) ---
# "Is the Hypoxia signal random noise, or does it form a 'Niche'?"
# High Moran's I (>0.3) = Spatially coherent niches (Biological Reality).
# Low Moran's I (~0) = Random noise (Technical Artefact).

moran_val, _ = ms.spatial_statistics.morans_i(
    domain_disease,
    label_name='Hypoxia'
)

# ==========================================
# 3. VISUALIZE RESULTS
# ==========================================
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Plot L-Function (Clustering)
# Normalize: Plot (L(r) - r) vs r
l_values = l_results['L'] - r
ax[0].plot(r, l_values, label='Immune Clustering', color='blue', linewidth=2)
ax[0].axhline(0, linestyle='--', color='gray', label='Random (CSR)')
ax[0].set_title("Ripley's L: Immune Clustering", fontsize=14)
ax[0].set_xlabel("Distance (microns)")
ax[0].set_ylabel("L(r) - r")
ax[0].legend()

# Plot PCF (Recruitment)
pcf_vals = pcf_results['g']
pcf_r = pcf_results['r']
ax[1].plot(pcf_r, pcf_vals, label='Injured TAL -> Immune', color='red', linewidth=2)
ax[1].axhline(1, linestyle='--', color='gray', label='Random Expectation')
ax[1].set_title("Pair Correlation: Recruitment Signal", fontsize=14)
ax[1].set_xlabel("Distance (microns)")
ax[1].set_ylabel("Probability (g)")
ax[1].legend()

plt.suptitle(f"Spatial Proof of Mechanism (Moran's I = {moran_val:.2f})", fontsize=16)
plt.show()

Created domain for Disease with 274960 cells.
Created domain for Healthy with 283641 cells.


TypeError: cross_l_function() got an unexpected keyword argument 'radii'

In [None]:
import muspan as ms
import numpy as np
import matplotlib.pyplot as plt

# Assume 'domain' is already created from your adata (as per previous step)

# ==========================================
# 1. DEFINE POPULATIONS
# ==========================================
# We define our "Villains" (Injured TAL) and "Hunters" (Immune)
query_injured = ms.query.query(
    domain, ('label', 'CellType'), 'is', 'Injured TAL')
query_immune = ms.query.query(domain, ('label', 'CellType'), 'is', 'Immune')

# ==========================================
# 2. RUN THE STATISTICS
# ==========================================

# --- A. Cross-Pair Correlation Function (The Recruitment Proof) ---
# Check distances from 0 to 100 microns
pcf_result = ms.spatial_statistics.cross_pair_correlation_function(
    domain,
    population_A=query_injured,  # Reference (Source)
    population_B=query_immune,  # Target (Responder)
    min_r=0, max_r=100, step=2
)

# --- B. Average Nearest Neighbor Index (The Swarming Score) ---
anni_result = ms.spatial_statistics.average_nearest_neighbour_index(
    domain,
    population=query_immune
)

# --- C. Topographical Correlation Map (The Niche Map) ---
# This generates a localized heatmap of interaction
tcm_result = ms.spatial_statistics.topographical_correlation_map(
    domain,
    population_A=query_injured,
    population_B=query_immune,
    radius=50  # Look for correlations within 50 microns
)

# ==========================================
# 3. VISUALIZATION (Publication Quality)
# ==========================================
fig, ax = plt.subplots(1, 3, figsize=(20, 6))

# Plot 1: PCF (Recruitment)
# Look for the PEAK > 1 at r ~ 20um
ax[0].plot(pcf_result['r'], pcf_result['g'], color='red', lw=2)
ax[0].axhline(1, linestyle='--', color='grey')
ax[0].set_title("Recruitment Signal (PCF)", fontsize=14, fontweight='bold')
ax[0].set_xlabel("Distance ($\mu m$)")
ax[0].set_ylabel("Probability of Co-localization")

# Plot 2: ANNI (Swarming Text)
# Just displaying the score clearly
score = anni_result['anni']
ax[1].text(0.5, 0.5, f"ANNI Score:\n{score:.2f}",
           fontsize=30, ha='center', va='center', fontweight='bold')
ax[1].set_title("Swarming Intensity (R)", fontsize=14, fontweight='bold')
ax[1].axis('off')

# Plot 3: TCM (The Map)
# MuSpAn has a built-in visualizer for this complex map
ms.visualise.visualise(
    domain,
    objects=tcm_result['tcm_scores'],  # Color by the correlation score
    ax=ax[2],
    cmap='coolwarm',
    title="Maladaptive Niche Map"
)

plt.tight_layout()
plt.show()
