# Analysis: Region-Aware Matching and Downstream Analysis

This notebook covers:
1. Region-aware matching for tissue heterogeneity
2. Downstream analysis of integration results

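**Prerequisites**: Run the preprocessing, integration, and visualization notebooks first. The region-detection cell below expects `protein_adata` to already be available in the kernel.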

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt
from scipy import sparse
from scipy.io import mmread
import warnings
# Globally suppress all warnings for this kernel session.
# Note that this also hides warnings that may point at real problems.
warnings.filterwarnings('ignore')

# Notebook-wide matplotlib defaults: 10x8 figures at 100 dpi.
plt.rcParams.update({
    'figure.figsize': [10, 8],
    'figure.dpi': 100,
})

## Step 12: Region-Aware Matching (Advanced)

MaxFuse now supports region-aware matching with three combined approaches:
1. **Prior-weighted distances** - Favor biologically plausible matches (e.g., B cells to B-cell follicles)
2. **Neighborhood features** - Augment CODEX features with spatial context
3. **Post-hoc filtering** - Remove implausible matches after integration

This addresses spatial heterogeneity in tissues that contain distinct anatomical regions.

In [None]:
# REGION-AWARE MATCHING EXAMPLE
# This demonstrates the new spatial-aware MaxFuse features

import numpy as np
from maxfuse import Fusor
from maxfuse.core import model as mf_model
from maxfuse.core import spatial_utils

# Marker-to-region mapping for spleen: marker name -> region label.
marker_to_region = dict(
    CD20='B_follicle',
    CD3e='T_zone',
    CD68='Red_pulp',
    CD31='Endothelial',
)

# Cell-type-to-region compatibility weights, one inner mapping per cell type.
# A smaller weight means the pairing is more compatible; a larger weight
# means it is less compatible.
celltype_region_weights = dict(
    B_cell=dict(B_follicle=0.1, T_zone=2.0, Red_pulp=5.0, Endothelial=3.0, mixed=1.0),
    T_cell=dict(B_follicle=2.0, T_zone=0.1, Red_pulp=3.0, Endothelial=3.0, mixed=1.0),
    Macrophage=dict(B_follicle=3.0, T_zone=2.0, Red_pulp=0.1, Endothelial=2.0, mixed=1.0),
    DC=dict(B_follicle=1.5, T_zone=0.5, Red_pulp=1.5, Endothelial=2.0, mixed=1.0),
)

# Detect tissue regions using marker expression + spatial clustering.
# NOTE(review): `protein_adata` is not created in the visible part of this
# notebook; it must already be loaded in the kernel before this cell runs.

# (n_cells, 2) array of per-cell centroid coordinates: column 0 = X, column 1 = Y.
spatial_coords = np.column_stack([
    protein_adata.obs['X_centroid'].values,
    protein_adata.obs['Y_centroid'].values
])

marker_names = list(protein_adata.var_names)

# AnnData may store X as a scipy sparse matrix. Densify it in that case so the
# detector receives a plain array; a dense X is passed through untouched.
# NOTE(review): assumes detect_tissue_regions expects a dense (cells x markers)
# array - confirm against its implementation.
marker_expr = protein_adata.X
if sparse.issparse(marker_expr):
    marker_expr = marker_expr.toarray()

regions, region_info = spatial_utils.detect_tissue_regions(
    locations=spatial_coords,
    marker_expression=marker_expr,
    marker_names=marker_names,
    marker_to_region=marker_to_region,
    n_neighbors=30,
    min_cluster_size=20
)

# Summarise how many cells were assigned to each region label.
print("Detected tissue regions:")
for region, count in region_info['region_counts'].items():
    print(f"  {region}: {count} cells")

# Scatter every cell at its centroid, coloured by its detected region label.
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(10, 8))

# Fixed palette for the known region labels.
region_colors = dict(
    B_follicle='blue',
    T_zone='green',
    Red_pulp='red',
    Endothelial='purple',
    mixed='lightgray',
)
# Labels without a palette entry fall back to plain gray.
colors = [region_colors.get(label, 'gray') for label in regions]

ax.scatter(
    spatial_coords[:, 0],
    spatial_coords[:, 1],
    c=colors,
    s=0.5,
    alpha=0.5,
)
ax.set(
    title='Detected Tissue Regions',
    xlabel='X (µm)',
    ylabel='Y (µm)',
    aspect='equal',
)

# Legend: one swatch per palette entry, in palette order.
legend_elements = [
    Patch(facecolor=swatch, label=name)
    for name, swatch in region_colors.items()
]
ax.legend(handles=legend_elements, loc='upper right')
plt.tight_layout()
plt.show()

# Print a copy-paste template showing how to switch on region-aware matching.
# Everything inside the triple-quoted string is display text only; none of it
# is executed by this cell.
# The heading is a plain string: it has no placeholders, so no f-prefix is needed.
print("\nTo use region-aware matching, modify the Fusor initialization:")
print("""
# Initialize with spatial coordinates
fusor = Fusor(
    shared_arr1=rna_shared, shared_arr2=protein_shared,
    active_arr1=rna_active, active_arr2=protein_active,
    locations2=spatial_coords,  # NEW: spatial coordinates
)

# After split_into_batches and construct_graphs...

# Configure region priors
fusor.set_region_priors(
    rna_labels=rna_celltypes,
    spatial_regions=regions,
    celltype_to_region_weights=celltype_region_weights,
    wt_on_prior=0.3  # 30% weight on priors
)

# Run matching with priors
fusor.find_initial_pivots(use_region_priors=True)
fusor.refine_pivots(use_region_priors=True)

# Optionally configure post-hoc filter for forbidden matches
fusor.set_posthoc_filter(
    rna_labels=rna_celltypes,
    spatial_regions=regions,
    forbidden_pairs=[('B_cell', 'Red_pulp'), ('Macrophage', 'B_follicle')]
)

# Get filtered results
matching = fusor.get_matching(apply_posthoc_filter=True, verbose=True)
""")