# Integrated Spatial Analysis with Cell2location

This notebook performs:
1. Integration of CD45- and CD45+ scRNA-seq datasets
2. Cell2location deconvolution using integrated reference
3. Neighborhood enrichment analysis
4. Spatial interaction graph construction

In [1]:
# Install dependencies
!pip install cell2location scvi-tools --quiet

In [2]:
# Import libraries
import scanpy as sc
import cell2location
import scvi
import matplotlib.pyplot as plt

## Data Integration

We first concatenate CD45- and CD45+ scRNA-seq datasets to create a comprehensive reference.

In [6]:
# Load preprocessed data
adata_sc = sc.read('./data/scRNA_seq/GSE194247/adata_sc_preprocessed.h5ad')
adata_sc.X = adata_sc.raw.X
adata_sc.obs_names_make_unique()
adata_sp = sc.read('./data/spatial/GSE235315/adata_spatial_preprocessed_merged.h5ad')
adata_sp.obs_names_make_unique()

ValueError: Data matrix has wrong shape (29717, 33538), need to be (29717, 3000).

In [None]:
# Load both datasets
adata_cd45neg = sc.read('./data/scRNA_seq/GSE194247/adata_sc_preprocessed.h5ad')
adata_cd45pos = sc.read('./data/scRNA_seq/GSE235449/adata_immune_preprocessed.h5ad')

# Concatenate datasets
adata_sc = adata_cd45neg.concatenate(adata_cd45pos, 
    batch_key='dataset',
    batch_categories=['CD45neg', 'CD45pos'])

# Store raw counts
adata_sc.raw = adata_sc.copy()
adata_sc.obs_names_make_unique()

# Load spatial data
adata_sp = sc.read('./data/spatial/GSE235315/adata_spatial_preprocessed_merged.h5ad')
adata_sp.obs_names_make_unique()

In [5]:
# Train reference model (scRNA-seq)
scvi.model.SCVI.setup_anndata(adata_sc, labels_key='cell_type')
sc_model = scvi.model.SCVI(adata_sc)
sc_model.train()
adata_sc.obsm['scVI'] = sc_model.get_latents()

  self.validate_field(adata)


KeyError: 'cell_type not found in adata.obs.'

In [None]:
# Export cell-type signatures
from cell2location.models import RegressionModel
reg_model = RegressionModel(adata_sc)
reg_model.train(max_epochs=250)
inf_aver = reg_model.get_cluster_gene_expression()
inf_aver.save('./results/cell_abundances/reference_signatures.h5ad')

In [None]:
# Prepare spatial data for Cell2location
cell2location.models.Cell2location.setup_anndata(adata_sp)
mod = cell2location.models.Cell2location(
    adata_sp, cell_state_df=inf_aver, N_cells_per_location=30, detection_alpha=20
)
mod.train(max_epochs=300)

## Cell2location Analysis

We use Cell2location with:
- Integrated reference data
- 5% quantile estimates for abundance
- Downstream spatial analysis parameters

In [None]:
# Export estimated cell-type abundance
adata_sp = mod.export_posterior(adata_sp, sample_kwargs={'num_samples': 1000})
adata_sp.write('../results/cell_abundances/adata_sp_cell2location.h5ad')
print('✅ Deconvolution complete.')

## Neighborhood Enrichment Analysis

1. Identify high spots (5% quantile abundance > 3)
2. Calculate neighbor abundances up to 3rd degree
3. Compare observed vs expected profiles
4. Generate enrichment profiles

In [None]:
def calculate_neighborhood_enrichment(adata, abundance_key='means_cell_abundance_w_sf', threshold=3):
    """Calculate neighborhood enrichment for cell types"""
    # Calculate spatial neighbors
    sc.pp.neighbors(adata, n_neighbors=10, use_rep='spatial')
    
    # Identify high spots
    high_spots = {}
    for ct in adata.obsm[abundance_key].columns:
        high_spots[ct] = adata.obs_names[
            adata.obsm[abundance_key][ct] > threshold
        ].tolist()
    
    # Calculate neighbor abundances
    enrichment_profiles = {}
    for ct in high_spots:
        # Get neighboring spots
        neighbors = adata.obsp['distances'].tocsr()[adata.obs_names.isin(high_spots[ct])]
        neighbor_idx = neighbors.indices[neighbors.data <= 3]
        
        # Calculate observed abundances
        observed = adata.obsm[abundance_key].iloc[neighbor_idx].sum()
        
        # Calculate expected abundances
        n_neighbors = len(neighbor_idx)
        expected = adata.obsm[abundance_key].mean() * n_neighbors
        
        # Calculate enrichment
        enrichment_profiles[ct] = observed / expected
    
    return pd.DataFrame(enrichment_profiles)

# Calculate enrichment
enrichment_df = calculate_neighborhood_enrichment(adata_sp)

# Generate interaction graph
interaction_matrix = (enrichment_df > 1) & (enrichment_df > 1).T

# Save results
adata_sp.uns['neighborhood_enrichment'] = enrichment_df.to_dict()
adata_sp.uns['interaction_matrix'] = interaction_matrix.to_dict()

# Save processed data
adata_sp.write('./results/spatial_analysis/adata_sp_analyzed.h5ad')

In [None]:
# Example visualization: spatial distribution of a cell type
import seaborn as sns
sc.pl.spatial(adata_sp, color=['Cancer_BasalLike', 'Cancer_Classical'], cmap='Reds', spot_size=30)




















plt.show()plt.title('Cell-Cell Interactions')        node_size=1000, font_size=8)nx.draw(G, with_labels=True, node_color='lightblue', plt.figure(figsize=(8, 8))G = nx.from_pandas_adjacency(interaction_matrix)# Plot interaction graphplt.show()plt.tight_layout()axes[1].set_title('Neighborhood Enrichment')sns.heatmap(enrichment_df, cmap='RdBu_r', center=1, ax=axes[1])# Plot enrichment heatmap             cmap='Reds', spot_size=30, ax=axes[0])



adata_sp.uns['q05_cell_abundance'] = q05q05 = np.quantile(adata_sp.obsm['q05_cell_abundance_w_sf'], axis=0)

# Calculate 5% quantile for each cell type



)    sample_kwargs={'num_samples': 1000}
    adata_sp, adata_sp = mod.export_posterior(
# Get 5% quantile estimatesmod.train(max_epochs=300)


)    detection_alpha=20    N_cells_per_location=30,     cell_state_df=inf_aver, sc.pl.spatial(adata_sp, color=['Cancer_BasalLike', 'Cancer_Classical'], 
    adata_sp, mod = cell2location.models.Cell2location(
fig, axes = plt.subplots(1, 2, figsize=(12, 5))cell2location.models.Cell2location.setup_anndata(adata_sp)# Prepare spatial data for Cell2locationimport networkx as nx

# Plot spatial abundances

## Visualization

1. Spatial abundance plots
2. Neighborhood enrichment heatmap
3. Cell-cell interaction graph