# Group-wise Comparisons

Differential expression and composition analysis between groups.

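**Input:** Spatial analysis AnnData file (`../data/processed/<sample>_spatial_analysis.h5ad`)
**Output:** Cell type composition and volcano plots (`../figures/04_group_comparisons/`) and a differential expression results table (`../data/processed/<sample>_de_results.csv`)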

In [None]:
import sys
sys.path.append('..')
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy import stats

# Sample identifier used to build the input and output file names.
SAMPLE_NAME = 'phenocycler_sample_01'

# Locations relative to the notebook's working directory.
DATA_DIR = Path('..') / 'data' / 'processed'
FIGURES_DIR = Path('..') / 'figures' / '04_group_comparisons'

# Create the figure directory (and any missing parents) up front so the
# later savefig calls do not fail; a pre-existing directory is fine.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the AnnData object for this sample from the processed-data directory.
# NOTE(review): presumably written by the preceding spatial-analysis notebook - confirm.
input_path = DATA_DIR / f'{SAMPLE_NAME}_spatial_analysis.h5ad'
adata = sc.read_h5ad(input_path)

# Quick sanity check: dimensions and the per-cell metadata columns available.
print(f'Loaded: {adata.shape}')
print(f'Available metadata: {list(adata.obs.columns)}')

## Cell Type Composition Analysis

In [None]:
# Per-sample cell type composition: count cells per (sample, celltype) pair,
# then express each sample's counts as a percentage of that sample's total.
required_cols = ['sample', 'celltype']
missing_cols = [col for col in required_cols if col not in adata.obs.columns]
if missing_cols:
    # Same exception type as the bare lookup would raise, but with a message
    # that says which metadata columns are actually absent.
    raise KeyError(f'adata.obs is missing required column(s): {missing_cols}')

# `observed=False` pins the behaviour this cell has always had for categorical
# grouping columns; relying on the implicit default is deprecated in pandas and
# the result would change once that default flips.
composition = (
    adata.obs
    .groupby(required_cols, observed=False)
    .size()
    .unstack(fill_value=0)
)

# Rows are samples, columns are cell types; divide each row by its own total.
composition_pct = composition.div(composition.sum(axis=1), axis=0) * 100
print('Cell type composition (%):')
print(composition_pct)

In [None]:
# Stacked bar chart of the composition percentages. The table is transposed so
# that cell types run along the x-axis and each sample is one stacked segment.
fig, ax = plt.subplots(figsize=(10, 6))
composition_pct.transpose().plot.bar(stacked=True, colormap='tab20', ax=ax)

ax.set(xlabel='Cell Type', ylabel='Percentage', title='Cell Type Composition')

# Slanted, right-aligned tick labels keep long cell type names readable.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_horizontalalignment('right')

# Place the legend outside the axes so it does not cover the bars.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

fig.tight_layout()
fig.savefig(FIGURES_DIR / 'composition.png', dpi=300, bbox_inches='tight')
plt.show()

## Differential Expression

In [None]:
# Example: Compare between cell types
# Ranks markers for every `celltype` group with a Wilcoxon test, plots the top
# 10 per group, and exports the per-group statistics as one wide CSV table.
if 'celltype' in adata.obs.columns:
    sc.tl.rank_genes_groups(adata, 'celltype', method='wilcoxon')

    # scanpy's `save=` writes into `sc.settings.figdir`, not into this
    # notebook's figure directory, so point it at FIGURES_DIR first. That keeps
    # this figure next to the other plots produced here.
    sc.settings.figdir = FIGURES_DIR
    sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False, save='_de_celltypes.png')

    # Export DE results
    result = adata.uns['rank_genes_groups']
    groups = result['names'].dtype.names  # one structured-array field per group
    # Columns are named '<group>_<statistic>'. 'pvals_adj' is exported alongside
    # the raw p-values so downstream filtering can use the corrected values.
    de_df = pd.DataFrame({
        f'{group}_{key}': result[key][group]
        for group in groups
        for key in ['names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges']
    })
    de_df.to_csv(DATA_DIR / f'{SAMPLE_NAME}_de_results.csv')
    print('DE results saved')

## Volcano Plot

In [None]:
# Volcano plot (fold change vs. -log10 p-value) for the first group stored in
# the rank_genes_groups result; skipped when no DE result is present.
if 'rank_genes_groups' in adata.uns:
    # Thresholds shared by the significance mask and the guide lines.
    logfc_cutoff = 0.5
    pval_cutoff = 0.05
    max_labels = 10

    de_stats = adata.uns['rank_genes_groups']
    group = de_stats['names'].dtype.names[0]
    fold_changes = de_stats['logfoldchanges'][group]
    raw_pvals = de_stats['pvals'][group]
    gene_names = de_stats['names'][group]

    # The tiny offset keeps p-values of exactly 0 finite on the log scale.
    neg_log_p = -np.log10(raw_pvals + 1e-300)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(fold_changes, neg_log_p, alpha=0.5, s=20)

    # Annotate at most the first `max_labels` significant genes, in the order
    # in which they are stored in the result.
    is_significant = (np.abs(fold_changes) > logfc_cutoff) & (raw_pvals < pval_cutoff)
    labelled_points = zip(
        fold_changes[is_significant][:max_labels],
        neg_log_p[is_significant][:max_labels],
        gene_names[is_significant][:max_labels],
    )
    for x_pos, y_pos, gene in labelled_points:
        ax.text(x_pos, y_pos, gene, fontsize=8)

    # Dashed guide lines at the p-value and fold-change thresholds.
    ax.axhline(-np.log10(pval_cutoff), color='red', linestyle='--', alpha=0.5)
    for x_cut in (-logfc_cutoff, logfc_cutoff):
        ax.axvline(x_cut, color='blue', linestyle='--', alpha=0.5)

    ax.set_xlabel('Log2 Fold Change')
    ax.set_ylabel('-Log10 P-value')
    ax.set_title(f'Volcano Plot - {group}')

    fig.tight_layout()
    fig.savefig(FIGURES_DIR / 'volcano_plot.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Closing summary of where this notebook wrote its outputs. Figures go to
# FIGURES_DIR, while the DE table is written under DATA_DIR, so report that
# path as well when the file exists.
print('Group comparisons analysis complete!')
print(f'Results saved in: {FIGURES_DIR}')
de_results_path = DATA_DIR / f'{SAMPLE_NAME}_de_results.csv'
if de_results_path.exists():
    print(f'DE results table: {de_results_path}')