# Figure 2: Mathematical Foundation and Theoretical Framework

This notebook generates Figure 2 showing the mathematical foundations of diffusion models adapted for biological data.

**Figure Caption**: Conceptual illustration of the mathematical foundations underlying diffusion models adapted for biological data. The diagram presents the theoretical framework showing how forward and reverse diffusion processes can be adapted to handle discrete, sparse gene expression data while maintaining biological plausibility.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from matplotlib.patches import FancyBboxPatch, Circle, FancyArrowPatch
import seaborn as sns

# Set style.
# The 'seaborn-v0_8-*' style names are only registered in matplotlib >= 3.6;
# earlier releases expose the same sheet as 'seaborn-whitegrid'. Use whichever
# name this installation knows so the cell does not abort with OSError, and
# fall back to matplotlib's default style if neither is registered.
_whitegrid_candidates = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(next(
    (name for name in _whitegrid_candidates if name in plt.style.available),
    'default',
))
sns.set_palette("husl")

# Build the 16x12 inch canvas and carve it into a 3x2 grid:
#   ax1 - main diagram, top two rows across both columns
#   ax2 - mathematical equations, bottom-left cell
#   ax3 - biological adaptations, bottom-right cell
fig = plt.figure(figsize=(16, 12))
grid_shape = (3, 2)

ax1 = plt.subplot2grid(grid_shape, (0, 0), rowspan=2, colspan=2)
ax2 = plt.subplot2grid(grid_shape, (2, 0))
ax3 = plt.subplot2grid(grid_shape, (2, 1))

# The main diagram is drawn in its own 12 x 8 data coordinate system
ax1.set_xlim(0, 12)
ax1.set_ylim(0, 8)

# Every panel is a free-form drawing surface, so hide ticks and spines
for panel in (ax1, ax2, ax3):
    panel.axis('off')

# Colour constants used by the drawing code below
forward_color, reverse_color = '#E74C3C', '#3498DB'  # red, blue
data_color, noise_color = '#2ECC71', '#95A5A6'       # green, gray

# Title of the main diagram, centred horizontally near the top of ax1
ax1.text(
    6, 7.5,
    'Diffusion Model Mathematical Framework for Biological Data',
    fontsize=16, fontweight='bold',
    ha='center', va='center',
)

# Data states along the diffusion chain, each as (label, x, y, colour).
# The last node is the terminal step x_T (the same T the equations panel uses
# in p(xT) and the products up to T), not a generic intermediate x_t. Unicode
# has no subscript capital T, so that label is written in mathtext; \mathbf
# keeps it bold to match the fontweight='bold' used for the plain labels.
states = [
    ('x₀\n(Real Data)', 1, 6, data_color),
    ('x₁', 3, 6, noise_color),
    ('x₂', 5, 6, noise_color),
    ('...', 7, 6, noise_color),  # placeholder; the arrow loops compare against this exact label
    ('$\\mathbf{x_T}$\n(Pure Noise)', 9, 6, noise_color),
]

# Draw every state as a translucent disc (radius 0.4) with its label centred on it
for label, cx, cy, tint in states:
    disc = Circle((cx, cy), 0.4, facecolor=tint, alpha=0.3, edgecolor=tint, linewidth=2)
    ax1.add_patch(disc)
    ax1.text(cx, cy, label, ha='center', va='center', fontsize=10, fontweight='bold')

# Forward arrows: walk consecutive (source, target) state pairs left to right
# and connect them, skipping any pair whose target is the '...' placeholder
for (_, src_x, src_y, _), (dst_label, dst_x, dst_y, _) in zip(states, states[1:]):
    if dst_label == '...':
        continue
    # Offsets of 0.4 start and end the arrow on the edges of the state circles
    step_arrow = FancyArrowPatch(
        (src_x + 0.4, src_y), (dst_x - 0.4, dst_y),
        arrowstyle='->', mutation_scale=20,
        color=forward_color, linewidth=2,
    )
    ax1.add_patch(step_arrow)
    # Noise symbol just above the midpoint of the arrow
    ax1.text(
        (src_x + dst_x) / 2, src_y + 0.3, '+ε',
        ha='center', va='center',
        fontsize=12, color=forward_color, fontweight='bold',
    )

# Reverse arrows: walk (source, target) pairs right to left, drawing each arrow
# 0.8 below the state centres; pairs whose source is the '...' placeholder are skipped
backward_pairs = reversed(list(zip(states[1:], states)))
for (src_label, src_x, src_y, _), (_, dst_x, dst_y, _) in backward_pairs:
    if src_label == '...':
        continue
    denoise_arrow = FancyArrowPatch(
        (src_x - 0.4, src_y - 0.8), (dst_x + 0.4, dst_y - 0.8),
        arrowstyle='->', mutation_scale=20,
        color=reverse_color, linewidth=2,
    )
    ax1.add_patch(denoise_arrow)
    # Denoising symbol above the midpoint of the arrow
    ax1.text(
        (src_x + dst_x) / 2, src_y - 0.5, 'εθ(xₜ,t)',
        ha='center', va='center',
        fontsize=10, color=reverse_color, fontweight='bold',
    )

# Captions naming the two processes, one above the state row and one below it,
# each in the colour of the arrows it describes
process_captions = (
    (6.8, 'Forward Process: q(xₜ|xₜ₋₁)', forward_color),
    (4.5, 'Reverse Process: pθ(xₜ₋₁|xₜ)', reverse_color),
)
for caption_y, caption, caption_color in process_captions:
    ax1.text(5, caption_y, caption, ha='center', va='center',
             fontsize=12, color=caption_color, fontweight='bold')

def _draw_info_box(ax, left, accent, heading, upper_bullets, lower_bullets):
    """Draw a tinted, rounded 4 x 2 box at (left, 2) on *ax* with a heading and two bullet groups.

    The heading is drawn in the accent colour at y=3.5 and the two bullet
    groups at y=3 and y=2.3, all centred horizontally on the box.
    Returns the FancyBboxPatch that was added to the axes.
    """
    frame = FancyBboxPatch(
        (left, 2), 4, 2,
        boxstyle="round,pad=0.1",
        facecolor=accent, edgecolor=accent,
        alpha=0.2, linewidth=2,
    )
    ax.add_patch(frame)

    mid_x = left + 2  # horizontal centre of the 4-unit-wide box
    ax.text(mid_x, 3.5, heading, ha='center', va='center',
            fontsize=12, fontweight='bold', color=accent)
    ax.text(mid_x, 3, upper_bullets, ha='center', va='center', fontsize=10)
    ax.text(mid_x, 2.3, lower_bullets, ha='center', va='center', fontsize=10)
    return frame


# Biological data characteristics (left box)
bio_box = _draw_info_box(
    ax1, 0.5, data_color,
    'Biological Data Properties',
    '• Discrete gene expression\n• Extreme sparsity (80-95% zeros)\n• High dimensionality (20k+ genes)',
    '• Zero-inflated distributions\n• Technical dropout noise',
)

# Adaptations box (right box)
adapt_box = _draw_info_box(
    ax1, 7.5, '#F39C12',
    'Model Adaptations',
    '• Discrete diffusion processes\n• Biological constraint integration\n• Sparse-aware architectures',
    '• Zero-inflation modeling\n• Gene regulatory priors',
)

# Bottom-left panel: heading plus the core diffusion equations, positioned in
# axes-fraction coordinates (transAxes) so they are independent of data limits
ax2.text(0.5, 0.8, 'Core Mathematical Framework',
         transform=ax2.transAxes, ha='center', va='center',
         fontsize=14, fontweight='bold')

equations = [
    'Forward: q(x₁:T|x₀) = ∏ᵗ₌₁ᵀ q(xₜ|xₜ₋₁)',
    'q(xₜ|xₜ₋₁) = N(xₜ; √(1-βₜ)xₜ₋₁, βₜI)',
    'Reverse: pθ(x₀:T) = p(xT)∏ᵗ₌₁ᵀ pθ(xₜ₋₁|xₜ)',
    'Loss: L = E[||ε - εθ(xₜ,t)||²]',
]

# One light-blue rounded box per equation, stacked downwards from y=0.6 in steps of 0.15
for row, formula in enumerate(equations):
    ax2.text(
        0.05, 0.6 - row*0.15, formula,
        transform=ax2.transAxes, ha='left', va='center', fontsize=11,
        bbox={'boxstyle': "round,pad=0.3", 'facecolor': 'lightblue', 'alpha': 0.3},
    )

# Bottom-right panel: heading followed by the biology-specific modifications,
# laid out in axes-fraction coordinates (transAxes)
adaptations = [
    'Zero-inflated: p(x) = π·δ₀ + (1-π)·f(x)',
    'Discrete: q(xₜ|xₜ₋₁) = Categorical(αₜxₜ₋₁ + βₜ)',
    'Sparse loss: L = E[w(x)||ε - εθ(xₜ,t)||²]',
    'Biological prior: εθ(xₜ,t,c) with constraints c',
]

ax3.text(0.5, 0.8, 'Biological Adaptations',
         fontsize=14, fontweight='bold',
         ha='center', va='center', transform=ax3.transAxes)

# Each entry gets its own light-green rounded box; rows start at y=0.6 and step down by 0.15
for line_no, entry in enumerate(adaptations):
    row_y = 0.6 - line_no*0.15
    highlight = dict(boxstyle="round,pad=0.3", facecolor='lightgreen', alpha=0.3)
    ax3.text(0.05, row_y, entry, fontsize=11, ha='left', va='center',
             transform=ax3.transAxes, bbox=highlight)

# Tighten the panel spacing, write a 300-dpi PNG with a white background into
# the current working directory, then display the figure
plt.tight_layout()
plt.savefig(
    'figure_2_mathematical_foundations.png',
    dpi=300,
    bbox_inches='tight',
    facecolor='white',
    edgecolor='none',
)
plt.show()

## Figure Description

This figure illustrates the mathematical foundations of diffusion models adapted for biological data:

1. **Forward Process**: Shows how real biological data is gradually corrupted with noise
2. **Reverse Process**: Demonstrates the learned denoising process that generates new data
3. **Biological Properties**: Highlights the unique characteristics of single-cell RNA sequencing (scRNA-seq) data
4. **Model Adaptations**: Shows how diffusion models are modified for biological applications
5. **Mathematical Framework**: Core equations for diffusion processes
6. **Biological Adaptations**: Specific mathematical modifications for handling biological data

The visualization emphasizes the theoretical framework rather than empirical results, showing how mathematical concepts can be adapted for biological applications.