# Synthetic Data Validation for PROB
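This notebook generates synthetic gene expression data, runs the two-step PROB pipeline on it (pseudotemporal progression inference, then gene regulatory network inference), checks the shapes of the outputs, and reports the progression correlation and the network density.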

In [None]:
import numpy as np
from your_prob_module import Progression_Inference, ODE_BayesianLasso  # Import your functions

In [None]:
# Set random seed and parameters
np.random.seed(42)  # fixed seed so every run draws the same synthetic data
n_genes, n_samples = 20, 50
# Ground-truth progression time: one evenly spaced value in [0, 1] per sample.
t = np.linspace(0, 1, n_samples)
# Assign each sample to one of four equal-width stages labelled 1..4.
# np.digitize is already 1-based for values >= the first bin edge, so no
# "- 1" shift is applied: shifting and then clipping to [1, 4] would fold the
# first two quartiles into stage 1 and leave stage 4 with a single sample.
# Only t == 1.0 falls past the last edge (index 5); the clip folds it into
# stage 4.
stages = np.clip(np.digitize(t, np.linspace(0, 1, 5)), 1, 4)

In [None]:
# Generate synthetic expression data
# Rows are genes, columns are samples. Genes 0-7 follow a rising trend in t,
# genes 8-15 a falling trend, and the remaining genes are noise around 1.
expr = np.zeros((n_genes, n_samples))
rising_trend, falling_trend = t, 1 - t
for gene_idx in range(n_genes):
    if gene_idx >= 16:
        # Time-independent gene: unit-variance noise shifted to mean 1.
        expr[gene_idx] = np.random.randn(n_samples) + 1
        continue
    trend = rising_trend if gene_idx < 8 else falling_trend
    # Multiplicative noise is drawn before additive noise; keeping this order
    # keeps the random stream (and therefore the data) unchanged.
    scale_noise = np.random.randn(n_samples)
    offset_noise = np.random.randn(n_samples)
    expr[gene_idx] = trend * (1 + scale_noise * 0.2) + offset_noise * 0.1
# Floor every value at 0.1 so no expression level is zero or negative.
expr = np.maximum(expr, 0.1)
# Append the stage labels as one extra row below the gene rows.
X_stage = np.vstack((expr, stages))
print(f'Test data shape: {X_stage.shape}')

In [None]:
# Step 1: Pseudotemporal progression inference
# X_stage is the expression matrix with the stage labels appended as its last
# row. The call returns three values; the validation cell expects Data_ordered
# to have shape (n_genes, n_samples) and correlates PPD against the true time t.
# NOTE(review): presumably Data_ordered is the expression data re-ordered along
# the inferred progression and TimeSampled the matching time points - confirm
# against Progression_Inference.
Data_ordered, PPD, TimeSampled = Progression_Inference(X_stage)
print(f'PPD shape: {PPD.shape}')

In [None]:
# Step 2: Gene regulatory network inference
# Consumes the two Step 1 outputs Data_ordered and TimeSampled. The call
# returns three values; the validation cell expects S and AM to have shape
# (n_genes, n_genes) and counts the nonzero entries of AM as network edges.
# Para_Post_pdf is not used again in the cells visible here.
# NOTE(review): presumably AM is the inferred adjacency matrix and S a
# gene-by-gene score/significance matrix - confirm against ODE_BayesianLasso.
Para_Post_pdf, S, AM = ODE_BayesianLasso(Data_ordered, TimeSampled)
print(f'Adjacency matrix shape: {AM.shape}')

In [None]:
# Validation assertions
# Shape checks on the pipeline outputs; any mismatch stops the cell with the
# message naming the offending array.
assert Data_ordered.shape == (n_genes, n_samples), 'Data_ordered shape mismatch'
assert S.shape == (n_genes, n_genes), 'S shape mismatch'
assert AM.shape == (n_genes, n_genes), 'AM shape mismatch'
# The check mark is written as an escape (U+2713) so it cannot be corrupted
# again if the file is re-saved with the wrong encoding.
print('\u2713 ALL TESTS PASSED')
# Pearson correlation between the true time t and the inferred PPD.
# NOTE(review): assumes PPD is 1-D with n_samples entries in the same sample
# order as t - confirm against Progression_Inference.
print(f'Step 1 - Progression correlation: {np.corrcoef(t, PPD)[0,1]:.4f}')
# Count the nonzero entries of AM once and reuse the count for the density.
# The denominator is the number of ordered gene pairs without self-pairs,
# while the count includes any nonzero diagonal entries.
n_edges = np.sum(AM != 0)
print(f'Step 2 - Network: {n_edges} edges, density={n_edges/(n_genes*(n_genes-1)):.4f}')