# Dataset Generation Demo

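This notebook demonstrates the full dataset generation pipeline: it generates a random DAG, computes the DAG's hash, and then draws one sample from the SCM sampler for each rung (1, 2, and 3), printing the factual values and, when present, the intervention and counterfactual values.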

In [None]:
import sys
# Put the parent directory first on the module search path so the `src`
# package imports below resolve.
# NOTE(review): '..' is relative to the current working directory — this
# presumably expects the notebook to be run from a folder one level below
# the repository root; confirm.
sys.path.insert(0, '..')

from src.generator.dag_factory import generate_random_dag, get_dag_hash
from src.generator.scm_sampler import SCMSampler

In [None]:
# Build one seeded random DAG and compute its hash.
# `G` and `dag_hash` are consumed by the sampling cell further down.
DAG_SIZE = 10      # first positional argument; presumably the node count — confirm
EDGE_PROB = 0.3
DAG_SEED = 42

G = generate_random_dag(DAG_SIZE, edge_prob=EDGE_PROB, seed=DAG_SEED)
dag_hash = get_dag_hash(G)

# Report the graph size and its hash, one item per line.
print(
    f"Generated DAG: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges",
    f"Hash: {dag_hash}",
    sep="\n",
)

In [None]:
# Draw one sample per rung (1, 2, 3) from the DAG built above and print a
# short preview: the first five entries of each array, rounded to 3 decimals.
sampler = SCMSampler(seed=42)

for rung in (1, 2, 3):
    sample = sampler.sample(G, dag_hash, rung)
    print(f"\nRung {rung} ({sample.query_type}):")
    print(f"  X_factual: {sample.X_factual[:5].round(3)}...")
    if sample.X_counterfactual is None:
        # This sample carries no counterfactual, so there is nothing more to show.
        continue
    print(f"  Intervention: {sample.intervention}")
    print(f"  X_counterfactual: {sample.X_counterfactual[:5].round(3)}...")