# Example Workflow: Evaluating the Impact of Structure Learning

This notebook demonstrates the complete workflow for assessing how different structure learning algorithms affect generative model rankings.

## Step 1: Data Exploration

First, let's explore the available datasets and their properties.

In [None]:
import sys
sys.path.insert(0, '..')

from src.data.loader import DataLoader
from src.utils.graph_utils import save_graph_visualization
import pandas as pd
import matplotlib.pyplot as plt

# Discover which datasets the loader can serve and summarise the first one.
loader = DataLoader()
datasets = loader.list_available_datasets()
print(f"Available datasets: {datasets}")

# Step 2 reads `dataset_name`, so an empty dataset list must stop the notebook
# here with an actionable message instead of surfacing later as a NameError.
if not datasets:
    raise RuntimeError(
        "No datasets available from DataLoader; "
        "add a dataset before running the rest of this notebook."
    )

# The rest of the walkthrough uses the first dataset the loader reports.
dataset_name = datasets[0]
info = loader.get_dataset_info(dataset_name)
print(f"\nDataset: {info['name']}")
print(f"Samples: {info['n_samples']}")
print(f"Features: {info['n_features']}")
print(f"Causal edges: {info['n_edges']}")
print(f"Feature names: {info['features']}")

## Step 2: Preprocess Data

Load and preprocess a dataset, automatically detecting categorical and continuous features.

In [None]:
from src.data.preprocessor import DataPreprocessor

# Fetch the raw table together with its reference causal graph.
data, structure = loader.load_dataset(dataset_name)
print(f"Loaded data shape: {data.shape}")
print(f"Structure: {structure.number_of_nodes()} nodes, {structure.number_of_edges()} edges")

# Fit the preprocessor on the raw table and transform it in a single call.
# NOTE(review): categorical_threshold presumably caps the number of distinct
# values a column may have to be treated as categorical — confirm in DataPreprocessor.
preprocessor = DataPreprocessor(
    categorical_threshold=10,
)
data_processed = preprocessor.fit_transform(data)

# Report how the fitted preprocessor split the columns by type.
feature_info = preprocessor.get_feature_info()
print(
    f"\nCategorical features ({feature_info['n_categorical']}): {feature_info['categorical_features']}"
)
print(
    f"Continuous features ({feature_info['n_continuous']}): {feature_info['continuous_features']}"
)

## Step 3: Train Generative Models

Train multiple generative models on the preprocessed data.

In [None]:
from src.models.gmm_wrapper import GMMWrapper
from src.models.bayesian_network import BayesianNetworkWrapper

# Registry of fitted generators, keyed by the short name used in later steps.
models = {}

# --- Gaussian mixture model ---
print("Training GMM...")
gmm = GMMWrapper(n_components=5)
gmm.fit(data_processed)
models.update({'gmm': gmm})  # registered only after a successful fit
print("GMM trained successfully")

# --- Bayesian network ---
print("\nTraining Bayesian Network...")
bn = BayesianNetworkWrapper()
bn.fit(data_processed)
models.update({'bayesian_network': bn})  # registered only after a successful fit
print("Bayesian Network trained successfully")

## Step 4: Generate Synthetic Data

Generate synthetic samples from trained models.

In [None]:
# Draw as many synthetic rows as there are rows in the real data.
n_samples = len(data)

# Synthetic data per model, mapped back through the preprocessor's inverse transform.
synthetic_datasets = {}

for model_name, generator in models.items():
    print(f"Generating {n_samples} samples from {model_name}...")

    # Models were fitted on preprocessed data, so their samples are passed
    # through inverse_transform before being stored.
    synthetic_data_original = preprocessor.inverse_transform(
        generator.sample(n_samples)
    )
    synthetic_datasets[model_name] = synthetic_data_original

    print(f"Generated shape: {synthetic_data_original.shape}")
    print(f"Sample:\n{synthetic_data_original.head()}\n")

## Step 5: Learn Causal Structures

Apply different structure learning algorithms to the data.

In [None]:
from src.structure_learning.pc import PCLearner
from src.structure_learning.ges import GESLearner
from src.utils.graph_utils import compute_graph_metrics

# Graphs recovered by each structure learner, keyed by algorithm name.
learned_structures = {}

# --- PC ---
print("Learning structure with PC algorithm...")
pc_learner = PCLearner(alpha=0.05)
learned_structures['pc'] = pc_graph = pc_learner.fit(data_processed)
# Score the learned graph against the graph loaded with the dataset.
pc_metrics = compute_graph_metrics(structure, pc_graph)
print(
    f"PC: {pc_graph.number_of_edges()} edges, SHD={pc_metrics['shd']}, F1={pc_metrics['f1']:.3f}"
)

# --- GES ---
print("\nLearning structure with GES algorithm...")
ges_learner = GESLearner()
learned_structures['ges'] = ges_graph = ges_learner.fit(data_processed)
# Score the learned graph against the graph loaded with the dataset.
ges_metrics = compute_graph_metrics(structure, ges_graph)
print(
    f"GES: {ges_graph.number_of_edges()} edges, SHD={ges_metrics['shd']}, F1={ges_metrics['f1']:.3f}"
)

## Step 6: Evaluate Models with Different Structures

Evaluate each generative model using both ground truth and learned structures.

In [None]:
from src.evaluation.cautabbench_eval import CauTabBenchEvaluator

evaluator = CauTabBenchEvaluator(method='fisherz', alpha=0.05)

# The transformed real and synthetic tables do not depend on which causal graph
# is being evaluated, so compute them once instead of once per
# (structure, model) pair.
# NOTE(review): assumes preprocessor.transform and evaluator.evaluate do not
# modify their inputs in place — confirm.
real_transformed = preprocessor.transform(data)
synth_transformed_by_model = {
    model_name: preprocessor.transform(synthetic_data)
    for model_name, synthetic_data in synthetic_datasets.items()
}

# Ground truth first, then every learned structure in insertion order, so any
# learner added to `learned_structures` is evaluated without editing this cell.
candidate_structures = {'ground_truth': structure, **learned_structures}

# evaluation_results[structure name][model name] -> dict returned by evaluator.evaluate
evaluation_results = {}

for struct_name, causal_graph in candidate_structures.items():
    print(f"\nEvaluating with {struct_name} structure:")
    print("-" * 50)

    struct_results = {}

    for model_name, synth_transformed in synth_transformed_by_model.items():
        results = evaluator.evaluate(real_transformed, synth_transformed, causal_graph)
        struct_results[model_name] = results

        print(f"{model_name:20s}: Quality={results['quality_score']:.4f}, "
              f"Agreement={results['agreement_rate']:.4f}")

    evaluation_results[struct_name] = struct_results

## Step 7: Compare Rankings

Compare how model rankings change across different structures.

In [None]:
from src.evaluation.ranking import RankingComparator

comparator = RankingComparator()

# Register one ranking per structure: models ordered by descending quality score.
for struct_name, model_results in evaluation_results.items():
    rankings = sorted(
        ((model, res['quality_score']) for model, res in model_results.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )
    comparator.add_ranking(struct_name, rankings)

    print(f"\n{struct_name} ranking:")
    for position, (model, score) in enumerate(rankings, 1):
        print(f"  {position}. {model:20s}: {score:.4f}")

# Comparing against the ground-truth ranking needs at least two registered rankings.
if len(comparator.comparisons) > 1:
    comparison = comparator.compare_rankings('ground_truth')

    banner = "="*50
    print("\n" + banner)
    print("Ranking Correlation with Ground Truth:")
    print(banner)

    for struct, metrics in comparison['comparisons'].items():
        print(f"\n{struct}:")
        print(f"  Kendall's tau: {metrics['kendall_tau']:.3f}")
        print(f"  Spearman's rho: {metrics['spearman_rho']:.3f}")
        print(f"  Top model match: {metrics['top_model_match']}")
        print(f"  Avg positional difference: {metrics['avg_positional_difference']:.2f}")

## Conclusion

This workflow demonstrated:
1. Exploring the available datasets and their properties
2. Data preprocessing with automatic feature type detection
3. Training multiple generative models
4. Generating synthetic data from the trained models
5. Learning causal structures with different algorithms
6. Evaluating models using the CauTabBench methodology
7. Comparing model rankings across different structure learning methods

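Key insights:
- Model rankings can change significantly depending on the structure used
- Some structure learning algorithms preserve the ground-truth ranking better than others
- The choice of structure learning algorithm matters for fair model comparison

Note: this example ranks only two models, so (barring ties) Kendall's tau and Spearman's rho can only be +1 or -1. Include more generative models to obtain informative rank correlations.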