# Unified Terraform Optimization Pipeline

This notebook combines our Terraform optimization experiments into a single pipeline, adding experiment tracking, pipeline orchestration, and inference monitoring:

1. MLflow Integration:
   - Experiment tracking
   - Model versioning
   - Hyperparameter optimization
   - Artifact management

2. Kubeflow Integration:
   - Pipeline orchestration
   - Distributed training
   - Resource management
   - Model serving

3. Langfuse Integration:
   - Inference monitoring
   - Cost tracking
   - Performance analysis
   - Quality metrics

4. Advanced Features:
   - Multi-task optimization
   - Continuous fine-tuning
   - PR generation
   - A/B testing

In [None]:
import os
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional

import kfp
import kfp.dsl as dsl
import mlflow
import optuna
import pytorch_lightning as pl
from kfp.components import create_component_from_func
from langfuse import Langfuse

# Setup monitoring
from model_training.monitoring import MonitoringCallback, InferenceMonitor

# Initialize platforms
# Point the MLflow client at the tracking server on localhost:5000 and select
# the experiment name used for runs started from this notebook.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("terraform_unified_optimization")
# Kubeflow Pipelines client bound to localhost:8000; the final cell uses it to
# submit the pipeline run.
client = kfp.Client(host="http://localhost:8000")

# Load config
# Parse KEY=VALUE pairs from the dotenv-style secrets file into `config`.
# Blank lines and `#` comment lines are skipped, whitespace around keys and
# values is trimmed, and one pair of matching surrounding quotes is removed,
# so `KEY = "value"` and `KEY=value` produce the same entry.
# A missing file still raises, because later cells index `config` directly.
config = {}
with open("/workspace/configs/api_keys.env", encoding="utf-8") as f:
    for raw_line in f:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # Split on the first "=" only, so values may themselves contain "=".
        key, sep, value = line.partition("=")
        if not sep:
            # Not an assignment; skipped, as in the original loader.
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        config[key.strip()] = value

## 1. Define Optimization Targets

Set up different optimization scenarios

In [None]:
@dataclass
class OptimizationTarget:
    """Describe one optimization scenario the pipeline can be run for.

    Instances are plain data holders; the class adds no behaviour beyond what
    ``@dataclass`` generates.
    """

    # Identifier of the scenario, e.g. "cost_optimization".
    name: str
    # Terraform resource type names covered by the scenario.
    resource_types: List[str]
    # Names of the metrics associated with the scenario.
    metrics: List[str]
    # Named limits with heterogeneous values, hence ``Any``.  The previous
    # annotation used the builtin function ``any``, which is not a type.
    constraints: Dict[str, Any]
    # Named float thresholds consulted when deciding whether a model passed.
    success_criteria: Dict[str, float]

# Optimization scenarios keyed by short target name.  Each OptimizationTarget
# is built positionally in field order:
#   name, resource_types, metrics, constraints, success_criteria
TARGETS = dict(
    cost=OptimizationTarget(
        "cost_optimization",
        ["aws_instance", "aws_eks_cluster", "aws_lambda_function"],
        ["cost", "performance", "availability"],
        {"min_availability": 0.99, "max_response_time": 200},
        {"cost_reduction": 0.10, "performance_degradation_max": 0.05},
    ),
    security=OptimizationTarget(
        "security_optimization",
        ["aws_security_group", "aws_iam_role", "aws_kms_key"],
        ["compliance_score", "attack_surface", "cost"],
        {"min_compliance": 0.95, "max_cost_increase": 0.15},
        {"compliance_improvement": 0.20, "attack_surface_reduction": 0.30},
    ),
    performance=OptimizationTarget(
        "performance_optimization",
        ["aws_rds_instance", "aws_elasticache_cluster", "aws_elasticsearch_domain"],
        ["latency", "throughput", "cost"],
        {"max_cost_increase": 0.20, "min_availability": 0.995},
        {"latency_reduction": 0.25, "throughput_improvement": 0.15},
    ),
)

## 2. Data Pipeline Components

Enhanced data processing with Kubeflow

In [None]:
def process_terraform_data(data_path: str, target: str) -> Dict[str, str]:
    """Extract the resources relevant to one target and split them 80/20.

    Args:
        data_path: Path to the TerraDS SQLite database file.
        target: Key into the module-level ``TARGETS`` mapping; its
            ``resource_types`` select which rows are kept.

    Returns:
        A dict with keys ``"train"`` and ``"validation"`` whose values are the
        paths of the Parquet files written under ``/tmp/processed_data``.

    Raises:
        KeyError: If ``target`` is not a key of ``TARGETS``.
    """
    import sqlite3
    from contextlib import closing
    from pathlib import Path

    import pandas as pd

    # Resolve the target before touching the database: sqlite3.connect creates
    # the file when it does not exist, so failing first avoids stray files.
    target_config = TARGETS[target]
    resource_types = list(target_config.resource_types)

    # One "?" per resource type so the values are bound by the driver rather
    # than spliced into the SQL text.
    placeholders = ", ".join("?" for _ in resource_types)
    query = f"""
        SELECT r.*, m.repository_id
        FROM resources r
        JOIN modules m ON r.module_id = m.id
        WHERE r.is_managed = 1
        AND r.type IN ({placeholders})
    """

    # closing() guarantees the connection is released even if the query fails;
    # a bare sqlite3 connection used as a context manager only manages the
    # transaction, it does not close the connection.
    with closing(sqlite3.connect(data_path)) as conn:
        df = pd.read_sql(query, conn, params=resource_types)

    # Save processed data
    output_dir = Path("/tmp/processed_data")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Fixed seed keeps the train/validation split reproducible between runs.
    train_df = df.sample(frac=0.8, random_state=42)
    val_df = df.drop(train_df.index)

    train_path = output_dir / f"{target}_train.parquet"
    val_path = output_dir / f"{target}_val.parquet"

    train_df.to_parquet(train_path)
    val_df.to_parquet(val_path)

    return {
        "train": str(train_path),
        "validation": str(val_path)
    }

# Create KubeFlow component
# `sqlite3` is part of the Python standard library and has no installable
# distribution on PyPI, so it must not appear in packages_to_install; only the
# third-party packages the function imports are listed.
process_data_op = create_component_from_func(
    func=process_terraform_data,
    base_image='python:3.10',
    packages_to_install=['pandas', 'pyarrow']
)

## 3. Advanced Training Pipeline

Multi-task training with MLflow tracking

In [None]:
def train_model(
    data_paths: Dict[str, str],
    target: str,
    hyperparams: Dict
) -> str:
    """Train model with comprehensive monitoring"""
    # NOTE(review): the docstring above is kept verbatim because the component
    # factory may use it as the component description - confirm before editing.
    #
    # data_paths  - dict with "train" and "validation" entries, each handed to
    #               create_dataloader.
    # target      - key into TARGETS; logged together with its resource types.
    # hyperparams - must provide "model_name", "learning_rate" and "epochs";
    #               "grad_accum" is optional and defaults to 4.
    # Returns the id of the MLflow run the training was logged under.
    import mlflow
    from model_training.continuous_finetuning import TerraformMultiTaskOptimizer

    # Lightning callback wired to Langfuse with the keys loaded into `config`.
    langfuse_callback = MonitoringCallback(
        langfuse_public_key=config["LANGFUSE_PUBLIC_KEY"],
        langfuse_secret_key=config["LANGFUSE_SECRET_KEY"]
    )

    with mlflow.start_run() as active_run:
        # Record the hyperparameters plus the target they were used for.
        logged_params = dict(hyperparams)
        logged_params["target"] = target
        logged_params["resource_types"] = TARGETS[target].resource_types
        mlflow.log_params(logged_params)

        optimizer = TerraformMultiTaskOptimizer(
            model_name=hyperparams["model_name"],
            targets=TARGETS,
            learning_rate=hyperparams["learning_rate"]
        )

        # Single GPU, 16-bit precision, gradient accumulation from hyperparams.
        epoch_limit = hyperparams["epochs"]
        accumulation_steps = hyperparams.get("grad_accum", 4)
        trainer = pl.Trainer(
            max_epochs=epoch_limit,
            accelerator="gpu",
            devices=1,
            precision=16,
            callbacks=[langfuse_callback],
            accumulate_grad_batches=accumulation_steps
        )

        train_loader = create_dataloader(data_paths["train"])
        val_loader = create_dataloader(data_paths["validation"])
        trainer.fit(
            optimizer,
            train_dataloaders=train_loader,
            val_dataloaders=val_loader
        )

        # callback_metrics values expose .item(); convert them to plain numbers
        # before handing them to MLflow.
        final_metrics = {}
        for metric_name, metric_value in trainer.callback_metrics.items():
            final_metrics[metric_name] = metric_value.item()
        mlflow.log_metrics(final_metrics)

        # Store the trained model under the "model" artifact path of this run.
        mlflow.pytorch.log_model(optimizer, "model")

        return active_run.info.run_id

# Create KubeFlow component
# Wraps train_model as a pipeline component on the PyTorch CUDA runtime image,
# requesting mlflow, pytorch-lightning and langfuse as additional packages.
train_model_op = create_component_from_func(
    train_model,
    packages_to_install=["mlflow", "pytorch-lightning", "langfuse"],
    base_image="pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime",
)

## 4. Evaluation and PR Generation

Advanced evaluation with multiple criteria

In [None]:
def evaluate_and_generate_pr(
    run_id: str,
    target: str,
    eval_data: str
) -> Dict[str, str]:
    """Evaluate a trained model and, if it passes, register it and open a PR.

    Args:
        run_id: MLflow run that logged the model under the ``model`` artifact
            path; the evaluation metrics are logged back to the same run.
        target: Key into the module-level ``TARGETS`` mapping.
        eval_data: Evaluation dataset reference, passed unchanged to
            ``evaluate_model``.

    Returns:
        A dict with string values for ``model_version``, ``pr_url`` and
        ``improvements``.  ``model_version`` and ``pr_url`` are empty strings
        when the success criteria are not met, in which case ``improvements``
        holds the stringified evaluation metrics.

    Raises:
        KeyError: If ``target`` is unknown, a Langfuse key is missing from
            ``config``, or a success-criterion metric is absent from the
            evaluation results.
    """
    import mlflow
    from model_training.monitoring import InferenceMonitor

    def criterion_met(results, criterion, threshold):
        # Criteria whose name ends in "_max" (e.g. "performance_degradation_max")
        # are ceilings: the measured value must not exceed the threshold.  All
        # other criteria are minimum improvements.  Comparing everything with
        # ">=" would only pass a model whose degradation is *at least* the
        # allowed maximum.
        if criterion.endswith("_max"):
            return results[criterion] <= threshold
        return results[criterion] >= threshold

    target_config = TARGETS[target]

    # Initialize monitoring
    monitor = InferenceMonitor(
        langfuse_public_key=config["LANGFUSE_PUBLIC_KEY"],
        langfuse_secret_key=config["LANGFUSE_SECRET_KEY"],
        model_version=run_id
    )

    # Start evaluation trace; it is tagged with the target's first resource type.
    trace = monitor.start_optimization_trace(
        resource_type=target_config.resource_types[0],
        optimization_target=target
    )

    try:
        # Load model
        model = mlflow.pytorch.load_model(f"runs:/{run_id}/model")

        # Evaluate model
        metrics = evaluate_model(model, eval_data)

        # Re-open the training run so evaluation metrics sit next to it.
        with mlflow.start_run(run_id=run_id):
            mlflow.log_metrics(metrics)

        # Check success criteria
        success = all(
            criterion_met(metrics, criterion, threshold)
            for criterion, threshold in target_config.success_criteria.items()
        )

        if success:
            # Register model
            mv = mlflow.register_model(
                f"runs:/{run_id}/model",
                f"terraform-optimizer-{target}"
            )

            # Generate PR
            pr_info = generate_optimization_pr(
                model=model,
                target=target,
                metrics=metrics
            )

            return {
                # Coerced so the value honours the declared Dict[str, str].
                "model_version": str(mv.version),
                "pr_url": pr_info["pr_url"],
                "improvements": str(pr_info["improvements"])
            }

        return {
            "model_version": "",
            "pr_url": "",
            "improvements": str(metrics)
        }

    finally:
        # Close the trace on success, on failure, and on early return alike.
        trace.end()

# Create KubeFlow component
# Wraps evaluate_and_generate_pr as a pipeline component on the PyTorch CUDA
# runtime image, requesting mlflow and langfuse as additional packages.
evaluate_op = create_component_from_func(
    evaluate_and_generate_pr,
    packages_to_install=["mlflow", "langfuse"],
    base_image="pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime",
)

## 5. Unified Pipeline Definition

Complete pipeline with all components

In [None]:
@dsl.pipeline(
    name='terraform-unified-pipeline',
    description='End-to-end Terraform optimization pipeline'
)
def optimization_pipeline(
    data_path: str = "/network/mlops/datasets/TerraDS.sqlite",
    target: str = "cost",
    hyperparams: Dict = {
        "model_name": "nuibang/Cline_FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview",
        "learning_rate": 2e-5,
        "epochs": 10,
        "grad_accum": 4
    }
):
    # For every key of TARGETS: process data -> train -> evaluate, then deploy
    # when the evaluation step reports a non-empty model version.
    #
    # NOTE(review): the fan-out iterates over all TARGETS keys, so the `target`
    # parameter is never read (the original rebinds that name as the loop
    # variable) - confirm whether a single-target run was intended.
    with dsl.ParallelFor(list(TARGETS)) as target_key:
        data_step = process_data_op(
            data_path=data_path,
            target=target_key
        )

        train_step = train_model_op(
            data_paths=data_step.output,
            target=target_key,
            hyperparams=hyperparams
        )

        # NOTE(review): process_terraform_data and evaluate_and_generate_pr are
        # annotated as returning one Dict each - verify that the components
        # really expose outputs named "validation" and "model_version".
        eval_step = evaluate_op(
            run_id=train_step.output,
            target=target_key,
            eval_data=data_step.outputs["validation"]
        )

        # Deployment is gated on the evaluation step having registered a model.
        registered_version = eval_step.outputs["model_version"]
        with dsl.Condition(registered_version != ""):
            dsl.ContainerOp(
                name='deploy-model',
                image='kfserving-deployer:latest',
                command=['python', 'deploy.py'],
                arguments=[
                    '--model-name', f'terraform-optimizer-{target_key}',
                    '--version', eval_step.outputs["model_version"]
                ]
            )

## 6. Run Pipeline

Execute the complete pipeline with monitoring

In [None]:
# Submit pipeline
# The arguments repeat the defaults declared on optimization_pipeline so the
# submitted values are visible in this cell.
run = client.create_run_from_pipeline_func(
    optimization_pipeline,
    arguments=dict(
        data_path="/network/mlops/datasets/TerraDS.sqlite",
        target="cost",
        hyperparams=dict(
            model_name="nuibang/Cline_FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview",
            learning_rate=2e-5,
            epochs=10,
            grad_accum=4,
        ),
    ),
)

# Print monitoring URLs
# Only the first line interpolates a value; the rest are fixed local endpoints.
print(f"Pipeline run: {run.run_id}")
print("MLflow UI: http://localhost:5000")
print("KubeFlow UI: http://localhost:8000")
print("LangFuse UI: http://localhost:3000")