# 06. 종단간(End-to-End) 워크플로우

이 노트북은 완전한 워크플로우를 시연합니다:
1. 사용자가 자연어로 질문
2. RAG가 관련 컨텍스트 검색
3. Text2SQL이 SQL 쿼리 생성
4. 쿼리 실행
5. 결과 시각화
6. 모든 것이 모니터링을 위해 로깅됨

In [None]:
import sys
# Extend the import path so the `src` package imports below can resolve.
# NOTE(review): assumes the project is mounted at /workspace — confirm for
# environments other than the one this notebook was written for.
sys.path.append('/workspace')

# Project helpers called by the workflow function defined later in this notebook.
from src.utils.db_utils import DatabaseConnection, get_database_context
from src.utils.text2sql_utils import execute_text2sql
from src.utils.embedding_utils import search_similar_documents
from src.utils.viz_utils import auto_visualize
import pandas as pd
import time

print("✓ Libraries imported successfully")

## 완전한 파이프라인 함수

In [None]:
def complete_text2sql_workflow(user_question: str, use_rag: bool = True,
                               visualize: bool = True):
    """Run the end-to-end Text2SQL pipeline for one question, printing progress.

    The steps run in order: optionally look up documents similar to the
    question, load the database schema text, generate and execute SQL via
    ``execute_text2sql`` (called with ``log_execution=True``), display the
    returned results, and optionally visualize them.

    NOTE(review): the retrieved documents and the schema are only reported
    here; neither is passed to ``execute_text2sql``. Confirm whether that
    helper gathers its own context.

    Args:
        user_question: Natural-language question to turn into SQL.
        use_rag: When true, call ``search_similar_documents`` (limit 2) and
            list the titles and similarity scores of the hits.
        visualize: When true and the result has more than one row, pass the
            results to ``auto_visualize`` and show the returned figure if it
            exposes a ``show`` method.

    Returns:
        The mapping returned by ``execute_text2sql`` on success, or ``None``
        when its ``'success'`` entry is falsy.
    """
    banner = "=" * 80
    print(banner)
    print(f"User Question: {user_question}")
    print(banner)

    db = DatabaseConnection()
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Step 1: RAG - retrieve relevant context
    if use_rag:
        print("\n[1/5] Retrieving relevant documentation...")
        docs = search_similar_documents(db, user_question, limit=2)
        if docs:
            print(f"  Found {len(docs)} relevant documents:")
            # Each hit is unpacked as a 4-item record; only the second
            # (title) and fourth (similarity) items are reported.
            for _, title, _, similarity in docs:
                print(f"    - {title} (similarity: {similarity:.4f})")
        else:
            # Make an empty lookup visible instead of silently moving on.
            print("  No relevant documents found")

    # Step 2: load the database schema
    print("\n[2/5] Loading database schema...")
    schema = get_database_context()
    print(f"  Schema loaded ({len(schema)} characters)")

    # Step 3: generate and execute the SQL
    print("\n[3/5] Generating SQL query...")
    result = execute_text2sql(db, user_question, log_execution=True)

    if not result['success']:
        print(f"  ✗ Error: {result['error']}")
        return None

    print("  ✓ SQL generated successfully")
    print(f"\n  Generated SQL:\n  {result['sql_query']}")

    # Step 4: show the results
    print("\n[4/5] Query executed successfully")
    print(f"  Execution time: {result['execution_time_ms']} ms")
    print(f"  Rows returned: {result['row_count']}")

    print("\n  Results:")
    display(result['results'])

    # Step 5: visualization (skipped for results with at most one row)
    if visualize and result['row_count'] > 1:
        print("\n[5/5] Creating visualization...")
        viz = auto_visualize(result['results'], user_question)
        if viz is None:
            # Do not claim success when nothing came back to display.
            print("  - auto_visualize returned no figure to display")
        else:
            if hasattr(viz, 'show'):
                viz.show()
            print("  ✓ Visualization created")

    total_time = (time.perf_counter() - start_time) * 1000
    print(f"\n{banner}")
    print(f"Total pipeline time: {total_time:.2f} ms")
    print(f"{banner}\n")

    return result

## 테스트 케이스

In [None]:
# Test 1: simple aggregation
complete_text2sql_workflow(
    user_question="What is the total sales amount by region?",
    visualize=True,
    use_rag=True,
)

In [None]:
# Test 2: join with filtering
complete_text2sql_workflow(
    user_question="Show me all employees in the Engineering department with their salaries",
    visualize=True,
    use_rag=True,
)

In [None]:
# Test 3: more complex aggregation
complete_text2sql_workflow(
    user_question="What is the average salary by department?",
    visualize=True,
    use_rag=True,
)

In [None]:
# Test 4: multiple joins
# Visualization is switched off: this result may not be suitable for charting.
complete_text2sql_workflow(
    user_question="List all active projects with their departments and team members",
    visualize=False,
    use_rag=True,
)

## 성능 모니터링

In [None]:
# Check recent query performance
# Creates a connection object bound to the notebook-level name `db`; the
# statistics cell that follows reuses this same name.
db = DatabaseConnection()

# Selects the ten most recently created rows of query_history, newest first.
# NOTE(review): presumably these rows are written by execute_text2sql when
# called with log_execution=True — confirm in text2sql_utils.
perf_query = """
SELECT 
    natural_language_query,
    execution_success,
    execution_time_ms,
    result_count,
    created_at
FROM query_history
ORDER BY created_at DESC
LIMIT 10
"""

# NOTE(review): execute_query_df presumably returns a pandas DataFrame —
# confirm in db_utils.
perf_df = db.execute_query_df(perf_query)
print("Recent Query Performance:")
display(perf_df)

In [None]:
# Performance statistics
# Aggregates over every row of query_history: total query count,
# average/min/max execution time, and success_rate as a percentage
# (rows with execution_success true, divided by all rows, times 100).
# The `::float` cast (PostgreSQL-style syntax) is applied to the sum before
# dividing, presumably to avoid integer division.
stats_query = """
SELECT 
    COUNT(*) as total_queries,
    AVG(execution_time_ms) as avg_time_ms,
    MIN(execution_time_ms) as min_time_ms,
    MAX(execution_time_ms) as max_time_ms,
    SUM(CASE WHEN execution_success THEN 1 ELSE 0 END)::float / COUNT(*) * 100 as success_rate
FROM query_history
"""

# Relies on `db` having been created by the recent-performance cell; run
# that cell first.
stats_df = db.execute_query_df(stats_query)
print("\nOverall Statistics:")
display(stats_df)

## Langfuse 통합 (선택사항)

In [None]:
# Example: Langfuse integration for monitoring
# Uncomment if Langfuse is configured

# from langfuse import Langfuse
# import os

# langfuse = Langfuse(
#     public_key=os.getenv('LANGFUSE_PUBLIC_KEY', 'pk-lf-test'),
#     secret_key=os.getenv('LANGFUSE_SECRET_KEY', 'sk-lf-test'),
#     host=os.getenv('LANGFUSE_HOST', 'http://localhost:53002')
# )

# # Create a trace for the text2sql workflow
# trace = langfuse.trace(
#     name="text2sql-query",
#     user_id="notebook-user",
#     metadata={"environment": "jupyter"}
# )

# # Add a generation span
# generation = trace.generation(
#     name="sql-generation",
#     model="llama2",
#     model_parameters={"temperature": 0.1},
#     input={"question": "What is the total sales?"},
#     output={"sql": "SELECT SUM(total_amount) FROM sales"}
# )

# # End the generation
# generation.end()

# # Flush to send data to Langfuse
# langfuse.flush()

print("Langfuse integration example (commented out)")
print("Configure LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY to enable")
print("Note: Create API keys from Langfuse UI at http://localhost:53002")

## 요약

이 노트북에서는 다음을 시연했습니다:
- ✓ 완전한 종단간 Text2SQL 워크플로우
- ✓ 컨텍스트 검색을 위한 RAG 통합
- ✓ 결과의 자동 시각화
- ✓ 성능 모니터링 및 로깅
- ✓ 관측성 도구를 위한 통합 지점

## 다음 단계

1. 다양한 쿼리로 실험하기
2. RAG 시스템에 더 많은 문서 추가하기
3. 모니터링을 위한 Langfuse 구성하기
4. Ollama로 다른 LLM 모델 시도하기
5. 이러한 유틸리티를 사용하여 커스텀 애플리케이션 구축하기