# SQL Query Buddy - Testing Notebook

This notebook demonstrates how to use SQL Query Buddy to convert natural-language questions into SQL queries, execute them, and get AI-driven insights.


In [None]:
# Install required packages (run once per environment).
# Uncomment the line below to install everything listed in requirements.txt.
# NOTE(review): inside a notebook, `%pip install -r requirements.txt` is usually
# preferable to `!pip`, because `%pip` targets the running kernel's environment.
# !pip install -r requirements.txt


In [None]:
import os
from dotenv import load_dotenv

# Read settings from a .env file into the process environment.
load_dotenv()

# If OPENAI_API_KEY is not supplied through .env, uncomment and set it here:
# os.environ["OPENAI_API_KEY"] = "your-api-key-here"

# Connection settings; each one can be overridden by an environment variable
# of the same name, otherwise the local default on the right is used.
DATABASE_URL = os.environ.get("DATABASE_URL", "sqlite:///sample_database.db")
VECTOR_DB_PATH = os.environ.get("VECTOR_DB_PATH", "./vector_store")


In [None]:
# One-time setup: create the sample database file used by the examples.
from setup_sample_database import create_sample_database

# NOTE(review): this filename matches the default DATABASE_URL
# ("sqlite:///sample_database.db"); if DATABASE_URL is overridden, the file
# created here may not be the database that later cells query -- confirm.
sample_db_path = "sample_database.db"
create_sample_database(sample_db_path)
print("Sample database created!")


In [None]:
# Build every SQL Query Buddy component used by the example cells.
from vector_store import VectorStoreManager
from sql_generator import SQLGenerator
from query_executor import QueryExecutor
from insight_generator import InsightGenerator
from context_manager import ContextManager

# The SQL generator and the insight generator are configured with the same model.
LLM_MODEL_NAME = "gpt-4-turbo-preview"

# Vector store: configured from DATABASE_URL / VECTOR_DB_PATH, then built.
print("Initializing vector store...")
vector_store = VectorStoreManager(database_url=DATABASE_URL, vector_db_path=VECTOR_DB_PATH)
vector_store.build_vector_store(include_samples=True)
print("✅ Vector store ready!")

# SQL generator: receives the vector store and runs at temperature 0.1.
sql_generator = SQLGenerator(
    vector_store_manager=vector_store,
    model_name=LLM_MODEL_NAME,
    temperature=0.1,
)

# Query executor: points at the same database and is handed the SQL generator.
query_executor = QueryExecutor(database_url=DATABASE_URL, sql_generator=sql_generator)

# Insight generator: same model as the SQL generator, temperature 0.3.
insight_generator = InsightGenerator(model_name=LLM_MODEL_NAME, temperature=0.3)

# Context manager: configured with max_history=20.
context_manager = ContextManager(max_history=20)

print("✅ All components initialized!")


In [None]:
# Example 1: Simple query
# Flow: question -> generated SQL -> execution -> insights -> conversation context.
question = "Show me the top 5 customers by total sales"
divider = "\n" + "=" * 80

print("Question:", question)
print(divider)

# Generate SQL, passing along whatever conversation history exists so far.
history = context_manager.get_conversation_history()
sql_result = sql_generator.generate_sql(question=question, conversation_history=history)

print("\nGenerated SQL:")
generated_sql = sql_result["sql"]
print(generated_sql)

print("\nExplanation:")
explanation = sql_result["explanation"]
print(explanation)

# Run the generated statement through the executor.
results = query_executor.execute_safe_query(generated_sql, return_dataframe=True)

if not results["success"]:
    # Failed executions are reported only; they are not added to the context.
    print(f"\n❌ Error: {results['error']}")
else:
    print(f"\n✅ Query executed successfully! ({results['row_count']} rows)")
    print("\nResults:")
    display(results["data"])

    # Ask the insight generator to comment on the results.
    insights = insight_generator.generate_insights(
        query=generated_sql,
        results=results,
        original_question=question,
    )
    print(divider)
    print("AI Insights:")
    print(insights)

    # Record the exchange so that follow-up questions can build on it.
    context_manager.add_exchange(
        question=question,
        sql_query=generated_sql,
        results=results,
        explanation=explanation,
        insights=insights,
    )


In [None]:
# Example 2: Follow-up question (uses context from previous query)
# The question has no subject of its own; the conversation history passed to
# the generator is what supplies the earlier exchange.
question = "Now filter them to California only"
divider = "\n" + "=" * 80

print("Question:", question)
print(divider)

# Generate SQL with the accumulated conversation history.
history = context_manager.get_conversation_history()
sql_result = sql_generator.generate_sql(question=question, conversation_history=history)

print("\nGenerated SQL:")
generated_sql = sql_result["sql"]
print(generated_sql)

print("\nExplanation:")
explanation = sql_result["explanation"]
print(explanation)

# Run the generated statement through the executor.
results = query_executor.execute_safe_query(generated_sql, return_dataframe=True)

if not results["success"]:
    # Failed executions are reported only; they are not added to the context.
    print(f"\n❌ Error: {results['error']}")
else:
    print(f"\n✅ Query executed successfully! ({results['row_count']} rows)")
    print("\nResults:")
    display(results["data"])

    # Ask the insight generator to comment on the results.
    insights = insight_generator.generate_insights(
        query=generated_sql,
        results=results,
        original_question=question,
    )
    print(divider)
    print("AI Insights:")
    print(insights)

    # Record the exchange so that later questions can build on it.
    context_manager.add_exchange(
        question=question,
        sql_query=generated_sql,
        results=results,
        explanation=explanation,
        insights=insights,
    )


## Example 3: Complex Multi-Table Query


In [None]:
# Example 3: Complex query with joins
# Same flow as the earlier examples, with a question intended to span several tables.
question = "What's the total revenue from product sales by category this year?"
divider = "\n" + "=" * 80

print("Question:", question)
print(divider)

# Generate SQL with the accumulated conversation history.
history = context_manager.get_conversation_history()
sql_result = sql_generator.generate_sql(question=question, conversation_history=history)

print("\nGenerated SQL:")
generated_sql = sql_result["sql"]
print(generated_sql)

print("\nExplanation:")
explanation = sql_result["explanation"]
print(explanation)

# Run the generated statement through the executor.
results = query_executor.execute_safe_query(generated_sql, return_dataframe=True)

if not results["success"]:
    # Failed executions are reported only; they are not added to the context.
    print(f"\n❌ Error: {results['error']}")
else:
    print(f"\n✅ Query executed successfully! ({results['row_count']} rows)")
    print("\nResults:")
    display(results["data"])

    # Ask the insight generator to comment on the results.
    insights = insight_generator.generate_insights(
        query=generated_sql,
        results=results,
        original_question=question,
    )
    print(divider)
    print("AI Insights:")
    print(insights)

    # Record the exchange in the conversation context.
    context_manager.add_exchange(
        question=question,
        sql_query=generated_sql,
        results=results,
        explanation=explanation,
        insights=insights,
    )


## Example 4: Query Optimization


In [None]:
# Example 4: Get optimization suggestions
# A hand-written query is passed to the executor's suggestion helper.
sample_query = """
SELECT c.first_name, c.last_name, o.total_amount, o.order_date
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
WHERE o.order_date > '2024-01-01'
ORDER BY o.total_amount DESC
"""

print("Original Query:")
print(sample_query)

optimization = query_executor.get_optimization_suggestions(sample_query)

divider = "\n" + "=" * 80
print(divider)
print("Optimization Suggestions:")
suggestions = optimization["suggestions"]
print(suggestions)


## Example 5: View Conversation History


In [None]:
# View conversation summary
summary = context_manager.get_summary()

# (label, summary key, format spec) for each report line, in display order.
# An empty spec means default formatting; ".2%" renders the value as a percentage.
summary_lines = [
    ("Total queries", "total_queries", ""),
    ("Successful queries", "successful_queries", ""),
    ("Success rate", "success_rate", ".2%"),
    ("Total rows returned", "total_rows_returned", ""),
    ("Conversation turns", "conversation_turns", ""),
]

print("Conversation Summary:")
for label, key, spec in summary_lines:
    print(f"  {label}: {summary[key]:{spec}}")

# Show the three most recent context entries.
divider = "\n" + "=" * 80
print(divider)
print("Recent Context:")
recent_context = context_manager.get_recent_context(n=3)
print(recent_context)


## Example 6: Test RAG Schema Retrieval


In [None]:
# Test RAG: Search for relevant schema information
test_query = "customer sales revenue"

print(f"Searching for relevant schemas for: '{test_query}'")
print("\n" + "=" * 80)

# Request the three best matches for the search text.
relevant_schemas = vector_store.search_relevant_schemas(test_query, k=3)

# Print each hit, cutting entries longer than 500 characters and marking the cut.
# Assumes each result supports len() and slicing (e.g. a string) -- TODO confirm.
preview_limit = 500
for rank, schema in enumerate(relevant_schemas, start=1):
    print(f"\n--- Result {rank} ---")
    if len(schema) > preview_limit:
        print(schema[:preview_limit] + "...")
    else:
        print(schema)
