# 03. Text2SQL Basic

This notebook covers:
- Understanding text2sql concepts
- Generating SQL from natural language
- Executing generated queries
- Handling errors and validation
- Logging query history

## 1. Import Libraries

In [None]:
import sys
sys.path.append('/workspace')

from src.utils.db_utils import DatabaseConnection, get_database_context
from src.utils.text2sql_utils import Text2SQLGenerator, execute_text2sql, get_few_shot_examples
import pandas as pd

# Confirmation shown once all of the imports above have resolved.
print("✓ Libraries imported successfully")

## 2. Initialize Components

In [None]:
# Open the database connection that the later cells use via `db`.
db = DatabaseConnection()

# Build the text2sql generator for the Ollama provider with the llama2 model.
# Construction is wrapped in try/except so that a setup problem produces
# actionable instructions instead of a raw traceback.
# NOTE: on a fresh kernel a failure here leaves `generator` undefined, so the
# cells that call generator.generate_sql(...) will report an error until this
# cell succeeds.
try:
    generator = Text2SQLGenerator(llm_provider="ollama", model_name="llama2")
    print("✓ Text2SQL generator initialized")
except Exception as e:
    print(f"⚠ Could not initialize Text2SQL generator: {e}")
    print("Make sure Ollama is running and a model is installed")
    print("Run: docker exec -it text2sql-ollama ollama pull llama2")

## 3. Get Database Schema Context

In [None]:
# Fetch the schema description; the example cells pass it to the generator
# as context.
schema_context = get_database_context()

# Heading, a 60-character rule, then the schema text, one item per line.
print("Database Schema Context:", "=" * 60, schema_context, sep="\n")

## 4. Review Few-Shot Examples

In [None]:
# Fetch the few-shot examples; the example cells pass them to the generator
# alongside the schema context.
examples = get_few_shot_examples()

# Heading, a 60-character rule, then the examples, one item per line.
print("Few-Shot Examples:", "=" * 60, examples, sep="\n")

## 5. Simple Text2SQL Examples

In [None]:
# Example 1: plain SELECT with no filtering
natural_query = "Show me all employees"

# Echo the question, then a blank line and the progress notice.
print(
    f"Natural Language Query: {natural_query}",
    "\nGenerating SQL...",
    sep="\n",
)

# Generation, execution and display share one try block so that a failure in
# any step is reported as a single "Error: ..." line.
try:
    sql_query = generator.generate_sql(natural_query, schema_context, examples)
    print(f"\nGenerated SQL:\n{sql_query}")

    # Run the generated statement and show what came back.
    result_df = db.execute_query_df(sql_query)
    print(f"\nResults ({len(result_df)} rows):")
    display(result_df)
except Exception as exc:
    print(f"Error: {exc}")

In [None]:
# Example 2: question that requires a filter condition
natural_query = "Find all employees with salary greater than 6000000"

# Echo the question, then a blank line and the progress notice.
print(
    f"Natural Language Query: {natural_query}",
    "\nGenerating SQL...",
    sep="\n",
)

# Generation, execution and display share one try block so that a failure in
# any step is reported as a single "Error: ..." line.
try:
    sql_query = generator.generate_sql(natural_query, schema_context, examples)
    print(f"\nGenerated SQL:\n{sql_query}")

    # Run the generated statement and show what came back.
    result_df = db.execute_query_df(sql_query)
    print(f"\nResults ({len(result_df)} rows):")
    display(result_df)
except Exception as exc:
    print(f"Error: {exc}")

In [None]:
# Example 3: question that is expected to need a JOIN
natural_query = "Show me all employees in the Engineering department"

# Echo the question, then a blank line and the progress notice.
print(
    f"Natural Language Query: {natural_query}",
    "\nGenerating SQL...",
    sep="\n",
)

# Generation, execution and display share one try block so that a failure in
# any step is reported as a single "Error: ..." line.
try:
    sql_query = generator.generate_sql(natural_query, schema_context, examples)
    print(f"\nGenerated SQL:\n{sql_query}")

    # Run the generated statement and show what came back.
    result_df = db.execute_query_df(sql_query)
    print(f"\nResults ({len(result_df)} rows):")
    display(result_df)
except Exception as exc:
    print(f"Error: {exc}")

In [None]:
# Example 4: question that asks for an aggregate
natural_query = "What is the total sales amount by region?"

# Echo the question, then a blank line and the progress notice.
print(
    f"Natural Language Query: {natural_query}",
    "\nGenerating SQL...",
    sep="\n",
)

# Generation, execution and display share one try block so that a failure in
# any step is reported as a single "Error: ..." line.
try:
    sql_query = generator.generate_sql(natural_query, schema_context, examples)
    print(f"\nGenerated SQL:\n{sql_query}")

    # Run the generated statement and show what came back.
    result_df = db.execute_query_df(sql_query)
    print(f"\nResults ({len(result_df)} rows):")
    display(result_df)
except Exception as exc:
    print(f"Error: {exc}")

## 6. Using Complete Pipeline

In [None]:
# Run one question through execute_text2sql with logging switched on.
natural_query = "List all active projects with their department names"

# Echo the question, then a blank line and the progress notice.
print(
    f"Natural Language Query: {natural_query}",
    "\nExecuting complete text2sql pipeline...",
    sep="\n",
)

result = execute_text2sql(db, natural_query, log_execution=True)

# These three fields are reported whether or not the run succeeded.
print(f"\nSuccess: {result['success']}")
print(f"Execution Time: {result['execution_time_ms']} ms")
print(f"\nGenerated SQL:\n{result['sql_query']}")

# Failure branch first: report the error; otherwise show the rows.
if not result['success']:
    print(f"\nError: {result['error']}")
else:
    print(f"\nResults ({result['row_count']} rows):")
    display(result['results'])

## 7. Try Your Own Queries

In [None]:
# Batch of sample questions; edit this list to try your own.
test_queries = [
    "How many employees are in each department?",
    "What is the average salary by job title?",
    "Show me the top 5 customers by total purchase amount",
    "List all projects that ended in 2023",
    "Find employees who are managers"
]

# Run every question through the logged pipeline, one banner per question.
for query in test_queries:
    # Blank line, 80-character rule, the question, and a closing rule.
    print("\n" + "="*80, f"Query: {query}", "="*80, sep="\n")

    result = execute_text2sql(db, query, log_execution=True)

    # Guard clause: report the failure and move on to the next question.
    if not result['success']:
        print(f"Error: {result['error']}")
        continue

    print(f"Generated SQL: {result['sql_query']}")
    print(f"\nResults ({result['row_count']} rows):")
    # Only the head of the result is shown to keep the output short.
    display(result['results'].head())

## 8. View Query History

In [None]:
# Check query history
# Selects six columns from the `query_history` table, ordered by `created_at`
# descending and limited to 10 rows, i.e. the ten most recent entries.
# NOTE(review): presumably these rows are written by the earlier
# execute_text2sql(..., log_execution=True) calls — confirm against
# src.utils.text2sql_utils.
history_query = """
SELECT 
    query_id,
    natural_language_query,
    execution_success,
    execution_time_ms,
    result_count,
    created_at
FROM query_history
ORDER BY created_at DESC
LIMIT 10
"""

# Run the query through the shared connection, then print a heading and
# display the result.
history_df = db.execute_query_df(history_query)
print("Recent Query History:")
display(history_df)

In [None]:
# Query success statistics
# Aggregates over every row of `query_history` with no GROUP BY:
#   - total_queries: row count
#   - successful / failed: rows where execution_success is true / false
#   - avg_execution_time_ms, avg_result_count: averages rounded to 2 decimals
# NOTE(review): the CASE expressions use execution_success directly as a
# condition, so this assumes a boolean column — TODO confirm against the schema.
stats_query = """
SELECT 
    COUNT(*) as total_queries,
    SUM(CASE WHEN execution_success THEN 1 ELSE 0 END) as successful,
    SUM(CASE WHEN NOT execution_success THEN 1 ELSE 0 END) as failed,
    ROUND(AVG(execution_time_ms), 2) as avg_execution_time_ms,
    ROUND(AVG(result_count), 2) as avg_result_count
FROM query_history
"""

# Run the query through the shared connection, then print a heading and
# display the result.
stats_df = db.execute_query_df(stats_query)
print("Query Statistics:")
display(stats_df)

## 9. Error Handling Examples

In [None]:
# Deliberately vague request, used to show how the pipeline reports a
# question it may not be able to answer.
ambiguous_query = "Show me the data"

print(f"Testing ambiguous query: {ambiguous_query}")
result = execute_text2sql(db, ambiguous_query, log_execution=True)

print(f"\nSuccess: {result['success']}")
if result['success']:
    # The pipeline still produced runnable SQL; show it with the first rows.
    print(f"Generated SQL: {result['sql_query']}")
    display(result['results'].head())
else:
    # Report the error and add a hint on how to rephrase the question.
    print(f"Error: {result['error']}")
    print("\n💡 Tip: Be more specific in your query!")

## Summary

In this notebook, you learned:
- ✓ How to generate SQL from natural language queries
- ✓ How to use schema context and few-shot examples
- ✓ How to execute and validate generated queries
- ✓ How to log queries for monitoring
- ✓ How to handle errors in text2sql

Next: Move to `04_agent_workflow.ipynb` to build agent workflows with LangChain/LangGraph.