# NL-to-SQL Inference Demo

This notebook demonstrates how to use the NL-to-SQL inference engine to convert natural-language questions into SQL queries and run them with the SQL executor.

In [None]:
# Setup
import sys
import os
sys.path.append('..')

from app.inference import NL2SQLInference
from app.sql_executor import SQLExecutor
import pandas as pd

## Initialize Components

In [None]:
# Create the inference engine with its default constructor arguments.
# NOTE(review): the status messages suggest the model is loaded inside the
# constructor -- confirm against app.inference.NL2SQLInference.
print("Loading NL-to-SQL model...")
inference = NL2SQLInference()
print("✅ Model loaded successfully")

# Create the SQL executor with its default constructor arguments.
# NOTE(review): presumably this opens/prepares the database used by the later
# cells -- confirm against app.sql_executor.SQLExecutor.
print("Setting up database...")
executor = SQLExecutor()
print("✅ Database ready")

## Explore Database Schema

In [None]:
# Fetch the schema description once; later cells reuse `schema_info`.
schema_info = executor.get_schema_info()

# One line per table: "<table>: <col1>, <col2>, ..."
print("Database Tables:")
for name, details in schema_info['tables'].items():
    column_list = ', '.join(column['name'] for column in details['columns'])
    print(f"  {name}: {column_list}")

## Basic NL-to-SQL Generation

In [None]:
# Ask a single question without supplying any schema context.
question = "Show me all customers"
print(f"Question: {question}")

# generate_sql is called with the question only; the returned mapping is read
# through its 'sql' and 'confidence' keys.
result = inference.generate_sql(question)
print(f"Generated SQL: {result['sql']}")
# Confidence is shown with two decimal places.
print(f"Confidence: {result['confidence']:.2f}")

## SQL Generation with Schema Context

In [None]:
# Build a compact one-line schema description to pass to the model as context,
# in the form "Tables: table_a(col1, col2), table_b(col1, ...)".
schema_parts = [
    f"{table_name}({', '.join(col['name'] for col in table_info['columns'])})"
    for table_name, table_info in schema_info['tables'].items()
]
schema_str = "Tables: " + ", ".join(schema_parts)

# Preview at most 200 characters. The ellipsis is appended only when the text
# was actually cut off, so a short schema is not shown as if it were truncated.
preview_limit = 200
if len(schema_str) > preview_limit:
    schema_preview = schema_str[:preview_limit] + "..."
else:
    schema_preview = schema_str
print(f"Schema context: {schema_preview}")

In [None]:
# Ask a question and pass the formatted schema string as the second argument,
# so the model receives the table/column names as context.
question = "What are the total sales by region?"
print(f"Question: {question}")

# Same result keys as the schema-less call: 'sql' and 'confidence'.
result = inference.generate_sql(question, schema_str)
print(f"Generated SQL: {result['sql']}")
# Confidence is shown with two decimal places.
print(f"Confidence: {result['confidence']:.2f}")

## End-to-End Pipeline

In [None]:
def process_question(question):
    """Run one question through generation and execution, printing each step.

    Uses the notebook-level ``inference``, ``executor`` and ``schema_str``
    objects: SQL is generated for ``question`` with the schema context, the
    SQL is executed, and either the leading rows of the result
    (``DataFrame.head()``) or the reported error is printed.

    Args:
        question: The natural-language question to translate.

    Returns:
        A ``(result, exec_result)`` tuple: the mapping returned by
        ``inference.generate_sql`` and the mapping returned by
        ``executor.execute_query``.
    """
    print(f"\n🔍 Question: {question}")
    print("-" * 50)

    # Step 1: natural language -> SQL
    result = inference.generate_sql(question, schema_str)
    generated_sql, score = result['sql'], result['confidence']

    print(f"📝 Generated SQL: {generated_sql}")
    print(f"🎯 Confidence: {score:.2f}")

    # Step 2: run the generated statement
    exec_result = executor.execute_query(generated_sql)

    # Failure path first, so the success path reads straight down.
    if not exec_result['success']:
        print(f"❌ Execution failed: {exec_result['error']}")
        return result, exec_result

    print(f"✅ Execution successful: {exec_result['row_count']} rows returned")

    rows = exec_result['data']
    if not rows:
        print("No data returned")
        return result, exec_result

    # Step 3: preview the leading rows
    preview = pd.DataFrame(rows)
    print("\n📊 Results:")
    print(preview.head())

    return result, exec_result

In [None]:
# Questions to push through the full generate -> execute pipeline
test_questions = [
    "Show me all customers",
    "What are the total sales?",
    "Find the top 5 products by price",
    "How many customers are in each region?",
    "What is the average order value?"
]

# Collect one summary record per question. A plain loop is kept so that the
# records gathered so far remain available if a later question raises.
results = []
for question in test_questions:
    result, exec_result = process_question(question)
    record = dict(
        question=question,
        sql=result['sql'],
        confidence=result['confidence'],
        success=exec_result['success'],
        row_count=exec_result.get('row_count', 0),
    )
    results.append(record)

## Results Summary

In [None]:
# Tabulate the per-question records collected by the pipeline run
summary_df = pd.DataFrame(results)
print("\n📈 Summary of Results:")
print(summary_df)

# Aggregate figures: share of successful executions and mean model confidence
success_rate = summary_df['success'].mean()
avg_confidence = summary_df['confidence'].mean()
total_questions = len(results)

print("\n📊 Overall Statistics:")
print(f"Success Rate: {success_rate:.1%}")
print(f"Average Confidence: {avg_confidence:.2f}")
print(f"Total Questions: {total_questions}")

## Batch Processing

In [None]:
# Questions submitted together in a single batch_generate call
batch_questions = [
    "Show customers from North region",
    "List all products in Electronics category",
    "Count total orders"
]

print("🔄 Batch Processing:")
batch_results = inference.batch_generate(batch_questions, schema_str)

# Pair each question with its result and number the entries from 1
numbered_pairs = enumerate(zip(batch_questions, batch_results), start=1)
for position, (question, result) in numbered_pairs:
    print(f"\n{position}. {question}")
    print(f"   SQL: {result['sql']}")
    print(f"   Confidence: {result['confidence']:.2f}")

## Model Configuration

In [None]:
# Dump every configuration entry exposed by the inference engine
print("🔧 Current Model Configuration:")
model_config = inference.config
for setting, current_value in model_config.items():
    print(f"  {setting}: {current_value}")

# Device in use, then the configured model name ('Unknown' when not set)
print(f"\n🖥️  Device: {inference.device}")
model_name = model_config.get('name', 'Unknown')
print(f"📦 Model: {model_name}")

## Cleanup

In [None]:
# Release the executor's resources now that the demo is finished.
# NOTE(review): presumably this closes the underlying database connection --
# confirm against SQLExecutor.close. Run this cell last: earlier cells call
# methods on `executor`.
executor.close()
print("✅ Demo completed successfully!")