# SUTRA - Complete Usage Guide

This notebook demonstrates how to use the SUTRA library, step by step.

## Step 1: Install SUTRA

```bash
pip install sutra
```

## Step 2: Import SUTRA

In [None]:
import os
from getpass import getpass

import pandas as pd

from sutra import SutraClient

## Step 3: Enter Your OpenAI API Key

You can provide the API key in two ways:
1. Directly when creating the client
2. As an environment variable

In [None]:
# Method 1: pass the API key explicitly.
# The key is read from the OPENAI_API_KEY environment variable, falling back to
# a hidden prompt, rather than being pasted in as a string literal -- a literal
# key would be saved (and shared) together with the notebook.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
client = SutraClient(api_key=api_key)

# Method 2: using the environment variable only.
# With OPENAI_API_KEY already set (e.g. exported in the shell that launched
# Jupyter), create the client without arguments:
# client = SutraClient()

## Step 4: Upload Data

SUTRA supports multiple file formats:
- CSV files
- Excel files (.xlsx, .xls)
- JSON files
- PDF files
- Word documents (.docx)
- Text files (.txt)
- HTML files
- Pandas DataFrames

In [None]:
# CSV file, uploaded without an explicit table name
client.upload_data("sales_data.csv")

# Excel workbook, stored under a table name we choose ourselves
client.upload_data(
    "products.xlsx",
    table_name="products",
)

# JSON file, again without an explicit table name
client.upload_data("customers.json")

In [None]:
# A DataFrame that is already in memory can be uploaded as-is,
# without writing it to a file first.
inventory_columns = {
    'product': ['A', 'B', 'C'],
    'price': [10, 20, 30],
    'quantity': [100, 200, 150],
}
inventory_df = pd.DataFrame(inventory_columns)

client.upload_dataframe(inventory_df, table_name="inventory")

## Step 5: View Database Tables

After uploading data, the database is automatically created and managed.

In [None]:
# Ask the client which tables it currently holds, then show them
available_tables = client.list_tables()
print("Available tables:", available_tables)

In [None]:
# Fetch the metadata the client keeps for one table
table_info = client.get_table_info("sales_data")

# One line per column: name and type
print("\nTable columns:")
for column in table_info['columns']:
    column_name, column_type = column['name'], column['type']
    print(f"  - {column_name}: {column_type}")

# The sample rows are left as the final expression so Jupyter renders them as a table
print("\nSample data:")
pd.DataFrame(table_info['sample_data'])

## Step 6: Direct SQL Queries (Optional)

You can access the database directly with SQL without using the API.

In [None]:
# Run a hand-written SQL statement against the uploaded data
preview_sql = "SELECT * FROM sales_data LIMIT 10"
preview = client.execute_sql(preview_sql)

# Show the returned rows as a table
pd.DataFrame(preview['results'])

In [None]:
# Same direct-SQL path, this time asking for a visualization as well
region_totals_sql = "SELECT region, SUM(amount) as total FROM sales_data GROUP BY region"
region_totals = client.execute_sql(region_totals_sql, visualize=True)

# Show the returned rows as a table
pd.DataFrame(region_totals['results'])

## Step 7: Natural Language Queries

Ask questions in plain English!

In [None]:
# Start with a simple plain-English question
total_sales = client.query("What are the total sales?")

# Show the returned rows as a table
pd.DataFrame(total_sales['results'])

In [None]:
# A more involved question: a ranking with a limit
top_products = client.query("Show me the top 5 products by revenue")
pd.DataFrame(top_products['results'])

In [None]:
# Another involved question: an average broken down by group
avg_order_by_region = client.query("What is the average order value by region?")
pd.DataFrame(avg_order_by_region['results'])

In [None]:
# With return_sql=True the response also carries the generated SQL under
# 'sql_query', which helps when debugging
monthly_trend = client.query("Show monthly sales trend", return_sql=True)

print("Generated SQL:", monthly_trend['sql_query'])
pd.DataFrame(monthly_trend['results'])

## Step 8: Visualization

Choose whether or not a query should also produce a visualization.

In [None]:
# Query WITH visualization
sales_trend = client.query("Show me sales trend over the last 6 months", visualize=True)

# The visualization, when one is available, is shown automatically;
# the rows themselves are displayed as a table below.
pd.DataFrame(sales_trend['results'])

In [None]:
# Query WITHOUT visualization: only the tabular rows are wanted here
california_customers = client.query("List all customers from California", visualize=False)

pd.DataFrame(california_customers['results'])

## Additional Features

### Provide Feedback to Improve Results

In [None]:
# Case 1: the generated SQL was right -- confirm it
accepted_sql = "SELECT SUM(amount) FROM sales"
client.provide_feedback(query="Show total sales", generated_sql=accepted_sql, is_correct=True)

# Case 2: the generated SQL was wrong and the correct statement is known --
# report both the rejected SQL and the correction
rejected_sql = "SELECT AVG(price) FROM products"
corrected_sql = "SELECT AVG(unit_price) FROM products WHERE active = 1"
client.provide_feedback(
    query="Show average price",
    generated_sql=rejected_sql,
    is_correct=False,
    correct_sql=corrected_sql,
)

### View Database Schema

In [None]:
# Retrieve the schema for the whole database and print it
db_schema = client.get_schema()
print(db_schema)

### Clear Cache

In [None]:
# Clear the client's query cache.
# NOTE(review): presumably this forces later queries to be recomputed instead
# of served from cached results -- confirm against the SUTRA docs.
client.clear_cache()

### Close Connection

In [None]:
# When done, close the database connection.
# Run this last: the cells above all use `client`.
# NOTE(review): presumably the client cannot be used after close() -- confirm;
# if so, re-run the client-creation cell to keep working.
client.close()

## Complete Example: End-to-End Workflow

In [None]:
# Self-contained end-to-end example: it carries its own imports so the cell can
# be copied into a fresh script or notebook and run as-is.
import os
from getpass import getpass

import pandas as pd

from sutra import SutraClient

# 1. Initialize
# Read the key from the environment (or a hidden prompt) instead of embedding a
# literal key that would be saved together with the notebook.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
client = SutraClient(api_key=api_key)

try:
    # 2. Create sample data (every column has exactly 100 entries)
    sales_df = pd.DataFrame({
        'date': pd.date_range('2024-01-01', periods=100),
        'product': ['A', 'B', 'C'] * 33 + ['A'],
        'amount': [100, 200, 150] * 33 + [100],
        'region': ['North', 'South', 'East', 'West'] * 25
    })

    # 3. Upload data
    client.upload_dataframe(sales_df, "sales")

    # 4. Check tables
    # Printed explicitly: a bare expression in the middle of a cell is not
    # displayed, so the result would otherwise be silently discarded.
    print("Available tables:", client.list_tables())

    # 5. Ask natural language questions
    result1 = client.query("What are the total sales?")
    print("\nTotal Sales:")
    print(pd.DataFrame(result1['results']))

    result2 = client.query("Show me sales by region", visualize=True)
    print("\nSales by Region:")
    print(pd.DataFrame(result2['results']))

    # 6. Direct SQL if needed
    result3 = client.execute_sql("SELECT product, AVG(amount) FROM sales GROUP BY product")
    print("\nAverage by Product:")
    print(pd.DataFrame(result3['results']))
finally:
    # 7. Close -- in `finally` so the connection is released even when one of
    # the steps above raises.
    client.close()