# Snowflake AI Cost Toolkit - Setup and Data Population

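This notebook performs the initial setup of the Snowflake AI Cost Toolkit:
1. Database, schema, and core table creation
2. Data population from Cortex Analyst logs
3. Verification of the loaded data

Views, procedures, and the budget alerting system are not created here; they are set up afterwards by running `setup.sql` (see "Next steps" at the end of this notebook).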

## 1. Import Required Libraries and Setup Session


In [None]:
# Import required libraries
import pandas as pd
from snowflake.snowpark.context import get_active_session

# Import utility functions
from utils import (
    fetch_semantic_model_paths,
    get_cortex_analyst_logs,
    write_logs_to_table,
    create_sf_intelligence_query_history
)

# Bind `session` to the Snowpark session that is already active in the hosting
# environment. Every later cell issues its SQL through this object.
# NOTE(review): presumably this notebook runs inside Snowflake Notebooks, where an
# active session is provided automatically — confirm before running elsewhere.
session = get_active_session()
print("✅ Session initialized successfully")


## 2. Create Database and Schema


In [None]:
# Make sure the toolkit's database and schema exist and select them as the
# session's current context, so later cells can use unqualified table names.
# The statements are order-dependent: each USE relies on the CREATE before it.
bootstrap_statements = (
    "CREATE DATABASE IF NOT EXISTS CORTEX_ANALYTICS",
    "USE DATABASE CORTEX_ANALYTICS",
    "CREATE SCHEMA IF NOT EXISTS PUBLIC",
    "USE SCHEMA PUBLIC",
)

for statement in bootstrap_statements:
    session.sql(statement).collect()

print("✅ Database and schema created/selected: CORTEX_ANALYTICS.PUBLIC")


## 3. Create Core Tables


In [None]:
# --- CORTEX_ANALYST_LOGS ---------------------------------------------------
# DDL for the table that the population step appends to.
# CREATE OR REPLACE means that re-running this cell discards rows loaded earlier.
create_cortex_logs_table = """
CREATE OR REPLACE TABLE CORTEX_ANALYST_LOGS (
    TIMESTAMP                TIMESTAMP_NTZ,
    REQUEST_ID               STRING,
    SEMANTIC_MODEL_NAME      STRING,
    TABLES_REFERENCED        STRING,
    USER_NAME                STRING,
    SOURCE                   STRING,
    FEEDBACK                 STRING,
    RESPONSE_STATUS_CODE     INTEGER,
    USER_QUESTION            STRING,
    LATENCY_MS               NUMBER,
    GENERATED_SQL            STRING,
    ORCHESTRATION_PATH       STRING,
    QUESTION_CATEGORY        STRING,
    VERIFIED_QUERY_NAME      STRING,
    VERIFIED_QUERY_QUESTION  STRING,
    QUERY_TYPE               STRING,
    CORTEX_ANALYST_CREDITS   FLOAT
)
"""

session.sql(create_cortex_logs_table).collect()
print("✅ CORTEX_ANALYST_LOGS table created")

# --- SF_INTELLIGENCE_QUERY_HISTORY -----------------------------------------
# Built by the helper imported from utils. A failure is reported as a warning
# rather than raised, so the remaining cells can still run.
target_table = "CORTEX_ANALYTICS.PUBLIC.SF_INTELLIGENCE_QUERY_HISTORY"

try:
    create_sf_intelligence_query_history(session, target_table)
except Exception as creation_error:
    print(f"⚠️  Warning: Could not create query history table: {creation_error}")
    print("This may be due to insufficient permissions on ACCOUNT_USAGE views")
else:
    print(f"✅ {target_table} created successfully")


## 4. Fetch Semantic Model Paths and Populate Data


In [None]:
# Look up the semantic model paths via the utils helper.
# If the lookup (or rendering of the result) fails, fall back to an empty
# DataFrame so the population cell can detect "nothing to process".
try:
    df_results = fetch_semantic_model_paths(session)
    configuration_count = len(df_results)
    print(f"✅ Found {configuration_count} semantic model configurations")
    display(df_results)
except Exception as fetch_error:
    df_results = pd.DataFrame()
    print(f"⚠️  Warning: Could not fetch semantic model paths: {fetch_error}")
    print("This may be because no Cortex Agents are configured in your account")


In [None]:
# Populate CORTEX_ANALYST_LOGS with the logs of every semantic model file found
# in `df_results`. Each file is processed independently: an error on one file is
# reported and recorded, and the loop moves on to the next file.
if not df_results.empty and 'semantic_model_file' in df_results.columns:
    # dropna() already removes missing entries, so no per-item None check is needed.
    semantic_model_files = df_results['semantic_model_file'].dropna().unique().tolist()
    print(f"📄 Processing {len(semantic_model_files)} unique semantic model files")

    # Files whose retrieval or write raised, reported once the loop has finished.
    failed_files = []

    for model_file in semantic_model_files:
        print(f"\n📊 Processing: {model_file}")

        try:
            # Get logs for this semantic model
            logs_df = get_cortex_analyst_logs(session, model_file)

            if logs_df.empty:
                print("   ⚠️  No log entries found for this semantic model")
                continue

            print(f"   ✅ Retrieved {len(logs_df)} log entries")

            # Append to the existing table; the table is created in section 3,
            # so it must not be auto-created or overwritten here.
            # NOTE(review): assumes the DataFrame's column names match the
            # table's column names exactly — confirm against utils.py.
            session.write_pandas(
                logs_df,
                table_name="CORTEX_ANALYST_LOGS",
                auto_create_table=False,
                overwrite=False
            )
            print("   ✅ Data written to CORTEX_ANALYST_LOGS table")

        except Exception as e:
            print(f"   ❌ Error processing {model_file}: {e}")
            failed_files.append(model_file)

    print("\n🎉 Completed processing all semantic model files")
    if failed_files:
        # Surface failures in one place so they are not lost in the per-file output.
        print(f"⚠️  {len(failed_files)} of {len(semantic_model_files)} files could not be loaded:")
        for failed_file in failed_files:
            print(f"   • {failed_file}")
else:
    print("⚠️  No semantic model files found to process")
    print("Please ensure you have Cortex Agents configured with semantic models")


## 5. Verify Data Population


In [None]:
# Count the rows that the population step loaded into CORTEX_ANALYST_LOGS.
count_row = session.sql("SELECT COUNT(*) as total_records FROM CORTEX_ANALYST_LOGS").collect()[0]
record_count = count_row['TOTAL_RECORDS']
print(f"📊 Total records in CORTEX_ANALYST_LOGS table: {record_count:,}")

if record_count <= 0:
    # Nothing was loaded: list the likely causes instead of showing empty summaries.
    for hint in (
        "\n⚠️  No data was loaded. This could be because:",
        "   • No Cortex Agents are configured",
        "   • No queries have been made to the agents yet",
        "   • Semantic model files are not accessible",
    ):
        print(hint)
else:
    # Per-model row counts together with the time range covered by the logs.
    sample_data = session.sql("""
        SELECT 
            semantic_model_name,
            COUNT(*) as log_count,
            MIN(timestamp) as earliest_log,
            MAX(timestamp) as latest_log
        FROM CORTEX_ANALYST_LOGS 
        GROUP BY semantic_model_name 
        ORDER BY log_count DESC
    """).to_pandas()

    print("\n📈 Summary by Semantic Model:")
    display(sample_data)

    # Share of each query type, as a count and as a percentage of all rows.
    query_types = session.sql("""
        SELECT 
            query_type,
            COUNT(*) as count,
            ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as percentage
        FROM CORTEX_ANALYST_LOGS 
        GROUP BY query_type
        ORDER BY count DESC
    """).to_pandas()

    print("\n🔍 Query Type Breakdown:")
    display(query_types)


## 6. Setup Summary and Next Steps


In [None]:
# Final summary of what this notebook created and loaded, plus follow-up steps.
print("🎉 SNOWFLAKE AI COST TOOLKIT SETUP COMPLETE!\n")
print("✅ Database objects created:")
print("   • CORTEX_ANALYTICS database and PUBLIC schema")
print("   • CORTEX_ANALYST_LOGS table")

# Section 3 only warns when the query history table cannot be created, so probe
# the object here instead of reporting it as created unconditionally.
# LIMIT 0 keeps the probe cheap: it only has to resolve the object name.
try:
    session.sql(
        "SELECT 1 FROM CORTEX_ANALYTICS.PUBLIC.SF_INTELLIGENCE_QUERY_HISTORY LIMIT 0"
    ).collect()
    query_history_available = True
except Exception:
    # Any failure (missing object, no privileges) means it cannot be reported as ready.
    query_history_available = False

if query_history_available:
    print("   • SF_INTELLIGENCE_QUERY_HISTORY table")
else:
    print("   ⚠️  SF_INTELLIGENCE_QUERY_HISTORY table is NOT available - "
          "review the warning in section 3 and re-run that cell")

record_count = session.sql("SELECT COUNT(*) as count FROM CORTEX_ANALYST_LOGS").collect()[0]['COUNT']
print(f"\n📊 Data populated: {record_count:,} Cortex Analyst log records")

print("\n🚀 Next steps:")
print("   1. Run the remaining setup.sql script for views and procedures")
print("   2. Deploy the Streamlit dashboard for interactive analysis")
print("   3. Set up notification integrations for budget alerts")
print("   4. Configure automated tasks for regular monitoring")

print("\n📚 Available files:")
print("   • setup.sql - Contains views, procedures, and budget alerting")
print("   • budget_alerting_examples.sql - Detailed alerting examples")
print("   • streamlit_app.py - Interactive dashboard")
print("   • utils.py - All utility functions used in this notebook")

print("\n✨ Your Cortex Analyst data is now ready for analysis!")
