# Test: Snowflake Temporary Table Permissions

This notebook tests whether we have permissions to create temporary tables in Snowflake.

Temporary tables would allow us to:
- Create intermediate result sets for complex queries
- Avoid redundant CTE logic across multiple queries
- Improve query performance for multi-step analysis
- Simplify data pipeline logic

Temp tables are session-scoped and auto-drop on disconnect.

In [7]:
# --- IMPORTS ---
import os
from dotenv import load_dotenv
import snowflake.connector
import pandas as pd
from datetime import date, timedelta

print("[INFO] Imports complete")

[INFO] Imports complete


In [2]:
# --- SNOWFLAKE CONNECTION ---
# Environment variables are loaded from the .env file one directory up; the
# user, password, account and (optionally) warehouse are read from them.
load_dotenv('../.env')

try:
    # Gather the settings first so the connect() call itself stays short.
    connection_settings = dict(
        user=os.getenv('SNOWFLAKE_USER'),
        password=os.getenv('SNOWFLAKE_PASSWORD'),
        account=os.getenv('SNOWFLAKE_ACCOUNT'),
        warehouse=os.getenv('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
        database='INCREMENTALITY',
        schema='INCREMENTALITY_RESEARCH',
    )
    conn = snowflake.connector.connect(**connection_settings)
    print("[SUCCESS] Snowflake connection established.")
    # Echo the session context the connection reports.
    for label, attribute in (
        ("Database", "database"),
        ("Schema", "schema"),
        ("Warehouse", "warehouse"),
        ("User", "user"),
    ):
        print(f"  {label}: {getattr(conn, attribute)}")
except Exception as e:
    # Any failure is reported and swallowed; later cells check for `conn`.
    print(f"[FAILURE] Could not connect to Snowflake: {e}")

[SUCCESS] Snowflake connection established.
  Database: INCREMENTALITY
  Schema: INCREMENTALITY_RESEARCH
  Warehouse: COMPUTE_WH
  User: Pranjal


## Test 1: Create Simple Temporary Table

In [8]:
# --- TEST 1: CREATE TEMPORARY TABLE ---
# Walks an explicitly-defined temp table through CREATE -> INSERT -> SELECT ->
# DROP and prints a PASSED/FAILED banner. The first exception ends the run.

print("\n" + "="*80)
print("TEST 1: CREATE TEMPORARY TABLE")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():
    cursor = conn.cursor()
    # True from a successful CREATE until the table has been dropped again.
    table_needs_cleanup = False

    try:
        # Attempt to create a simple temporary table
        print("\n[ATTEMPT] Creating temporary table: TEMP_TEST_TABLE")

        create_query = """
        CREATE TEMPORARY TABLE TEMP_TEST_TABLE (
            id INTEGER,
            name VARCHAR(100),
            value FLOAT
        )
        """

        cursor.execute(create_query)
        table_needs_cleanup = True
        print("[SUCCESS] Temporary table created successfully!")

        # Insert test data
        print("\n[ATTEMPT] Inserting test data...")
        insert_query = """
        INSERT INTO TEMP_TEST_TABLE VALUES
            (1, 'test_a', 100.5),
            (2, 'test_b', 200.7),
            (3, 'test_c', 300.9)
        """
        cursor.execute(insert_query)
        print("[SUCCESS] Test data inserted!")

        # Query the table
        print("\n[ATTEMPT] Querying temporary table...")
        select_query = "SELECT * FROM TEMP_TEST_TABLE ORDER BY id"
        df_test = pd.read_sql(select_query, conn)
        print("[SUCCESS] Query successful!")
        print("\nData retrieved:")
        print(df_test)

        # Drop the table (an explicit DROP is one of the capabilities under test)
        print("\n[ATTEMPT] Dropping temporary table...")
        cursor.execute("DROP TABLE IF EXISTS TEMP_TEST_TABLE")
        table_needs_cleanup = False
        print("[SUCCESS] Temporary table dropped!")

        print("\n" + "="*80)
        print("✅ TEST 1 PASSED: We CAN create temporary tables!")
        print("="*80)

    except Exception as e:
        print(f"\n[FAILURE] Error during temporary table test: {e}")
        print(f"Error type: {type(e).__name__}")
        print("\n" + "="*80)
        print("❌ TEST 1 FAILED: Cannot create temporary tables")
        print("="*80)

    finally:
        # If a step after CREATE failed, the table would otherwise stay in the
        # session and make a re-run of this cell fail on CREATE.
        if table_needs_cleanup:
            try:
                cursor.execute("DROP TABLE IF EXISTS TEMP_TEST_TABLE")
            except Exception as cleanup_error:
                print(f"[WARNING] Could not drop TEMP_TEST_TABLE during cleanup: {cleanup_error}")
        cursor.close()
else:
    print("[ERROR] No active Snowflake connection")


TEST 1: CREATE TEMPORARY TABLE

[ATTEMPT] Creating temporary table: TEMP_TEST_TABLE

[FAILURE] Error during temporary table test: 003540 (42501): SQL execution error: Creating table on shared database 'INCREMENTALITY' is not allowed.
Error type: ProgrammingError

❌ TEST 1 FAILED: Cannot create temporary tables


## Test 2: Create Temporary Table from Query (CTAS)

In [9]:
# --- TEST 2: CREATE TEMPORARY TABLE AS SELECT (CTAS) ---
# Builds a temp table from an aggregate over AUCTIONS_USERS for the trailing
# 7-day window, reads it back, drops it, and prints a PASSED/FAILED banner.

print("\n" + "="*80)
print("TEST 2: CREATE TEMPORARY TABLE AS SELECT")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():
    cursor = conn.cursor()
    # True from a successful CTAS until the table has been dropped again.
    table_needs_cleanup = False

    try:
        # Calculate date range (last 7 days)
        end_date = date.today()
        start_date = end_date - timedelta(days=7)
        start_date_str = start_date.strftime('%Y-%m-%d')
        end_date_str = end_date.strftime('%Y-%m-%d')

        print("\n[ATTEMPT] Creating temporary table from AUCTIONS_USERS query")
        print(f"  Date range: {start_date_str} to {end_date_str}")

        # Create temp table from actual data. The interpolated values are
        # locally computed ISO dates, not external input.
        ctas_query = f"""
        CREATE TEMPORARY TABLE TEMP_SAMPLED_USERS AS
        SELECT
            OPAQUE_USER_ID,
            COUNT(*) as num_auctions,
            MIN(CREATED_AT) as first_auction,
            MAX(CREATED_AT) as last_auction
        FROM AUCTIONS_USERS
        WHERE CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}'
        GROUP BY OPAQUE_USER_ID
        LIMIT 100
        """

        cursor.execute(ctas_query)
        table_needs_cleanup = True
        print("[SUCCESS] Temporary table created from query!")

        # Query the temp table
        print("\n[ATTEMPT] Querying temporary table...")
        df_users = pd.read_sql("SELECT * FROM TEMP_SAMPLED_USERS LIMIT 10", conn)
        print("[SUCCESS] Query successful!")
        print("\nSample data (first 10 users):")
        print(df_users)

        # Get table stats
        print("\n[ATTEMPT] Getting table statistics...")
        count_df = pd.read_sql("SELECT COUNT(*) as total_users FROM TEMP_SAMPLED_USERS", conn)
        print(f"[SUCCESS] Total users in temp table: {count_df['TOTAL_USERS'].iloc[0]:,}")

        # Drop the table (an explicit DROP is one of the capabilities under test)
        print("\n[ATTEMPT] Dropping temporary table...")
        cursor.execute("DROP TABLE IF EXISTS TEMP_SAMPLED_USERS")
        table_needs_cleanup = False
        print("[SUCCESS] Temporary table dropped!")

        print("\n" + "="*80)
        print("✅ TEST 2 PASSED: We CAN create temp tables from queries (CTAS)!")
        print("="*80)

    except Exception as e:
        print(f"\n[FAILURE] Error during CTAS test: {e}")
        print(f"Error type: {type(e).__name__}")
        print("\n" + "="*80)
        print("❌ TEST 2 FAILED: Cannot create temp tables from queries")
        print("="*80)

    finally:
        # If a step after the CTAS failed, the table would otherwise stay in
        # the session and make a re-run of this cell fail on CREATE.
        if table_needs_cleanup:
            try:
                cursor.execute("DROP TABLE IF EXISTS TEMP_SAMPLED_USERS")
            except Exception as cleanup_error:
                print(f"[WARNING] Could not drop TEMP_SAMPLED_USERS during cleanup: {cleanup_error}")
        cursor.close()
else:
    print("[ERROR] No active Snowflake connection")


TEST 2: CREATE TEMPORARY TABLE AS SELECT

[ATTEMPT] Creating temporary table from AUCTIONS_USERS query
  Date range: 2025-10-04 to 2025-10-11

[FAILURE] Error during CTAS test: 003540 (42501): SQL execution error: Creating table on shared database 'INCREMENTALITY' is not allowed.
Error type: ProgrammingError

❌ TEST 2 FAILED: Cannot create temp tables from queries


## Test 3: Join Temporary Tables

In [10]:
# --- TEST 3: JOIN MULTIPLE TEMPORARY TABLES ---
# Creates two temp tables (a hashed user sample and per-user auction counts),
# joins them, drops both, and prints a PASSED/FAILED banner.

print("\n" + "="*80)
print("TEST 3: JOIN MULTIPLE TEMPORARY TABLES")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():
    cursor = conn.cursor()
    # Temp tables created so far that have not been dropped yet, in creation order.
    tables_pending_drop = []

    try:
        # Calculate date range
        end_date = date.today()
        start_date = end_date - timedelta(days=7)
        start_date_str = start_date.strftime('%Y-%m-%d')
        end_date_str = end_date.strftime('%Y-%m-%d')

        print("\n[ATTEMPT] Creating first temp table: TEMP_USERS_SAMPLE")

        # Create first temp table: sampled users
        query1 = f"""
        CREATE TEMPORARY TABLE TEMP_USERS_SAMPLE AS
        SELECT DISTINCT OPAQUE_USER_ID
        FROM AUCTIONS_USERS
        WHERE CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}'
          AND MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) < 10
        LIMIT 50
        """
        cursor.execute(query1)
        tables_pending_drop.append("TEMP_USERS_SAMPLE")
        print("[SUCCESS] First temp table created!")

        print("\n[ATTEMPT] Creating second temp table: TEMP_USER_AUCTIONS")

        # Create second temp table: auction counts for sampled users
        query2 = f"""
        CREATE TEMPORARY TABLE TEMP_USER_AUCTIONS AS
        SELECT
            au.OPAQUE_USER_ID,
            COUNT(*) as auction_count
        FROM AUCTIONS_USERS au
        INNER JOIN TEMP_USERS_SAMPLE us ON au.OPAQUE_USER_ID = us.OPAQUE_USER_ID
        WHERE au.CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}'
        GROUP BY au.OPAQUE_USER_ID
        """
        cursor.execute(query2)
        tables_pending_drop.append("TEMP_USER_AUCTIONS")
        print("[SUCCESS] Second temp table created!")

        print("\n[ATTEMPT] Joining temporary tables...")

        # Query joining the temp tables
        join_query = """
        SELECT
            us.OPAQUE_USER_ID,
            ua.auction_count
        FROM TEMP_USERS_SAMPLE us
        LEFT JOIN TEMP_USER_AUCTIONS ua ON us.OPAQUE_USER_ID = ua.OPAQUE_USER_ID
        ORDER BY ua.auction_count DESC NULLS LAST
        LIMIT 10
        """

        df_joined = pd.read_sql(join_query, conn)
        print("[SUCCESS] Join successful!")
        print("\nJoined results (top 10 users by auction count):")
        print(df_joined)

        # Clean up: drop in creation order, forgetting each table once it is gone
        print("\n[ATTEMPT] Dropping temporary tables...")
        while tables_pending_drop:
            cursor.execute(f"DROP TABLE IF EXISTS {tables_pending_drop[0]}")
            tables_pending_drop.pop(0)
        print("[SUCCESS] Temporary tables dropped!")

        print("\n" + "="*80)
        print("✅ TEST 3 PASSED: We CAN join multiple temp tables!")
        print("="*80)

    except Exception as e:
        print(f"\n[FAILURE] Error during join test: {e}")
        print(f"Error type: {type(e).__name__}")
        print("\n" + "="*80)
        print("❌ TEST 3 FAILED: Cannot join temp tables")
        print("="*80)

    finally:
        # A failure part-way through would otherwise leave the tables created
        # so far in the session, and a re-run would then fail on CREATE.
        for table_name in tables_pending_drop:
            try:
                cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
            except Exception as cleanup_error:
                print(f"[WARNING] Could not drop {table_name} during cleanup: {cleanup_error}")
        cursor.close()
else:
    print("[ERROR] No active Snowflake connection")


TEST 3: JOIN MULTIPLE TEMPORARY TABLES

[ATTEMPT] Creating first temp table: TEMP_USERS_SAMPLE

[FAILURE] Error during join test: 003540 (42501): SQL execution error: Creating table on shared database 'INCREMENTALITY' is not allowed.
Error type: ProgrammingError

❌ TEST 3 FAILED: Cannot join temp tables


## Summary: Temporary Table Capabilities

In [11]:
# --- SUMMARY ---
# Prints a static description of what temp tables would enable and an example
# usage pattern. NOTE: the text is a fixed literal; it does not inspect the
# outcome of Tests 1-3, so the checkmarks only hold "if all tests passed".

print("\n" + "="*80)
print("SUMMARY: TEMPORARY TABLE CAPABILITIES")
print("="*80)

print("""
If all tests passed, we can:

✅ CREATE TEMPORARY TABLE with explicit schema
✅ INSERT data into temporary tables
✅ QUERY temporary tables with SELECT
✅ CREATE TEMPORARY TABLE AS SELECT (CTAS) from queries
✅ JOIN multiple temporary tables
✅ DROP temporary tables manually

Benefits for data pulls:
1. Create TEMP_SAMPLED_USERS once, reuse across 6 queries
2. Avoid repeating CTE logic in every query
3. Better query performance (Snowflake can optimize)
4. Cleaner, more readable SQL code
5. Session-scoped (auto-cleanup on disconnect)

Example usage pattern:

```python
# 1. Create temp table with sampled users
cursor.execute('''
    CREATE TEMPORARY TABLE TEMP_SAMPLED_USERS AS
    SELECT OPAQUE_USER_ID FROM (
        SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
        FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS 
              WHERE CREATED_AT BETWEEN '2025-01-01' AND '2025-01-14')
    ) WHERE bucket < 10
''')

# 2. Use in multiple queries without repeating CTE
df_auctions = pd.read_sql('''
    SELECT * FROM AUCTIONS_USERS au
    WHERE au.OPAQUE_USER_ID IN (SELECT OPAQUE_USER_ID FROM TEMP_SAMPLED_USERS)
''', conn)

df_impressions = pd.read_sql('''
    SELECT * FROM IMPRESSIONS i
    WHERE i.USER_ID IN (SELECT OPAQUE_USER_ID FROM TEMP_SAMPLED_USERS)
''', conn)

# ... etc for all 6 tables

# 3. Cleanup (optional - auto-drops on disconnect)
cursor.execute('DROP TABLE IF EXISTS TEMP_SAMPLED_USERS')
```

Recommendation: Update CLAUDE.md if tests pass!
""")

print("\n[INFO] Testing complete. Review results above.")


SUMMARY: TEMPORARY TABLE CAPABILITIES

If all tests passed, we can:

✅ CREATE TEMPORARY TABLE with explicit schema
✅ INSERT data into temporary tables
✅ QUERY temporary tables with SELECT
✅ CREATE TEMPORARY TABLE AS SELECT (CTAS) from queries
✅ JOIN multiple temporary tables
✅ DROP temporary tables manually

Benefits for data pulls:
1. Create TEMP_SAMPLED_USERS once, reuse across 6 queries
2. Avoid repeating CTE logic in every query
3. Better query performance (Snowflake can optimize)
4. Cleaner, more readable SQL code
5. Session-scoped (auto-cleanup on disconnect)

Example usage pattern:

```python
# 1. Create temp table with sampled users
cursor.execute('''
    CREATE TEMPORARY TABLE TEMP_SAMPLED_USERS AS
    SELECT OPAQUE_USER_ID FROM (
        SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
        FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS 
              WHERE CREATED_AT BETWEEN '2025-01-01' AND '2025-01-14')
    ) WHERE bucket < 10
''')

# 2.

In [12]:
# --- CLEANUP: CLOSE CONNECTION ---
# Ends the session if one is still open. Cells that guard on
# `conn.is_closed()` print "No active Snowflake connection" once this has
# run, so re-run the connection cell before executing any of them afterwards.

if 'conn' in locals() and conn and not conn.is_closed():
    conn.close()
    for message in (
        "[INFO] Snowflake connection closed.",
        "[INFO] All temporary tables automatically dropped on disconnect.",
    ):
        print(message)

[INFO] Snowflake connection closed.
[INFO] All temporary tables automatically dropped on disconnect.


---

## Diagnosis: Why Temp Tables Failed

**Error:** `Creating table on shared database 'INCREMENTALITY' is not allowed`

This means the database is **shared** (read-only access). We cannot create any tables (temp or permanent) in shared databases.

In [13]:
# --- CHECK DATABASE TYPE ---
# Looks INCREMENTALITY up via SHOW DATABASES and reports whether its `origin`
# column is populated (treated here as "shared"), then lists the grants on
# the database. The two checks are independent: a failure in the first no
# longer prevents the second from running.

print("\n" + "="*80)
print("CHECKING DATABASE TYPE")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():
    cursor = conn.cursor()

    try:
        try:
            # Check if database is shared
            print("\n[ATTEMPT] Checking database properties...")

            cursor.execute("SHOW DATABASES LIKE 'INCREMENTALITY'")
            results = cursor.fetchall()

            if results:
                columns = [desc[0] for desc in cursor.description]
                df_db = pd.DataFrame(results, columns=columns)
                print("[SUCCESS] Database information retrieved:")
                # Only display columns the result actually has, so a missing
                # column cannot raise KeyError before the 'origin' check below.
                display_columns = [c for c in ('name', 'kind', 'origin') if c in df_db.columns]
                print(df_db[display_columns].to_string())

                if 'origin' in df_db.columns:
                    origin = df_db['origin'].iloc[0]
                    # None, NaN and '' all count as "no origin" (same rule as Test 4).
                    if pd.notna(origin) and origin != '':
                        print(f"\n⚠️  Database is SHARED from: {origin}")
                        print("   This explains why we cannot create temp tables.")
                    else:
                        print("\n✓  Database appears to be local (not shared)")
            else:
                print("[WARNING] Could not retrieve database information")

        except Exception as e:
            print(f"\n[INFO] Error checking database: {e}")

        try:
            # Check our current privileges
            print("\n[ATTEMPT] Checking current user privileges...")
            cursor.execute("SHOW GRANTS ON DATABASE INCREMENTALITY")
            grants = cursor.fetchall()

            if grants:
                grant_cols = [desc[0] for desc in cursor.description]
                df_grants = pd.DataFrame(grants, columns=grant_cols)
                print("[SUCCESS] Current privileges:")
                print(df_grants.to_string())

        except Exception as e:
            print(f"\n[INFO] Error checking privileges: {e}")

    finally:
        cursor.close()
else:
    print("[ERROR] No active Snowflake connection")


CHECKING DATABASE TYPE
[ERROR] No active Snowflake connection


---

## Test 4: Check for Alternative Databases

If INCREMENTALITY is shared, we might have access to other databases where we can create temp tables.

In [14]:
# --- CHECK FOR OTHER DATABASES ---
# Lists every database visible to the session and flags those whose `origin`
# column is empty/null as local (non-shared) candidates for temp tables.

print("\n" + "="*80)
print("TEST 4: CHECK FOR ALTERNATIVE DATABASES")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():
    cursor = conn.cursor()

    try:
        print("\n[ATTEMPT] Listing all accessible databases...")

        cursor.execute("SHOW DATABASES")
        results = cursor.fetchall()

        if results:
            columns = [desc[0] for desc in cursor.description]
            df_dbs = pd.DataFrame(results, columns=columns)
            print("[SUCCESS] Found databases:")
            # Only display columns the result actually has, so a missing
            # column cannot raise KeyError before the 'origin' check below.
            display_columns = [c for c in ('name', 'kind', 'origin') if c in df_dbs.columns]
            print(df_dbs[display_columns].to_string())

            # Look for non-shared databases
            if 'origin' in df_dbs.columns:
                has_no_origin = df_dbs['origin'].isna() | (df_dbs['origin'] == '')
                local_dbs = df_dbs[has_no_origin]
                if len(local_dbs) > 0:
                    print(f"\n✓  Found {len(local_dbs)} local (non-shared) database(s):")
                    print(local_dbs['name'].tolist())
                    print("\n   We could potentially create temp tables in these databases.")
                else:
                    print("\n⚠️  All databases appear to be shared.")

        else:
            print("[WARNING] No databases found")

    except Exception as e:
        print(f"\n[INFO] Error listing databases: {e}")

    finally:
        cursor.close()
else:
    print("[ERROR] No active Snowflake connection")


TEST 4: CHECK FOR ALTERNATIVE DATABASES
[ERROR] No active Snowflake connection


---

## Test 5: Performance Comparison - CTE vs Subquery

Since we can't use temp tables, let's test whether defining the user sample as a CTE performs any differently from writing the same logic as an inline subquery.

In [15]:
# --- TEST CTE PERFORMANCE ---
# Runs the same aggregate twice -- once with the user sample defined as a CTE,
# once with it inlined as a subquery -- and compares wall-clock durations.
# NOTE(review): the queries run back to back, so the second may benefit from
# warehouse caching warmed by the first; treat the comparison as indicative.

import time


def describe_speedup(cte_seconds, subquery_seconds):
    """Return the verdict line comparing the CTE and subquery timings.

    Args:
        cte_seconds: Duration of the CTE query, in seconds.
        subquery_seconds: Duration of the inline-subquery query, in seconds.

    Returns:
        A line naming the faster approach and by what percentage. Ties are
        attributed to the subquery. When the faster duration is not positive
        the ratio is undefined, so a "too small to compare" line is returned
        instead of raising ZeroDivisionError.
    """
    if cte_seconds < subquery_seconds:
        label, faster, slower = "CTE", cte_seconds, subquery_seconds
    else:
        label, faster, slower = "Subquery", subquery_seconds, cte_seconds
    if faster <= 0:
        return "  Timings too small to compare"
    return f"  {label} was {((slower/faster - 1)*100):.1f}% faster"


print("\n" + "="*80)
print("TEST 5: CTE PERFORMANCE")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():

    end_date = date.today()
    start_date = end_date - timedelta(days=7)
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')

    # Approach 1: Single CTE used multiple times in one query
    print("\n[TEST] Approach 1: Single query with CTE (efficient)")
    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start_time = time.perf_counter()

    query_cte = f"""
    WITH SAMPLED_USERS AS (
        SELECT OPAQUE_USER_ID
        FROM (
            SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
            FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS
                  WHERE CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}')
        ) WHERE bucket < 10
    )
    SELECT
        COUNT(DISTINCT OPAQUE_USER_ID) as num_users,
        COUNT(*) as num_auctions
    FROM AUCTIONS_USERS
    WHERE OPAQUE_USER_ID IN (SELECT OPAQUE_USER_ID FROM SAMPLED_USERS)
      AND CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}'
    """

    df1 = pd.read_sql(query_cte, conn)
    time1 = time.perf_counter() - start_time
    print(f"  Time: {time1:.2f} seconds")
    print(f"  Result: {df1.to_dict('records')[0]}")

    # Approach 2: Repeated subquery (less efficient)
    print("\n[TEST] Approach 2: Repeated subquery (potentially less efficient)")
    start_time = time.perf_counter()

    query_subquery = f"""
    SELECT
        COUNT(DISTINCT OPAQUE_USER_ID) as num_users,
        COUNT(*) as num_auctions
    FROM AUCTIONS_USERS
    WHERE OPAQUE_USER_ID IN (
        SELECT OPAQUE_USER_ID
        FROM (
            SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
            FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS
                  WHERE CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}')
        ) WHERE bucket < 10
    )
    AND CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}'
    """

    df2 = pd.read_sql(query_subquery, conn)
    time2 = time.perf_counter() - start_time
    print(f"  Time: {time2:.2f} seconds")
    print(f"  Result: {df2.to_dict('records')[0]}")

    print(f"\n[RESULT] Performance difference: {abs(time1 - time2):.2f} seconds")
    print(describe_speedup(time1, time2))

    print("\n[INFO] Note: Snowflake's query optimizer may handle both similarly.")

else:
    print("[ERROR] No active Snowflake connection")


TEST 5: CTE PERFORMANCE
[ERROR] No active Snowflake connection


---

## Test 6: Python-side Materialization

Alternative approach: Fetch sampled users into Python, then use them in subsequent queries.

In [16]:
# --- TEST PYTHON-SIDE MATERIALIZATION ---
# Pulls the sampled user IDs into Python, then feeds (at most 100 of) them
# back to Snowflake as a literal IN-list and times both round-trips.

import time  # imported here so this cell does not depend on the Test 5 cell


def sql_string_list(values):
    """Render values as a comma-separated list of SQL string literals.

    Each value is converted with ``str`` and wrapped in single quotes.
    Backslashes and single quotes are doubled so an ID containing either
    cannot terminate the literal or start an escape sequence.

    Args:
        values: Iterable of values to quote.

    Returns:
        The joined literals, e.g. ``'a','b'``; an empty string for no values.
    """
    return ",".join(
        "'" + str(value).replace("\\", "\\\\").replace("'", "''") + "'"
        for value in values
    )


print("\n" + "="*80)
print("TEST 6: PYTHON-SIDE MATERIALIZATION")
print("="*80)

if 'conn' in locals() and conn and not conn.is_closed():

    end_date = date.today()
    start_date = end_date - timedelta(days=7)
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')

    print("\n[STEP 1] Fetch sampled user IDs into Python...")
    start_time = time.perf_counter()

    sample_query = f"""
    SELECT OPAQUE_USER_ID
    FROM (
        SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
        FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS
              WHERE CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}')
    ) WHERE bucket < 10
    """

    df_users = pd.read_sql(sample_query, conn)
    # Drop NULL IDs: they cannot be matched by an IN-list of literals.
    user_list = df_users['OPAQUE_USER_ID'].dropna().tolist()
    time_fetch = time.perf_counter() - start_time

    print(f"  Fetched {len(user_list):,} users in {time_fetch:.2f} seconds")

    if user_list:
        print("\n[STEP 2] Use user list in subsequent query...")
        start_time = time.perf_counter()

        # Format user IDs for SQL IN clause (limited to first 100 to avoid query size issues)
        user_subset = user_list[:100]
        user_ids_sql = sql_string_list(user_subset)

        data_query = f"""
        SELECT
            COUNT(DISTINCT OPAQUE_USER_ID) as num_users,
            COUNT(*) as num_auctions
        FROM AUCTIONS_USERS
        WHERE OPAQUE_USER_ID IN ({user_ids_sql})
          AND CREATED_AT BETWEEN '{start_date_str}' AND '{end_date_str}'
        """

        df_result = pd.read_sql(data_query, conn)
        time_query = time.perf_counter() - start_time

        print(f"  Query executed in {time_query:.2f} seconds")
        print(f"  Result: {df_result.to_dict('records')[0]}")

        total_time = time_fetch + time_query
        print(f"\n[RESULT] Total time: {total_time:.2f} seconds")

        print("\n[WARNING] This approach has limitations:")
        print("  - SQL query size limits (max ~16MB)")
        print("  - Not suitable for large user lists (>10k users)")
        print("  - Requires two round-trips to Snowflake")
        print(f"  - Only tested with {len(user_subset)} users (truncated from {len(user_list)})")
        print("\n[RECOMMENDATION] Stick with CTE approach for large datasets.")
    else:
        print("\n[WARNING] No users found in sample. Cannot test this approach.")

else:
    print("[ERROR] No active Snowflake connection")


TEST 6: PYTHON-SIDE MATERIALIZATION
[ERROR] No active Snowflake connection


---

## Final Summary & Recommendations

In [17]:
# --- FINAL SUMMARY ---
# Prints the notebook's conclusions as a fixed block of text: temp tables are
# unavailable on the shared INCREMENTALITY database, so the CTE-per-query
# pattern stays the recommended approach. Nothing here is computed from the
# earlier cells' results.

print("\n" + "="*80)
print("FINAL SUMMARY & RECOMMENDATIONS")
print("="*80)

print("""
## Findings:

❌ TEMPORARY TABLES: NOT AVAILABLE
   Reason: INCREMENTALITY database is SHARED (read-only access)
   Error: "Creating table on shared database is not allowed"

## Recommended Approach:

✅ USE CTEs (Common Table Expressions)
   - Continue using WITH clauses as we currently do
   - Snowflake's optimizer handles CTEs efficiently
   - No permission issues
   - Works perfectly with shared databases

## Current Data Pull Pattern (OPTIMAL):

```python
# Define CTE once
cte_sql = '''
WITH SAMPLED_USER_IDS AS (
    SELECT OPAQUE_USER_ID FROM (
        SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
        FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS 
              WHERE CREATED_AT BETWEEN '2025-01-01' AND '2025-01-14')
    ) WHERE bucket < 10
)
'''

# Use in each query
query1 = cte_sql + '''
SELECT * FROM AUCTIONS_USERS au
WHERE au.OPAQUE_USER_ID IN (SELECT OPAQUE_USER_ID FROM SAMPLED_USER_IDS)
'''

query2 = cte_sql + '''
SELECT * FROM IMPRESSIONS i
WHERE i.USER_ID IN (SELECT OPAQUE_USER_ID FROM SAMPLED_USER_IDS)
'''

# ... etc for all 6 tables
```

## Performance Notes:

1. Snowflake optimizes CTEs automatically
2. Each query is independent (can run in parallel if needed)
3. No session state to manage
4. No cleanup required

## CLAUDE.md Update:

❌ DO NOT add temporary table guidance
✅ Document that INCREMENTALITY is a shared database
✅ Confirm CTE approach is the correct pattern
✅ Note: No table creation permissions (shared database)

""")

print("[INFO] Testing complete. CTE approach remains optimal.")


FINAL SUMMARY & RECOMMENDATIONS

## Findings:

❌ TEMPORARY TABLES: NOT AVAILABLE
   Reason: INCREMENTALITY database is SHARED (read-only access)
   Error: "Creating table on shared database is not allowed"

## Recommended Approach:

✅ USE CTEs (Common Table Expressions)
   - Continue using WITH clauses as we currently do
   - Snowflake's optimizer handles CTEs efficiently
   - No permission issues
   - Works perfectly with shared databases

## Current Data Pull Pattern (OPTIMAL):

```python
# Define CTE once
cte_sql = '''
WITH SAMPLED_USER_IDS AS (
    SELECT OPAQUE_USER_ID FROM (
        SELECT OPAQUE_USER_ID, MOD(ABS(HASH(OPAQUE_USER_ID)), 10000) AS bucket
        FROM (SELECT DISTINCT OPAQUE_USER_ID FROM AUCTIONS_USERS 
              WHERE CREATED_AT BETWEEN '2025-01-01' AND '2025-01-14')
    ) WHERE bucket < 10
)
'''

# Use in each query
query1 = cte_sql + '''
SELECT * FROM AUCTIONS_USERS au
WHERE au.OPAQUE_USER_ID IN (SELECT OPAQUE_USER_ID FROM SAMPLED_USER_IDS)
'''

query2 = cte