In [None]:
# Import required libraries
import snowflake.connector
import pandas as pd
import os
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Read settings from a .env file (if one is found) into the process environment
load_dotenv()

# Plot appearance: the matplotlib style is applied first and the seaborn palette
# second; keep that order so the palette is the last thing to set the colours.
PLOT_STYLE = 'default'
PLOT_PALETTE = "husl"
plt.style.use(PLOT_STYLE)
sns.set_palette(PLOT_PALETTE)

print("📦 Libraries loaded successfully!")


In [None]:
# Connect to Snowflake
#
# Every connection setting is resolved exactly once, so the values handed to
# connect() and the values reported below cannot drift apart.
snowflake_settings = {
    'account': os.getenv('SNOWFLAKE_ACCOUNT'),
    'user': os.getenv('SNOWFLAKE_USER'),
    'password': os.getenv('SNOWFLAKE_PASSWORD'),
    # Optional: when unset, None is passed through exactly as before.
    'warehouse': os.getenv('SNOWFLAKE_WAREHOUSE'),
    'database': os.getenv('SNOWFLAKE_DATABASE', 'CRYPTO_ANALYTICS'),
    'schema': os.getenv('SNOWFLAKE_SCHEMA', 'RAW_DATA'),
}

# Fail fast with an actionable message instead of letting a None/empty
# credential surface as an opaque driver error inside connect().
missing_settings = [
    f"SNOWFLAKE_{key.upper()}"
    for key in ('account', 'user', 'password')
    if not snowflake_settings[key]
]
if missing_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_settings)
        + ". Set them in the environment or in a .env file before running this cell."
    )

conn = snowflake.connector.connect(**snowflake_settings)

# Shared cursor used by the following cells to run DDL statements
cursor = conn.cursor()

print("🔗 Connected to Snowflake successfully!")
print(f"Database: {snowflake_settings['database']}")
print(f"Schema: {snowflake_settings['schema']}")


In [None]:
# Create the STAGING and ANALYTICS schemas (no-ops when they already exist)
cursor.execute("CREATE SCHEMA IF NOT EXISTS STAGING")
cursor.execute("CREATE SCHEMA IF NOT EXISTS ANALYTICS")

# Create staging view for cleaned trades.
#
# The view normalises the categorical text columns to upper case, casts the
# timestamp/date columns, derives a few convenience fields and drops rows that
# have no trade_id.
#
# is_completed compares UPPER(status) explicitly: the bare name `status` is both
# a raw column and the alias of the upper-cased column in this SELECT, so relying
# on name resolution could make the flag case-sensitive on the raw values
# (e.g. 'completed' would be flagged 0).
cursor.execute("""
    CREATE OR REPLACE VIEW STAGING.TRADES_CLEAN AS
    SELECT
        trade_id,
        user_id,
        symbol,
        UPPER(side) as side,
        quantity,
        price,
        quantity * price as notional_value,
        timestamp::timestamp as trade_timestamp,
        UPPER(status) as status,
        UPPER(exchange) as exchange,
        UPPER(order_type) as order_type,
        fees,
        settlement_date::date as settlement_date,
        _loaded_at,
        _file_name,
        -- Derived fields
        SPLIT_PART(symbol, '-', 1) as base_currency,
        SPLIT_PART(symbol, '-', 2) as quote_currency,
        DATE(timestamp) as trade_date,
        HOUR(timestamp) as trade_hour,
        -- Case-insensitive on the raw status value; NULL status yields 0
        CASE WHEN UPPER(status) = 'COMPLETED' THEN 1 ELSE 0 END as is_completed
    FROM RAW_DATA.CRYPTO_TRADES
    WHERE trade_id IS NOT NULL
""")

print("✅ Staging schema and views created successfully!")


In [None]:
# Create dynamic table for daily trading metrics (incremental processing)
cursor.execute("USE SCHEMA ANALYTICS")

# Refresh on the same warehouse the connection is configured with, falling back
# to the previous hard-coded default when SNOWFLAKE_WAREHOUSE is not set.
refresh_warehouse = os.getenv('SNOWFLAKE_WAREHOUSE') or 'COMPUTE_WH'

# The name is interpolated into DDL as an unquoted identifier, so accept only
# the characters Snowflake allows there (letter/underscore first, then letters,
# digits, '_' or '$') and reject anything else instead of building odd SQL.
if not (
    refresh_warehouse.isascii()
    and (refresh_warehouse[0].isalpha() or refresh_warehouse[0] == '_')
    and all(ch.isalnum() or ch in '_$' for ch in refresh_warehouse)
):
    raise ValueError(
        f"SNOWFLAKE_WAREHOUSE={refresh_warehouse!r} is not a plain Snowflake identifier; "
        "use a warehouse name made of letters, digits, '_' or '$'."
    )

# One row per (trade_date, symbol, base_currency, quote_currency, exchange),
# built from completed trades only. Because of the WHERE filter, total_trades
# and completed_trades are always equal here.
#
# vwap divides by NULLIF(SUM(quantity), 0): a group whose quantities sum to zero
# now yields NULL instead of a division-by-zero error that would fail the refresh.
cursor.execute(f"""
    CREATE OR REPLACE DYNAMIC TABLE ANALYTICS.DAILY_TRADING_METRICS
    TARGET_LAG = '1 minute'
    WAREHOUSE = {refresh_warehouse}
    AS
    SELECT
        trade_date,
        symbol,
        base_currency,
        quote_currency,
        exchange,
        COUNT(*) as total_trades,
        COUNT(CASE WHEN side = 'BUY' THEN 1 END) as buy_trades,
        COUNT(CASE WHEN side = 'SELL' THEN 1 END) as sell_trades,
        COUNT(CASE WHEN is_completed = 1 THEN 1 END) as completed_trades,
        SUM(quantity) as total_volume,
        SUM(notional_value) as total_notional,
        AVG(price) as avg_price,
        MIN(price) as min_price,
        MAX(price) as max_price,
        SUM(fees) as total_fees,
        COUNT(DISTINCT user_id) as unique_traders,
        -- Volume-weighted average price (NULL when no quantity was traded)
        SUM(notional_value) / NULLIF(SUM(quantity), 0) as vwap
    FROM STAGING.TRADES_CLEAN
    WHERE is_completed = 1
    GROUP BY trade_date, symbol, base_currency, quote_currency, exchange
""")

print("✅ Dynamic table DAILY_TRADING_METRICS created successfully!")


In [None]:
# Create analytics views for the Streamlit dashboard
#
# TOP_PERFORMING_ASSETS: one row per (symbol, base_currency), ordered by traded
# notional.
#
# total_unique_traders is counted with COUNT(DISTINCT user_id) over the completed
# trades in the staging view. Summing the per-day, per-exchange `unique_traders`
# column of DAILY_TRADING_METRICS would count the same user once for every day
# and exchange they traded on, which overstates the number of distinct traders.
#
# NOTE(review): avg_price is still the unweighted mean of the daily per-exchange
# averages, not the mean over individual trades - confirm that is what the
# dashboard expects.
cursor.execute("""
    CREATE OR REPLACE VIEW ANALYTICS.TOP_PERFORMING_ASSETS AS
    WITH asset_metrics AS (
        SELECT
            symbol,
            base_currency,
            SUM(total_notional) as total_volume,
            AVG(avg_price) as avg_price,
            MAX(max_price) as high_price,
            MIN(min_price) as low_price,
            SUM(total_trades) as total_trades
        FROM ANALYTICS.DAILY_TRADING_METRICS
        GROUP BY symbol, base_currency
    ),
    asset_traders AS (
        SELECT
            symbol,
            base_currency,
            COUNT(DISTINCT user_id) as total_unique_traders
        FROM STAGING.TRADES_CLEAN
        WHERE is_completed = 1
        GROUP BY symbol, base_currency
    )
    SELECT
        m.symbol,
        m.base_currency,
        m.total_volume,
        m.avg_price,
        m.high_price,
        m.low_price,
        m.total_trades,
        COALESCE(t.total_unique_traders, 0) as total_unique_traders
    FROM asset_metrics m
    -- NULL-safe match so a group with a NULL symbol still finds its trader count
    LEFT JOIN asset_traders t
        ON EQUAL_NULL(m.symbol, t.symbol)
       AND EQUAL_NULL(m.base_currency, t.base_currency)
    ORDER BY m.total_volume DESC
""")

# USER_TRADING_SUMMARY: per-user totals over completed trades, joined to the
# user's profile row. The inner join means trades whose user_id has no row in
# RAW_DATA.USER_PROFILES are left out of the summary.
user_trading_summary_ddl = """
    CREATE OR REPLACE VIEW ANALYTICS.USER_TRADING_SUMMARY AS
    SELECT 
        t.user_id,
        u.first_name || ' ' || u.last_name as full_name,
        u.tier,
        u.country,
        COUNT(*) as total_trades,
        SUM(t.notional_value) as total_volume,
        AVG(t.notional_value) as avg_trade_size,
        COUNT(DISTINCT t.symbol) as unique_symbols
    FROM STAGING.TRADES_CLEAN t
    JOIN RAW_DATA.USER_PROFILES u ON t.user_id = u.user_id
    WHERE t.is_completed = 1
    GROUP BY t.user_id, u.first_name, u.last_name, u.tier, u.country
    ORDER BY total_volume DESC
"""
cursor.execute(user_trading_summary_ddl)

# Smoke-test the pipeline: read a handful of rows back through the
# TOP_PERFORMING_ASSETS view to confirm the objects created above are queryable.
sample_query = """
    SELECT symbol, total_volume, total_trades 
    FROM ANALYTICS.TOP_PERFORMING_ASSETS 
    LIMIT 5
"""
sample_data = pd.read_sql(sample_query, conn)

print(
    "✅ Analytics views created successfully!",
    "📊 Sample data from pipeline:",
    sep="\n",
)
print(sample_data)

# Release the Snowflake resources: the cursor first, then the connection itself
for snowflake_handle in (cursor, conn):
    snowflake_handle.close()
print("🔒 Pipeline setup complete!")
