## 1. Configuration

In [0]:
# Imports
from pyspark.sql.functions import (
    window, sum, avg, count, stddev, min, max,
    round as spark_round, col, when
)

In [0]:
# Pipeline configuration (fully qualified table names and streaming state location)

# Source: Silver table that the streaming read in section 2 consumes.
SILVER_TABLE = "finance_lakehouse.silver.market_trades_clean"

# Sink: Gold table that receives the 1-minute VWAP aggregates.
GOLD_VWAP_TABLE = "finance_lakehouse.gold.vwap_1min"

# Checkpoint directory handed to the streaming writer.
# NOTE(review): a /tmp location may not survive cluster restarts — confirm this
# is intentional before relying on it for recovery.
CHECKPOINT_PATH = "/tmp/checkpoints/gold_vwap"

## 2. Read from Silver

In [0]:
# Open the Silver trades table as a streaming DataFrame (Delta source);
# nothing is read until a streaming query is started on top of it.
silver_stream = spark.readStream.format("delta").table(SILVER_TABLE)

## 3. Calculate VWAP (1-Minute Tumbling Window)

In [0]:
# Aggregate Silver trades into one row per (1-minute window, exchange, symbol)
# carrying VWAP, volume, price and buy/sell side metrics.
#
# Both ratio columns (vwap, buy_pressure_pct) are guarded against a zero
# denominator: with ANSI SQL mode enabled a plain division by zero raises and
# would terminate the streaming query; with ANSI mode disabled it yields NULL.
# The guard yields NULL in both cases and leaves non-zero results unchanged.
vwap_stream = (
    silver_stream
    # Event-time watermark: bounds how long aggregation state is kept for
    # late-arriving trades before a window is finalized.
    .withWatermark("event_datetime", "10 minutes")
    .groupBy(
        window(col("event_datetime"), "1 minute"),  # Tumbling window
        col("exchange"),
        col("symbol")
    )
    .agg(
        # VWAP = sum(price * qty) / sum(qty); NULL when the window has zero volume
        spark_round(
            when(
                sum(col("qty")) != 0,
                sum(col("price") * col("qty")) / sum(col("qty"))
            ),
            2
        ).alias("vwap"),

        # Volume metrics
        sum("qty").alias("total_volume"),
        count("*").alias("trade_count"),

        # Price metrics
        avg("price").alias("avg_price"),
        min("price").alias("min_price"),
        max("price").alias("max_price"),
        stddev("price").alias("price_volatility"),

        # Side metrics: quantity traded per side (rows of the other side count as 0)
        sum(when(col("side") == "buy", col("qty")).otherwise(0)).alias("buy_volume"),
        sum(when(col("side") == "sell", col("qty")).otherwise(0)).alias("sell_volume")
    )
    .select(
        # Flatten the window struct into explicit start/end columns
        col("window.start").alias("window_start"),
        col("window.end").alias("window_end"),
        col("exchange"),
        col("symbol"),
        col("vwap"),
        col("total_volume"),
        col("trade_count"),
        col("avg_price"),
        col("min_price"),
        col("max_price"),
        spark_round(col("price_volatility"), 4).alias("price_volatility"),
        col("buy_volume"),
        col("sell_volume"),
        # Net buy pressure as a percentage of total volume:
        # (buy - sell) / total * 100; NULL when the window has zero volume
        spark_round(
            when(
                col("total_volume") != 0,
                (col("buy_volume") - col("sell_volume")) / col("total_volume") * 100
            ),
            2
        ).alias("buy_pressure_pct")
    )
)

## 4. Write to Gold Table

In [0]:
# Start the streaming write of the VWAP aggregates into the Gold Delta table.
# The returned StreamingQuery handle is kept in `gold_query`.
gold_query = (
    vwap_stream.writeStream
    .format("delta")
    # Name the query so the monitoring cells (which print `stream.name`)
    # identify it instead of showing `None`.
    .queryName("gold_vwap_1min")
    .outputMode("append")
    .option("checkpointLocation", CHECKPOINT_PATH)
    .trigger(processingTime='1 minute')
    .toTable(GOLD_VWAP_TABLE)
)

## 5. Business Analytics Queries

In [0]:
%sql
-- Most recent 20 VWAP rows from the Gold table.
-- `exchange` is selected because the table holds one row per
-- (window, exchange, symbol); without it, rows for the same symbol on
-- different exchanges cannot be told apart. The extra ORDER BY keys make the
-- LIMIT deterministic when several rows share the same window_start.
SELECT
  window_start,
  window_end,
  exchange,
  symbol,
  vwap,
  total_volume,
  buy_pressure_pct
FROM finance_lakehouse.gold.vwap_1min
ORDER BY window_start DESC, exchange, symbol
LIMIT 20;

In [0]:
%sql
-- Top 10 windows by price volatility above a fixed threshold.
-- `exchange` is selected because the table holds one row per
-- (window, exchange, symbol); without it, rows for the same symbol on
-- different exchanges cannot be told apart.
SELECT
  window_start,
  exchange,
  symbol,
  price_volatility,
  trade_count,
  total_volume
FROM finance_lakehouse.gold.vwap_1min
WHERE price_volatility > 100  -- Adjust threshold
ORDER BY price_volatility DESC
LIMIT 10;

In [0]:
# Report id, name, status and latest progress for each active streaming query
for query in spark.streams.active:
    print(
        f"Stream ID: {query.id}",
        f"Name: {query.name}",
        f"Status: {query.status}",
        "Last Progress:",
        query.lastProgress,
        "=" * 80,
        sep="\n",
    )

In [0]:
# Shut down every active streaming query, waiting for each one to terminate
running_queries = spark.streams.active
for query in running_queries:
    query.stop()
    query.awaitTermination()

# List whatever is still active afterwards (expected to print nothing once
# every query has stopped)
for query in spark.streams.active:
    print(
        f"Stream ID: {query.id}",
        f"Name: {query.name}",
        f"Status: {query.status}",
        "Last Progress:",
        query.lastProgress,
        "=" * 80,
        sep="\n",
    )