In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number, col, current_timestamp, last_value
from datetime import datetime, timedelta
# Declare a text widget named "trade_date" (empty default) and read its
# current value into `trade_date`.
TRADE_DATE_WIDGET = "trade_date"
dbutils.widgets.text(TRADE_DATE_WIDGET, "")
trade_date = dbutils.widgets.get(TRADE_DATE_WIDGET)


In [0]:
# Honour the widget-supplied trade date when one was provided; only fall back
# to the sample date when the widget was left blank. (Previously the literal
# unconditionally overwrote the widget value.)
DEFAULT_TRADE_DATE = "2020-08-06"
trade_date = (trade_date or "").strip() or DEFAULT_TRADE_DATE

# Parse once: a malformed date raises ValueError here, before the value is
# interpolated into any filter string, and the result is normalised to
# zero-padded YYYY-MM-DD.
trade_dt_parsed = datetime.strptime(trade_date, "%Y-%m-%d")
trade_date = trade_dt_parsed.strftime("%Y-%m-%d")

# Previous *calendar* day.
# NOTE(review): if trade_date follows a weekend/holiday this will not be the
# previous trading session — confirm whether that case needs handling.
prev_date = (trade_dt_parsed - timedelta(days=1)).strftime("%Y-%m-%d")

In [0]:
# Read the trade Delta dataset, keep only the rows for the current trade date,
# and expose them to SQL cells as the `trade_temp` view.
trade_path = "/Volumes/capstone/default/trade_data"
trade_reader = spark.read.format("delta")
df_trade = trade_reader.load(trade_path).filter(f"trade_dt = '{trade_date}'")
df_trade.createOrReplaceTempView("trade_temp")

In [0]:
%sql
-- One row per trade in trade_temp, plus mov_avg_pr: the average trade_pr over
-- rows of the same symbol whose event_tm lies in the 30 minutes up to and
-- including the current row's event_tm.
-- NOTE(review): this window partitions by symbol only, whereas the later
-- windows in this notebook partition by (symbol, exchange) — confirm intended.
CREATE OR REPLACE TEMP VIEW tmp_trade_moving_avg AS
SELECT
    symbol,
    exchange,
    event_tm,
    event_seq_nb,
    trade_pr,
    AVG(trade_pr) OVER (
        PARTITION BY symbol
        ORDER BY event_tm
        RANGE BETWEEN INTERVAL 30 MINUTE PRECEDING AND CURRENT ROW
    ) AS mov_avg_pr
FROM trade_temp

In [0]:
# Read the same trade Delta dataset restricted to the previous date and
# register it as the `prev_temp` view.
prev_trade_path = "/Volumes/capstone/default/trade_data"
prev_reader = spark.read.format("delta")
df_prev = prev_reader.load(prev_trade_path).filter(f"trade_dt = '{prev_date}'")
df_prev.createOrReplaceTempView("prev_temp")

In [0]:
%sql
-- Closing price per (symbol, exchange): trade_pr of the latest row in
-- prev_temp. Rows are ranked newest-first by event_tm; event_seq_nb breaks
-- ties so that trades sharing a timestamp resolve to the same row on every
-- run (ordering by event_tm alone left the pick arbitrary).
-- NOTE(review): assumes a higher event_seq_nb means a later event — confirm.
CREATE OR REPLACE TEMP VIEW temp_last_trade AS
SELECT
    symbol,
    exchange,
    trade_pr AS close_pr
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY symbol, exchange
            ORDER BY event_tm DESC, event_seq_nb DESC
        ) AS rn
    FROM prev_temp
) ranked
WHERE rn = 1

In [0]:
# Read the quote Delta dataset for the current trade date and register it as
# the `tmp_quotes` view.
quote_path = "/Volumes/capstone/default/quote_data"
quote_reader = spark.read.format("delta")
df_quote = quote_reader.load(quote_path).filter(f"trade_dt = '{trade_date}'")

df_quote.createOrReplaceTempView("tmp_quotes")


In [0]:
%sql
-- Stack quotes and trades into one common column layout. Quote rows carry
-- NULL trade columns, trade rows carry NULL quote columns (and NULL trade_dt);
-- rec_type marks the origin: 'Q' = quote, 'T' = trade.
CREATE OR REPLACE TEMP VIEW quote_union AS
SELECT
    trade_dt,
    symbol,
    event_tm,
    event_seq_nb,
    exchange,
    bid_pr,
    bid_size,
    ask_pr,
    ask_size,
    NULL AS trade_pr,
    NULL AS mov_avg_pr,
    'Q'  AS rec_type
FROM tmp_quotes

UNION ALL

-- Unaliased NULLs line up positionally with bid_pr, bid_size, ask_pr, ask_size.
SELECT
    NULL AS trade_dt,
    symbol,
    event_tm,
    event_seq_nb,
    exchange,
    NULL,
    NULL,
    NULL,
    NULL,
    trade_pr,
    mov_avg_pr,
    'T'  AS rec_type
FROM tmp_trade_moving_avg


In [0]:
# For every row of quote_union, carry forward the most recent non-null
# trade_pr / mov_avg_pr seen so far within the same (symbol, exchange).
# LAST_VALUE(..., TRUE) skips NULLs, so quote rows pick up the latest trade
# values that precede them in the ordering.
#
# The frame is ROWS-based, so the ordering must be total for the result to be
# reproducible: event_seq_nb is added as a tie-breaker for rows that share an
# event_tm (ordering by event_tm alone left their relative order arbitrary).
# NOTE(review): assumes event_seq_nb increases with arrival order across both
# quotes and trades — confirm against the feed definition.
spark.sql("""
CREATE OR REPLACE TEMP VIEW quote_union_update AS
SELECT *,
    LAST_VALUE(trade_pr, TRUE) OVER (
        PARTITION BY symbol, exchange
        ORDER BY event_tm, event_seq_nb
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS last_trade_pr,
    LAST_VALUE(mov_avg_pr, TRUE) OVER (
        PARTITION BY symbol, exchange
        ORDER BY event_tm, event_seq_nb
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS last_mov_avg_pr
FROM quote_union
""")

DataFrame[]

In [0]:
# Keep only the quote rows (rec_type = 'Q') from quote_union_update, projecting
# the quote columns together with the carried-forward trade values.
quote_update_sql = """
CREATE OR REPLACE TEMP VIEW quote_update AS
SELECT 
    trade_dt, symbol, event_tm, event_seq_nb, exchange,
    bid_pr, bid_size, ask_pr, ask_size,
    last_trade_pr, last_mov_avg_pr
FROM quote_union_update
WHERE rec_type = 'Q'
"""
spark.sql(quote_update_sql)

DataFrame[]

In [0]:
%sql
-- Attach the previous day's close_pr to each quote and derive the bid/ask
-- movement relative to it. LEFT JOIN keeps quotes that have no matching
-- (symbol, exchange) in temp_last_trade; their *_mv columns come out NULL.
CREATE OR REPLACE TEMP VIEW quote_final AS
SELECT
    q.*,
    q.bid_pr - t.close_pr AS bid_pr_mv,
    q.ask_pr - t.close_pr AS ask_pr_mv
FROM quote_update AS q
LEFT JOIN temp_last_trade AS t
    ON  q.symbol   = t.symbol
    AND q.exchange = t.exchange

In [0]:
%sql
-- Preview the enriched quote rows.
SELECT *
FROM quote_final

trade_dt,symbol,event_tm,event_seq_nb,exchange,bid_pr,bid_size,ask_pr,ask_size,last_trade_pr,last_mov_avg_pr,bid_pr_mv,ask_pr_mv
2020-08-06,SYMA,2020-08-06T09:38:08.093Z,1,NASDAQ,78.13,100,79.83,100,,,0.88,2.58
2020-08-06,SYMA,2020-08-06T09:46:05.163Z,2,NASDAQ,76.52,100,76.57,100,,,-0.73,-0.68
2020-08-06,SYMA,2020-08-06T09:52:14.798Z,3,NASDAQ,78.75,100,79.09,100,,,1.5,1.84
2020-08-06,SYMA,2020-08-06T09:58:51.806Z,4,NASDAQ,75.61,100,76.95,100,,,-1.64,-0.3
2020-08-06,SYMA,2020-08-06T10:07:40.796Z,5,NASDAQ,77.45,100,78.73,100,,,0.2,1.48
2020-08-06,SYMA,2020-08-06T10:15:34.939Z,6,NASDAQ,79.3,100,81.07,100,,,2.05,3.82
2020-08-06,SYMA,2020-08-06T10:22:14.67Z,7,NASDAQ,77.76,100,79.25,100,,,0.51,2.0
2020-08-06,SYMA,2020-08-06T10:29:36.901Z,8,NASDAQ,75.6,100,76.96,100,,,-1.65,-0.29
2020-08-06,SYMA,2020-08-06T10:35:26.6Z,9,NASDAQ,76.3,100,77.62,100,,,-0.95,0.37
2020-08-06,SYMA,2020-08-06T10:50:52.665Z,11,NASDAQ,77.96,100,79.18,100,78.93,78.93,0.71,1.93


In [0]:
# Persist quote_final as Delta under a per-date output directory, overwriting
# whatever is already stored at that path.
quote_final_df = spark.sql("SELECT * FROM quote_final")
output_path = f"/Volumes/capstone/default/quote_analytics/date={trade_date}"
(
    quote_final_df.write.format("delta")
    .option("mergeSchema", "true")
    .mode("overwrite")
    .save(output_path)
)