In [0]:
%run ./Tumbling-Window

In [0]:
import time
from pyspark.sql.functions import expr, to_timestamp

In [0]:
class TradeSummaryTestSuite:
  """End-to-end test for the trade summary stream.

  The suite feeds rows into the `kafka_bz` table, waits for the stream to
  pick them up, and checks the per-window totals in `trade_summary`.

  Relies on notebook globals `spark` and `dbutils`, and on a `TradeSummary`
  class that is not defined in this notebook (presumably brought in by the
  `%run ./Tumbling-Window` cell -- confirm).
  """

  def __init__(self):
    # Root directory under which the checkpoint folders removed by cleanTests() live.
    self.base_data_dir = "/FileStore/test"

  def cleanTests(self):
    """Reset tables and checkpoints so the run starts from a clean state.

    Drops `kafka_bz` and `trade_summary`, removes their warehouse and
    checkpoint directories, then recreates an empty `kafka_bz` table.
    """
    print("Starting Cleanup...")
    spark.sql("drop table if exists kafka_bz")
    spark.sql("drop table if exists trade_summary")
    dbutils.fs.rm("/user/hive/warehouse/kafka_bz", True)
    dbutils.fs.rm("/user/hive/warehouse/trade_summary", True)

    dbutils.fs.rm(f"{self.base_data_dir}/checkpoint/kafka_bz", True)
    dbutils.fs.rm(f"{self.base_data_dir}/checkpoint/trade_summary", True)

    # The test inserts (key, value) string pairs into this table.
    spark.sql("CREATE TABLE kafka_bz (key string, value string)")

    print("Done.")

  def waitForMicroBatch(self, sleep=30):
    """Block for `sleep` seconds to give the stream time to process new rows.

    Args:
      sleep: Number of seconds to wait (default 30).
    """
    print(f"Waiting for {sleep} seconds...")
    time.sleep(sleep)
    print("Done.")

  def assertTradeSummary(self, start, end, expected_buy, expected_sell):
    """Check the totals stored in `trade_summary` for one window.

    Args:
      start: Window start formatted as 'yyyy-MM-dd HH:mm:ss'.
      end: Window end formatted as 'yyyy-MM-dd HH:mm:ss'.
      expected_buy: Expected value of the TotalBuy column.
      expected_sell: Expected value of the TotalSell column.

    Raises:
      AssertionError: If no row exists for the window, or if either total
        differs from the expected value.
    """
    print("Starting Trade Summary Validation ...")
    result = spark.sql(f"""SELECT TotalBuy, TotalSell
                       from trade_summary
                       WHERE date_format(start, 'yyyy-MM-dd HH:mm:ss') = '{start}'
                       AND date_format(end, 'yyyy-MM-dd HH:mm:ss') = '{end}'
                       """).collect()
    # A missing window would otherwise surface as an opaque IndexError below.
    assert len(result) > 0, f"Test failed! No trade_summary row found for window {start} - {end}."
    actual_buy = result[0][0]
    actual_sell = result[0][1]
    assert expected_buy == actual_buy, f"Test failed! Actual buy amount is {actual_buy}."
    assert expected_sell == actual_sell, f"Test failed! Actual sell amount is {actual_sell}."
    print("Done.")

  def runTests(self):
    """Run the full scenario: cleanup, three insert/validate rounds, stream stop.

    The streaming query returned by `TradeSummary().process()` is always
    stopped, including when a validation fails, so a failed run does not
    leave a query running against the checkpoint.

    Raises:
      AssertionError: Propagated from assertTradeSummary on the first failed check.
    """
    self.cleanTests()

    stream = TradeSummary()
    sQuery = stream.process()

    try:
      print("Testing first 2 events ...")
      spark.sql("""INSERT INTO kafka_bz VALUES
              ('2019-02-05', '{"CreatedTime": "2019-02-05 10:05:00", "Type": "BUY", "Amount": 500, "BrokerCode": "ABX"}'),
              ('2019-02-05', '{"CreatedTime": "2019-02-05 10:12:00", "Type": "BUY", "Amount": 300, "BrokerCode": "ABX"}')""")
      self.waitForMicroBatch()
      self.assertTradeSummary('2019-02-05 10:00:00', '2019-02-05 10:15:00', 800, 0)

      print("Testing third and fourth events ...")
      spark.sql("""INSERT INTO kafka_bz VALUES
              ('2019-02-05', '{"CreatedTime": "2019-02-05 10:20:00", "Type": "BUY", "Amount": 600, "BrokerCode": "ABX"}'),
              ('2019-02-05', '{"CreatedTime": "2019-02-05 10:40:00", "Type": "BUY", "Amount": 900, "BrokerCode": "ABX"}')""")
      self.waitForMicroBatch()
      self.assertTradeSummary('2019-02-05 10:15:00', '2019-02-05 10:30:00', 600, 0)
      self.assertTradeSummary('2019-02-05 10:30:00', '2019-02-05 10:45:00', 900, 0)

      # The second row (10:25) is older than events already processed; the
      # 10:15-10:30 window is expected to be updated with its SELL amount.
      print("Testing late event...")
      spark.sql("""INSERT INTO kafka_bz VALUES
              ('2019-02-05', '{"CreatedTime": "2019-02-05 10:48:00", "Type": "SELL", "Amount": 500, "BrokerCode": "ABX"}'),
              ('2019-02-05', '{"CreatedTime": "2019-02-05 10:25:00", "Type": "SELL", "Amount": 400, "BrokerCode": "ABX"}')""")
      self.waitForMicroBatch()
      self.assertTradeSummary('2019-02-05 10:45:00', '2019-02-05 11:00:00', 0, 500)
      self.assertTradeSummary('2019-02-05 10:15:00', '2019-02-05 10:30:00', 600, 400)

      print("Validation passed.\n")
    finally:
      # Stop the stream on success and on failure alike.
      sQuery.stop()


In [0]:
# Build the test suite and execute the full scenario defined in runTests():
# cleanup, three insert/validate rounds, then stopping the streaming query.
ts = TradeSummaryTestSuite()
ts.runTests()	

Starting Cleanup...
Done.
Testing first 2 events ...
Waiting for 30 seconds...
Done.
Starting Trade Summary Validation ...
Done.
Testing third and fourth events ...
Waiting for 30 seconds...
Done.
Starting Trade Summary Validation ...
Done.
Starting Trade Summary Validation ...
Done.

Testing late event...
Waiting for 30 seconds...
Done.
Starting Trade Summary Validation ...
Done.
Starting Trade Summary Validation ...
Done.
Validation passed.



In [0]:
%sql
-- List every row of trade_summary ordered by the start column, for a manual
-- look at the table contents after the test cell above has run.
SELECT * FROM trade_summary
ORDER BY start

start,end,TotalBuy,TotalSell
2019-02-05T10:00:00Z,2019-02-05T10:15:00Z,800,0
2019-02-05T10:15:00Z,2019-02-05T10:30:00Z,600,400
2019-02-05T10:30:00Z,2019-02-05T10:45:00Z,900,0
2019-02-05T10:45:00Z,2019-02-05T11:00:00Z,0,500
