In [0]:
%run ./Streaming-Aggregates

In [0]:
import time
from pyspark.sql.functions import expr

In [0]:
class AggregationTestSuite():
    """End-to-end validation of the Bronze and Gold streaming aggregation jobs.

    The suite resets the test environment, starts both streams, feeds three
    invoice files one at a time and, after each file, checks the row count of
    ``invoices_bz`` and the ``TotalAmount`` of one customer in
    ``customer_rewards``.

    Relies on names that are not defined in this class: ``spark`` and
    ``dbutils`` (assumed to be provided by the Databricks runtime) and the
    ``Bronze`` / ``Gold`` classes (presumably defined by the notebook pulled in
    with ``%run`` - confirm).
    """

    def __init__(self):
        # Root folder holding the invoices_<n>.json source files as well as the
        # data/ landing directory and the checkpoint/ directories used below.
        self.base_data_dir = "/FileStore/test"

    def cleanTests(self):
        """Drop both tables and remove their storage, checkpoints and landing data.

        The landing directory ``<base_data_dir>/data/invoices`` is re-created
        empty after it has been removed.
        """
        print("Starting Cleanup...")
        spark.sql("drop table if exists invoices_bz")
        spark.sql("drop table if exists customer_rewards")
        dbutils.fs.rm("/user/hive/warehouse/invoices_bz", True)
        dbutils.fs.rm("/user/hive/warehouse/customer_rewards", True)

        dbutils.fs.rm(f"{self.base_data_dir}/checkpoint/invoices_bz", True)
        dbutils.fs.rm(f"{self.base_data_dir}/checkpoint/customer_rewards", True)

        dbutils.fs.rm(f"{self.base_data_dir}/data/invoices", True)
        dbutils.fs.mkdirs(f"{self.base_data_dir}/data/invoices")
        print("Done")

    def ingestData(self, itr):
        """Place ``invoices_<itr>.json`` into the landing directory.

        Args:
            itr: Suffix of the source file to ingest (1, 2, 3, ...).
        """
        print("Starting Ingestion...")
        # NOTE: dbutils.fs.cp is used, so the source file also stays in
        # base_data_dir even though the message below says "moved".
        dbutils.fs.cp(f"{self.base_data_dir}/invoices_{itr}.json", f"{self.base_data_dir}/data/invoices/")
        print("File moved successfully.")

    def assertBronze(self, expected_count):
        """Assert that ``invoices_bz`` holds exactly ``expected_count`` rows.

        Raises:
            AssertionError: If the actual row count differs.
        """
        print("Starting Bronze validation...")
        actual_count = spark.sql("select count(*) from invoices_bz").collect()[0][0]
        assert expected_count == actual_count, f"Test failed! actual count is {actual_count}"
        print("Bronze Test Passed Successfully.")

    def assertGold(self, expected_value):
        """Assert the ``TotalAmount`` of customer card ``2262471989`` in ``customer_rewards``.

        Raises:
            AssertionError: If the customer has no row yet, or if the stored
                total differs from ``expected_value``.
        """
        print("Starting Gold validation...")
        rows = spark.sql("select TotalAmount from customer_rewards where CustomerCardNo = '2262471989'").collect()
        # Fail with a readable message instead of an IndexError when the
        # aggregate row has not been written yet.
        assert rows, "Test failed! no customer_rewards row found for CustomerCardNo 2262471989"
        actual_value = rows[0][0]
        assert expected_value == actual_value, f"Test failed! actual value is {actual_value}"
        print("Gold Test Passed Successfully.")

    def waitForMicroBatch(self, sleep=120):
        """Block for ``sleep`` seconds so the streams can pick up the new file.

        Args:
            sleep: Number of seconds to wait (default 120).
        """
        print(f"Waiting for {sleep} seconds...", end='')
        time.sleep(sleep)
        print("Done.")

    def runTests(self):
        """Run the three-iteration scenario against freshly started streams.

        Expected values are cumulative: each iteration adds the rows and the
        amount contributed by the newly ingested file. Every query that was
        started is stopped on exit, including when an assertion fails, so a
        failed run does not leave streams running.

        Raises:
            AssertionError: On the first validation that does not match.
        """
        self.cleanTests()

        # Queries are recorded as soon as they start so the finally-block can
        # stop exactly those that are running, in start order.
        started_queries = []
        try:
            bzStream = Bronze()
            started_queries.append(bzStream.process())

            gdStream = Gold()
            started_queries.append(gdStream.process())

            print("\nTesting first iteration of invoice stream...")
            self.ingestData(1)
            self.waitForMicroBatch()
            self.assertBronze(501)
            self.assertGold(36859)
            print("First iteration of validations passed.\n")

            print("\nTesting second iteration of invoice stream...")
            self.ingestData(2)
            self.waitForMicroBatch()
            self.assertBronze(501+500)
            self.assertGold(36859+20740)
            print("Second iteration of validations passed.\n")

            print("\nTesting third iteration of invoice stream...")
            self.ingestData(3)
            self.waitForMicroBatch()
            self.assertBronze(501+500+590)
            self.assertGold(36859+20740+31959)
            print("Third iteration of validations passed.\n")
        finally:
            for query in started_queries:
                query.stop()


In [0]:
# Build the suite and execute the full three-iteration validation scenario.
# NOTE(review): runTests() calls waitForMicroBatch() once per iteration with its
# default of 120 seconds, so this cell blocks for several minutes.
aTS = AggregationTestSuite()
aTS.runTests()	

Starting Cleanup...
Done
Starting Bronze Stream ...
Done

Starting Gold Stream ...

Testing first iteration of invoice stream...
Starting Ingestion...
File moved successfully.
Waiting for 120 seconds...Done.
Starting Bronze validation...
Bronze Test Passed Successfully.
Starting Gold validation...
Bronze Test Passed Successfully.
First iteration of validations passed.


Testing second iteration of invoice stream...
Starting Ingestion...
File moved successfully.
Waiting for 120 seconds...Done.
Starting Bronze validation...
Bronze Test Passed Successfully.
Starting Gold validation...
Bronze Test Passed Successfully.
Second iteration of validations passed.n

Testing third iteration of invoice stream...
Starting Ingestion...
File moved successfully.
Waiting for 120 seconds...Done.
Starting Bronze validation...
Bronze Test Passed Successfully.
Starting Gold validation...
Bronze Test Passed Successfully.
Third iteration of validations passed.

