In [0]:
%run ./01_batchWC

	Executing Word Count...Done
76665


In [0]:
class batchWCTestSuite():
    """Integration-test harness for the batch word count job.

    The suite copies input text files into a working folder one at a time,
    runs the job after each copy, and checks the summed ``count`` column of
    ``word_count_table`` against a known total.

    It relies on names that are not defined in this class: the ``spark`` and
    ``dbutils`` globals, and ``batchWC`` (presumably brought into scope by
    the ``%run ./01_batchWC`` cell -- confirm).
    """

    def __init__(self):
        # Root folder that holds the text_0N.txt inputs plus the working
        # "data" and "checkpoint" sub-folders used below.
        self.base_data_dir = "/FileStore/week01"

    def cleanTests(self):
        """Drop the result table and wipe the working folders.

        Removes the table, its warehouse directory, the checkpoint folder and
        the data folder, then recreates an empty data folder.
        """
        print("Starting Cleanup...", end='')
        spark.sql("drop table if exists word_count_table")
        dbutils.fs.rm("/user/hive/warehouse/word_count_table", True)

        dbutils.fs.rm(f"{self.base_data_dir}/checkpoint", True)
        dbutils.fs.rm(f"{self.base_data_dir}/data", True)

        dbutils.fs.mkdirs(f"{self.base_data_dir}/data")
        print("Done\n")

    def ingestData(self, itr):
        """Copy input file number ``itr`` into the data folder.

        Args:
            itr: Number of the file to copy; the source name is built as
                ``text_0{itr}.txt``, so a single-digit value is presumably
                expected -- confirm against the files on disk.
        """
        # The original literal used a backslash followed by "S", which is not
        # a valid escape and printed a stray backslash; a tab is presumably
        # what was intended.
        print("\tStarting ingestion...", end='')
        dbutils.fs.cp(f"{self.base_data_dir}/text_0{itr}.txt", f"{self.base_data_dir}/data/")
        print("Done")

    def assertResult(self, expected_count):
        """Check the summed word count in ``word_count_table``.

        Args:
            expected_count: Total that ``sum(count)`` over the table must equal.

        Raises:
            AssertionError: If the total read from the table differs.
        """
        print("\tStarting validation...", end='')
        actual_count = spark.sql("select sum(count) from word_count_table").collect()[0][0]
        assert expected_count == actual_count, f"Test failed! actual count is {actual_count}"
        print(actual_count)

    def runTests(self):
        """Run the three ingestion/count/validate iterations in order.

        Raises:
            AssertionError: As soon as one iteration's total does not match.
        """
        # (ordinal label, input file number, expected total, clean before ingest)
        # Only the first iteration resets the environment, so each later file
        # is added to a data folder that still holds the earlier ones.
        iterations = [
            ("first", 1, 29010, True),
            ("second", 2, 175797, False),
            ("third", 3, 252462, False),
        ]

        self.cleanTests()
        wc = batchWC()

        for ordinal, itr, expected_count, clean_first in iterations:
            print(f"Testing {ordinal} iteration of batch word count...")
            if clean_first:
                # NOTE(review): this repeats the cleanup done just before
                # batchWC() was constructed; kept because the constructor is
                # not visible here and may touch the same locations.
                self.cleanTests()
            self.ingestData(itr)
            wc.wordCount()
            self.assertResult(expected_count)
            print(f"{ordinal.capitalize()} iteration of batch word count completed.\n")
                                                                  


In [0]:
# Build the test suite and execute its full run; an AssertionError from
# runTests() means a validated word-count total did not match.
bwcTS = batchWCTestSuite()
bwcTS.runTests()

Starting Cleanup...Done

Testing first iteration of batch word count...
Starting Cleanup...Done

\Starting ingestion...Done
	Executing Word Count...Done
\Starting validation...29010
First iteration of batch word count completed.

Testing second iteration of batch word count...
\Starting ingestion...Done
	Executing Word Count...Done
\Starting validation...175797
Second iteration of batch word count completed.

Testing third iteration of batch word count...
\Starting ingestion...Done
	Executing Word Count...Done
\Starting validation...252462
Third iteration of batch word count completed.

