In [0]:
%run ./Word-Count

In [0]:
class batchWCTestSuite():
  """End-to-end test harness for the batch word-count job.

  Relies on names supplied by the notebook environment rather than by this
  cell: `spark`, `dbutils`, and `batchWC` (presumably defined by the
  `%run ./Word-Count` include -- confirm).
  """

  # (ordinal label, expected cumulative count of words starting with 's')
  # for each input file, in ingestion order.
  ITERATIONS = (("first", 25), ("second", 32), ("third", 37))

  def __init__(self):
    # Root folder holding the text_data_<n>.txt fixtures and the landing dir.
    self.base_data_dir = "/FileStore/test"

  def cleanTest(self):
    """Reset the test environment.

    Drops `word_count_table`, removes its warehouse directory, the checkpoint
    directory and the text landing directory, then recreates an empty landing
    directory.
    """
    print("Starting cleanup ...")
    spark.sql("drop table if exists word_count_table")
    dbutils.fs.rm("/user/hive/warehouse/word_count_table", True)
    dbutils.fs.rm(f"{self.base_data_dir}/checkpoint", True)
    dbutils.fs.rm(f"{self.base_data_dir}/data/text", True)
    dbutils.fs.mkdirs(f"{self.base_data_dir}/data/text")
    print("Done.")

  def ingestData(self, itr):
    """Copy fixture file `text_data_<itr>.txt` into the landing directory.

    Args:
      itr: Number used to select the fixture file.
    """
    # end='' keeps "Done." on the same line, matching the streaming suite.
    print("Starting ingestion ...", end='')
    dbutils.fs.cp(f"{self.base_data_dir}/text_data_{itr}.txt", f"{self.base_data_dir}/data/text/")
    print("Done.\n")

  def assertResult(self, expected_count):
    """Assert the summed count of words whose first character is 's'.

    Args:
      expected_count: Expected value of sum(count) over the matching rows.

    Raises:
      AssertionError: If the value read from `word_count_table` differs.
    """
    actual_count = spark.sql("select sum(count) from word_count_table where substr(word, 1, 1) == 's'").collect()[0][0]
    assert expected_count == actual_count, f"Test Failed! Actual count is {actual_count}"

  def runTests(self):
    """Clean up, then run every iteration: ingest, word count, assert.

    Raises:
      AssertionError: On the first iteration whose count does not match.
    """
    self.cleanTest()
    wc = batchWC()

    for itr, (ordinal, expected_count) in enumerate(self.ITERATIONS, start=1):
      print(f"Testing {ordinal} iteration of the batch word count ...")
      self.ingestData(itr)
      wc.wordCount()
      self.assertResult(expected_count)
      # The label is derived from the loop entry so it cannot drift from the
      # iteration actually executed.
      print(f"{ordinal.capitalize()} iteration of the batch word count completed.\n")

In [0]:
# Run the batch word-count test suite end to end; runTests() raises
# AssertionError as soon as a count check does not match.
bwcTS = batchWCTestSuite()
bwcTS.runTests()

Starting cleanup ...
Done.
Testing first iteration of the batch word count ...
Starting ingestion ...Done.

	Executing Word Count...Done
First iteration of the batch word count completed.

Testing second iteration of the batch word count ...
Starting ingestion ...Done.

	Executing Word Count...Done
Third iteration of the batch word count completed.

Testing third iteration of the batch word count ...
Starting ingestion ...Done.

	Executing Word Count...Done
Third iteration of the batch word count completed.



In [0]:
import time

In [0]:
class streamWCTestSuite():
  """End-to-end test harness for the streaming word-count job.

  Relies on names supplied by the notebook environment rather than by this
  cell: `spark`, `dbutils`, `time`, and `streamWC` (presumably defined by the
  `%run ./Word-Count` include -- confirm).
  """

  # (ordinal label, expected cumulative count of words starting with 's')
  # for each input file, in ingestion order.
  ITERATIONS = (("first", 25), ("second", 32), ("third", 37))

  def __init__(self):
    # Root folder holding the text_data_<n>.txt fixtures and the landing dir.
    self.base_data_dir = "/FileStore/test"

  def cleanTest(self):
    """Reset the test environment.

    Drops `word_count_table`, removes its warehouse directory, the checkpoint
    directory and the text landing directory, then recreates an empty landing
    directory.
    """
    print("Starting cleanup ...")
    spark.sql("drop table if exists word_count_table")
    dbutils.fs.rm("/user/hive/warehouse/word_count_table", True)
    dbutils.fs.rm(f"{self.base_data_dir}/checkpoint", True)
    dbutils.fs.rm(f"{self.base_data_dir}/data/text", True)
    dbutils.fs.mkdirs(f"{self.base_data_dir}/data/text")
    print("Done.")

  def ingestData(self, itr):
    """Copy fixture file `text_data_<itr>.txt` into the landing directory.

    Args:
      itr: Number used to select the fixture file.
    """
    print("Starting ingestion ...", end='')
    dbutils.fs.cp(f"{self.base_data_dir}/text_data_{itr}.txt", f"{self.base_data_dir}/data/text/")
    print("Done.\n")

  def assertResult(self, expected_count):
    """Assert the summed count of words whose first character is 's'.

    Args:
      expected_count: Expected value of sum(count) over the matching rows.

    Raises:
      AssertionError: If the value read from `word_count_table` differs.
    """
    actual_count = spark.sql("select sum(count) from word_count_table where substr(word, 1, 1) == 's'").collect()[0][0]
    assert expected_count == actual_count, f"Test Failed! Actual count is {actual_count}"

  def runTests(self, sleepTime=30):
    """Clean up, start the stream, then ingest/wait/assert for each iteration.

    Args:
      sleepTime: Seconds to wait after each ingestion before checking the
        table. Defaults to 30.

    Raises:
      AssertionError: On the first iteration whose count does not match.
    """
    # Cleanup runs before the stream starts, as in the original ordering.
    self.cleanTest()
    wc = streamWC()
    sQuery = wc.wordCount()

    try:
      for itr, (ordinal, expected_count) in enumerate(self.ITERATIONS, start=1):
        print(f"Testing {ordinal} iteration of the stream word count ...")
        self.ingestData(itr)
        print(f"Waiting for {sleepTime} seconds ...")
        time.sleep(sleepTime)
        self.assertResult(expected_count)
        print(f"{ordinal.capitalize()} iteration of the stream word count completed.\n")
    finally:
      # Stop the query even when an assertion fails, so a failed run does not
      # leave it running.
      sQuery.stop()

In [0]:
# Run the streaming word-count test suite end to end; runTests() raises
# AssertionError as soon as a count check does not match.
swcTS = streamWCTestSuite()
swcTS.runTests()

Starting cleanup ...
Done.
Starting Word Count Stream ...
Done
Testing first iteration of the batch word count ...
Starting ingestion ...Done.

Waiting for 30 seconds ...
First iteration of the batch word count completed.

Testing second iteration of the batch word count ...
Starting ingestion ...Done.

Waiting for 30 seconds ...
Third iteration of the batch word count completed.

Testing third iteration of the batch word count ...
Starting ingestion ...Done.

Waiting for 30 seconds ...
Third iteration of the batch word count completed.

