In [9]:
import unittest

from great_assertions import GreatAssertionResult, GreatAssertions
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()


class SaveTest(GreatAssertions):
    def setUp(self):
        self.spark = SparkSession.builder.getOrCreate()

    def test_fail(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
            ]
        )
        self.expect_table_row_count_to_equal(df, 20)

    def test_pass1(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
            ]
        )
        self.expect_table_row_count_to_equal(df, 1)

    def test_pass2(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
            ]
        )
        self.expect_table_row_count_to_equal(df, 1)

    def test_pass3(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
            ]
        )
        self.expect_table_row_count_to_equal(df, 1)

    def test_fail2(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
            ]
        )
        self.expect_table_row_count_to_equal(df, 55)

    def test_fail3(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
            ]
        )
        self.expect_table_row_count_to_equal(df, 99)

    def test_pass3(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
                {"col_1": 200, "col_2": 20},
                {"col_1": 300, "col_2": 30},
            ]
        )
        self.expect_table_row_count_to_be_greater_than(df, 2)

    def test_fail4(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
                {"col_1": 200, "col_2": 20},
                {"col_1": 300, "col_2": 30},
            ]
        )
        self.expect_table_row_count_to_be_greater_than(df, 4)

    def test_pass4(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
                {"col_1": 100, "col_2": 20},
                {"col_1": 300, "col_2": 30},
            ]
        )
        self.expect_table_has_no_duplicate_rows(df)

    def test_fail5(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": 100, "col_2": 10},
                {"col_1": 100, "col_2": 10},
                {"col_1": 300, "col_2": 30},
            ]
        )
        self.expect_table_has_no_duplicate_rows(df)

    def test_pass5(self):
        # int
        df = self.spark.createDataFrame(
            [{"col_1": 100}, {"col_1": 200}, {"col_1": 300}]
        )
        self.expect_column_values_to_be_between(
            df, "col_1", min_value=99, max_value=301
        )

    def test_fail6(self):
        df = self.spark.createDataFrame(
            [{"col_1": 100}, {"col_1": 200}, {"col_1": 300}]
        )
        self.expect_column_values_to_be_between(df, "col_1", 101, 301)

    def test_pass6(self):
        df = self.spark.createDataFrame(
            [{"col_1": "BA2"}, {"col_1": "BA15"}, {"col_1": "SW1"}]
        )
        self.expect_column_values_to_match_regex(df, "col_1", "^[a-zA-Z]{2}[0-9]{1,2}$")

    def test_fail7(self):
        df = self.spark.createDataFrame(
            [{"col_1": "BA2"}, {"col_1": "BA151"}, {"col_1": "AAA13"}]
        )
        self.expect_column_values_to_match_regex(df, "col_1", "^[a-zA-Z]{2}[0-9]{1,2}$")

    def test_pass7(self):
        df_fruits = [
            {"col_1": "Apple"},
            {"col_1": "Orange"},
            {"col_1": "Cherry"},
            {"col_1": "Apricot(Summer)"},
        ]
        fruits = set(("Apple", "Orange", "Pear", "Cherry", "Apricot(Summer)"))
        df = self.spark.createDataFrame(df_fruits)

        self.expect_column_values_to_be_in_set(df, "col_1", fruits)

    def test_pass8(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": "BA2", "col_2": 10, "col_3": 10.45},
                {"col_1": "BA15", "col_2": 20, "col_3": 10.45},
                {"col_1": "SW1", "col_2": 30, "col_3": 10.45},
            ]
        )
        self.expect_column_values_to_be_of_type(df, "col_1", str)

    def test_pass9(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": "2019-05-13"},
                {"col_1": "2018-12-12"},
                {"col_1": "2015-10-01"},
            ]
        )
        self.expect_date_range_to_be_less_than(df, "col_1", "2019-05-14")

    def test_fail8(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": "2019-05-13"},
                {"col_1": "2018-12-12"},
                {"col_1": "2015-10-01"},
            ]
        )
        self.expect_date_range_to_be_less_than(df, "col_1", "2019-05-13")

    def test_pass10(self):
        df = self.spark.createDataFrame([{"col_1": ""}])

        self.expect_date_range_to_be_more_than(df, "col_1", "1899-12-31")

    def test_fail9(self):
        df = self.spark.createDataFrame(
            [
                {"col_1": "2019-05-13"},
                {"col_1": "2018-12-12"},
                {"col_1": "2015-10-01"},
            ]
        )
        self.expect_date_range_to_be_more_than(df, "col_1", "2015-10-01")


suite = unittest.TestLoader().loadTestsFromTestCase(SaveTest)
test_runner = unittest.runner.TextTestRunner(resultclass=GreatAssertionResult)
result = test_runner.run(suite)
result.save(format="pyspark", spark=spark)

FFFFFFFFF..........
FAIL: test_fail (__main__.SaveTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/var/folders/hd/643ldtb90kl47t0v8x569lbr1mrhfd/T/ipykernel_56739/151546010.py", line 19, in test_fail
    self.expect_table_row_count_to_equal(df, 20)
  File "/Users/sam.treweek/Desktop/great_assertions/ve/lib/python3.9/site-packages/great_assertions-0.0.64-py3.9.egg/great_assertions.py", line 199, in expect_table_row_count_to_equal
    raise self.failureException(msg)
AssertionError: expected row count is 20 the actual was 1 : 

FAIL: test_fail2 (__main__.SaveTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/var/folders/hd/643ldtb90kl47t0v8x569lbr1mrhfd/T/ipykernel_56739/151546010.py", line 51, in test_fail2
    self.expect_table_row_count_to_equal(df, 55)
  File "/Users/sam.treweek/Desktop/great_assertions/ve/lib/python3.9/site-packages/great_asse