In [0]:
import math
import unittest

from pyspark.sql.functions import col, cos, pi, sin

class TestSalesDataProcessing(unittest.TestCase):
    """Checks of the feature transformations applied to the sales data.

    Each test re-applies a transformation to the parquet data loaded in
    ``setUpClass`` and compares Spark's result with the same logic evaluated
    in plain Python on the driver.
    """

    @classmethod
    def setUpClass(cls):
        """Load the sales parquet data once and share it across all tests."""
        # ``spark`` is not defined in this file; it is expected to be supplied
        # by the runtime (presumably a notebook-provided SparkSession --
        # TODO confirm).
        parquet_path = "/mnt/silver/sales/"
        cls.df = spark.read.parquet(parquet_path)

    def test_province_encoding(self):
        """Check the indicator logic: ``provinceN`` is 1 only if province == N."""
        # Identical province values produce identical assertions, so
        # de-duplicate in Spark instead of collecting every row to the driver.
        provinces = self.df.select("province").distinct()
        encoded = provinces.withColumn(
            "province1", (col("province") == 1).cast("int")
        ).withColumn(
            "province2", (col("province") == 2).cast("int")
        )
        for row in encoded.collect():
            # subTest names the offending value and lets the loop report every
            # failing province rather than stopping at the first one.
            with self.subTest(province=row["province"]):
                self.assertEqual(
                    row["province1"], 1 if row["province"] == 1 else 0
                )
                self.assertEqual(
                    row["province2"], 1 if row["province"] == 2 else 0
                )

    def test_hour_extraction_and_cyclic_features(self):
        """Check the time transformation: hour extraction and sin/cos encoding."""
        HOURS_IN_DAY = 24
        # Only distinct ``time`` values are needed; every derived column is a
        # deterministic function of ``time``.
        times = self.df.select("time").distinct()
        with_hour = times.withColumn("hour", (col("time") / 100).cast("int"))
        featured = with_hour.withColumn(
            "hoursin", sin(2 * pi() * col("hour") / HOURS_IN_DAY)
        ).withColumn(
            "hourcos", cos(2 * pi() * col("hour") / HOURS_IN_DAY)
        )
        for row in featured.collect():
            with self.subTest(time=row["time"]):
                # A null time would otherwise surface as an opaque TypeError
                # from the floor division below.
                self.assertIsNotNone(row["time"], "time must not be null")
                expected_hour = row["time"] // 100
                self.assertEqual(row["hour"], expected_hour)
                self.assertIsInstance(row["hoursin"], float)
                self.assertIsInstance(row["hourcos"], float)
                # Verify the values, not just the types: the pair must be the
                # point on the unit circle for this hour of the day.
                angle = 2 * math.pi * row["hour"] / HOURS_IN_DAY
                self.assertAlmostEqual(row["hoursin"], math.sin(angle))
                self.assertAlmostEqual(row["hourcos"], math.cos(angle))

if __name__ == "__main__":
    # Pass an explicit argv so unittest does not parse the host process's
    # command line (its first element is only treated as the program name),
    # and exit=False so the interpreter is not terminated via sys.exit()
    # once the run finishes.
    unittest.main(
        argv=["first-arg-is-ignored"],
        exit=False,
    )