# In [0]:
import unittest
from pyspark.sql.functions import col

class TestItemsNotebook(unittest.TestCase):
    """Data-quality checks for the ``items`` Parquet dataset.

    The dataset is read once per test class through the ``spark`` session,
    which this file does not define and which is expected to be supplied as
    a global by the notebook runtime.  Each test pulls a single column to
    the driver and validates its values with plain Python.

    Note: every check is of the form "no value violates the rule", so an
    empty dataset passes all tests.
    """

    # Units that the ``size_unit`` column is allowed to contain.
    VALID_SIZE_UNITS = frozenset({"OZ", "LB", "G", "KG", "UNKNOWN"})

    # Upper bound on how many offending values are echoed in a failure message.
    _MAX_REPORTED = 10

    @classmethod
    def setUpClass(cls):
        """Load the dataset once and share it with every test as ``cls.df``."""
        # Path to the Parquet dataset (Azure Blob mount, per the original note).
        parquet_path = "/mnt/silver/items/"

        # `spark` is resolved from the surrounding globals at call time.
        cls.df = spark.read.parquet(parquet_path)

    @classmethod
    def tearDownClass(cls):
        """Intentionally a no-op: the shared Spark session is left running."""
        pass  # No need to stop Spark in Databricks

    def _column_values(self, column_name):
        """Return every value of ``column_name`` as a list on the driver.

        Uses ``DataFrame.collect()`` instead of going through ``.rdd`` so the
        tests do not depend on the RDD API, which is not available on Spark
        Connect sessions or Databricks shared-access clusters.
        """
        return [row[0] for row in self.df.select(column_name).collect()]

    def _assert_none_offending(self, offenders, complaint):
        """Fail, quoting a sample of ``offenders``, unless the list is empty.

        Args:
            offenders: list of values that violated the rule under test.
            complaint: text completing "<n> value(s) ..." in the message.
        """
        sample = offenders[: self._MAX_REPORTED]
        self.assertFalse(
            offenders,
            "{} value(s) {}; sample: {!r}".format(
                len(offenders), complaint, sample
            ),
        )

    def test_brand_and_type_indexed(self):
        """Every ``brand_indexed`` and ``type_indexed`` value is a float.

        Nulls are not floats, so a null in either column fails the test.
        """
        for column_name in ("brand_indexed", "type_indexed"):
            # subTest keeps a failure in one column from hiding the other.
            with self.subTest(column=column_name):
                values = self._column_values(column_name)
                non_floats = [v for v in values if not isinstance(v, float)]
                self._assert_none_offending(
                    non_floats, "in {} are not floats".format(column_name)
                )

    def test_size_value_positive(self):
        """Every ``size_value`` is either null or a float that is >= 0."""
        present = [
            v for v in self._column_values("size_value") if v is not None
        ]

        # Type check first: the comparison below is only meaningful on floats.
        non_floats = [v for v in present if not isinstance(v, float)]
        self._assert_none_offending(non_floats, "in size_value are not floats")

        # `not v >= 0` (rather than `v < 0`) so that NaN is reported as well.
        not_non_negative = [v for v in present if not v >= 0]
        self._assert_none_offending(
            not_non_negative, "in size_value are negative or NaN"
        )

    def test_size_unit_consistency(self):
        """Every ``size_unit`` is one of ``VALID_SIZE_UNITS`` (null is not)."""
        units = self._column_values("size_unit")
        unexpected = [u for u in units if u not in self.VALID_SIZE_UNITS]
        self._assert_none_offending(
            unexpected,
            "in size_unit are outside {}".format(sorted(self.VALID_SIZE_UNITS)),
        )

if __name__ == "__main__":
    # Run the suite in-process:
    #   * argv is overridden so unittest does not try to parse the host
    #     process's own command-line arguments;
    #   * exit=False stops unittest from calling sys.exit() when it finishes.
    unittest.main(
        argv=["first-arg-is-ignored"],
        exit=False,
    )
