# In [0]:
import unittest
from pyspark.sql.functions import col

class TestItemsNotebook(unittest.TestCase):
    """Sanity checks on the column types and values of a sample items DataFrame.

    The fixture is a three-row DataFrame built once in ``setUpClass``. Each
    test pulls a single column back to the driver and checks its values one
    by one, so a failing assertion reports the offending value.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared sample DataFrame used by every test in the class."""
        # Create a sample items DataFrame similar to your raw input
        data = [
            (7680852290, "BARILLA BUCATINI RIGATI", 2, "Barilla", "14 OZ", 14.0, "OZ"),
            (9505900016, "BARILLA PLUS SPAGHETTI", 2, "Barilla Plus", "16 OZ", 16.0, "OZ"),
            (1510000001, "CREAMETTE ELBW MACARONI SMALL", 2, "Creamette", "7 OZ", 7.0, "OZ"),
        ]
        columns = [
            "code", "description", "type_indexed", "brand_indexed",
            "size_str", "size_value", "size_unit",
        ]
        try:
            # Notebook environments inject a ready-made ``spark`` session.
            session = spark  # noqa: F821
        except NameError:
            # No injected session (e.g. run as a plain script): get or start one.
            from pyspark.sql import SparkSession

            session = SparkSession.builder.getOrCreate()
        cls.df = session.createDataFrame(data, columns)

    @classmethod
    def tearDownClass(cls):
        """Intentionally a no-op; the Spark session is left running."""
        pass  # No need to stop Spark in Databricks

    def _column_values(self, name):
        """Return the values of column ``name`` as a plain list on the driver.

        Uses ``DataFrame.collect()`` instead of the RDD API, which is not
        available on Spark Connect sessions. Fails the calling test when the
        column yields no rows, so per-value checks cannot pass vacuously.
        """
        values = [row[0] for row in self.df.select(name).collect()]
        self.assertTrue(values, "column {!r} returned no rows".format(name))
        return values

    def test_brand_and_type_indexed(self):
        """``brand_indexed`` holds strings and ``type_indexed`` holds ints."""
        for brand in self._column_values("brand_indexed"):
            with self.subTest(brand_indexed=brand):
                self.assertIsInstance(brand, str)
        for type_code in self._column_values("type_indexed"):
            with self.subTest(type_indexed=type_code):
                self.assertIsInstance(type_code, int)

    def test_size_value_positive(self):
        """Every ``size_value`` is a float strictly greater than zero."""
        for size_value in self._column_values("size_value"):
            with self.subTest(size_value=size_value):
                # Type check first so a null (None) cell fails here instead of
                # raising TypeError in the comparison below.
                self.assertIsInstance(size_value, float)
                self.assertGreater(size_value, 0)

    def test_size_unit_consistency(self):
        """Every ``size_unit`` is one of the recognised unit codes."""
        valid_units = {"OZ", "LB", "G", "KG"}
        for unit in self._column_values("size_unit"):
            with self.subTest(size_unit=unit):
                self.assertIn(unit, valid_units)

if __name__ == "__main__":
    # exit=False stops unittest from calling sys.exit() once the run finishes,
    # so the hosting interpreter (e.g. a notebook kernel) keeps running.
    # The single argv entry only fills the program-name slot, which unittest
    # does not interpret, so the host process's own arguments are never parsed.
    unittest.main(exit=False, argv=['first-arg-is-ignored'])