In [0]:
 %run ./01-config

In [0]:
class SetupHelper():
    """Create and drop the gold-layer database and its dimension/fact tables.

    The class relies on two names supplied by the notebook environment rather
    than by this block: ``spark`` (the active Spark session) and ``Config``
    (presumably defined by the ``01-config`` notebook -- verify).

    Attributes:
        landing_zone: ``Config().base_data_path`` with ``/raw`` appended.
        checkpoint_base: ``Config().base_dir_checkpoint`` with ``/checkpoints`` appended.
        catalog: Catalog name; taken verbatim from the ``env`` argument.
        db_name: Name of the gold database (``ecommerce_db_gd``).
        initialized: True only while the database is known to have been
            created by ``create_db`` and not yet dropped by ``cleanup``.

    NOTE(review): ``dim_time_gd.DateKey`` and the identity key columns are
    declared without ``NOT NULL`` although they are primary keys -- confirm
    the target runtime accepts this; the DDL is kept as originally written.
    """

    def __init__(self, env):
        """Read paths from ``Config`` and remember the target catalog.

        Args:
            env: Catalog name used as the first part of every qualified
                object name (e.g. ``"dev"``).
        """
        conf = Config()
        self.landing_zone = conf.base_data_path + "/raw"
        self.checkpoint_base = conf.base_dir_checkpoint + "/checkpoints"
        self.catalog = env
        self.db_name = "ecommerce_db_gd"
        self.initialized = False

    def _create_table(self, table_name, column_defs):
        """Issue ``CREATE TABLE IF NOT EXISTS`` for one gold table.

        Args:
            table_name: Unqualified table name inside the gold database.
            column_defs: Iterable of column / constraint definitions, one
                string per entry, in the order they should appear in the DDL.

        Raises:
            ReferenceError: If ``create_db`` has not run (or ``cleanup`` ran
                afterwards), so the target database is not available.
        """
        if not self.initialized:
            raise ReferenceError("Application database is not defined. Cannot create table in default database.")

        qualified_name = f"{self.catalog}.{self.db_name}.{table_name}"
        print(f"Creating the gold table {qualified_name}...", end='')
        columns_sql = ",\n    ".join(column_defs)
        spark.sql(
            f"CREATE TABLE IF NOT EXISTS {qualified_name} (\n"
            f"    {columns_sql}\n"
            f")"
        )
        print("Done")

    def create_db(self):
        """Create the gold database if needed, switch to it, and mark it ready."""
        spark.catalog.clearCache()
        print(f"Creating the database {self.catalog}.{self.db_name}...", end='')
        spark.sql(f"CREATE DATABASE IF NOT EXISTS {self.catalog}.{self.db_name}")
        spark.sql(f"USE {self.catalog}.{self.db_name}")
        self.initialized = True
        print("Done")

    def create_dim_customer_gd(self):
        """Create the ``dim_customer_gd`` table; raises ReferenceError if the database is not initialized."""
        self._create_table("dim_customer_gd", (
            "CustomerKey BIGINT GENERATED ALWAYS AS IDENTITY",
            "CustomerID STRING NOT NULL",
            "CustomerCity STRING",
            "CustomerState STRING",
            "PRIMARY KEY (CustomerKey)",
        ))

    def create_dim_product_gd(self):
        """Create the ``dim_product_gd`` table; raises ReferenceError if the database is not initialized."""
        self._create_table("dim_product_gd", (
            "ProductKey BIGINT GENERATED ALWAYS AS IDENTITY",
            "ProductID STRING NOT NULL",
            "CategoryName STRING NOT NULL",
            "PRIMARY KEY (ProductKey)",
        ))

    def create_dim_seller_gd(self):
        """Create the ``dim_seller_gd`` table; raises ReferenceError if the database is not initialized."""
        self._create_table("dim_seller_gd", (
            "SellerKey BIGINT GENERATED ALWAYS AS IDENTITY",
            "SellerID STRING NOT NULL",
            "SellerCity STRING",
            "SellerState STRING",
            "PRIMARY KEY (SellerKey)",
        ))

    def create_dim_time_gd(self):
        """Create the ``dim_time_gd`` table; raises ReferenceError if the database is not initialized."""
        self._create_table("dim_time_gd", (
            "DateKey INT",
            "Date DATE",
            "DayOfWeek TINYINT",
            "DayName STRING",
            "DayOfMonth TINYINT",
            "DayOfYear SMALLINT",
            "MonthName STRING",
            "MonthOfYear TINYINT",
            "Quarter TINYINT",
            "QuarterName STRING",
            "Year SMALLINT",
            "IsWeekday STRING",
            "PRIMARY KEY (DateKey)",
        ))

    def create_fact_order_gd(self):
        """Create the ``fact_order_gd`` table; raises ReferenceError if the database is not initialized."""
        self._create_table("fact_order_gd", (
            "PurchaseDateKey INT",
            "DeliveredDateKey INT",
            "EstimateDeliveredDateKey INT",
            "CustomerKey BIGINT",
            "order_id STRING NOT NULL",
            "TotalAmount FLOAT",
            "ShipAmount FLOAT",
            "TotalProductValue FLOAT",
            "DeliveryActualDays FLOAT",
            "DeliveryEstimateDays INT",
            "ApproveDays INT",
            "PRIMARY KEY (order_id)",
        ))

    def create_fact_sale_gd(self):
        """Create the ``fact_sale_gd`` table; raises ReferenceError if the database is not initialized."""
        self._create_table("fact_sale_gd", (
            "PurchaseDateKey INT",
            "DeliveredDateKey INT",
            "EstimateDeliveredDateKey INT",
            "CustomerKey BIGINT",
            "ProductKey BIGINT",
            "SellerKey BIGINT",
            "order_item_id STRING NOT NULL",
            "order_id STRING NOT NULL",
            "product_id STRING NOT NULL",
            "Price FLOAT",
            "FreightValue FLOAT",
            "TotalValue FLOAT",
            "PRIMARY KEY (order_item_id, order_id, product_id)",
        ))

    def setup(self):
        """Create the database and then every gold table, dimensions first."""
        self.create_db()
        self.create_dim_customer_gd()
        self.create_dim_product_gd()
        self.create_dim_seller_gd()
        self.create_dim_time_gd()
        self.create_fact_order_gd()
        self.create_fact_sale_gd()

    def cleanup(self):
        """Drop the gold database (with CASCADE) if it exists in the catalog.

        Afterwards the helper is marked uninitialized, so table-creation
        methods raise ReferenceError until ``create_db`` is called again
        instead of targeting a database that no longer exists.
        """
        matching_dbs = (
            spark.sql(f"SHOW DATABASES IN {self.catalog}")
            .filter(f"databaseName == '{self.db_name}'")
            .count()
        )
        if matching_dbs == 1:
            print(f"Dropping database {self.catalog}.{self.db_name}...", end='')
            spark.sql(f"DROP DATABASE {self.catalog}.{self.db_name} CASCADE")
            print("Done!")
        # Whether it was just dropped or never existed, the database is not
        # available now; the create_* guards must reflect that.
        self.initialized = False

In [0]:
# Build the helper with "dev" as the catalog name.
GD = SetupHelper("dev")
# WARNING: cleanup() drops the gold database with CASCADE when it exists, so
# running this cell discards any existing gold tables before they are recreated.
GD.cleanup()
# Recreate the database and all gold dimension/fact tables.
GD.setup()