In [0]:
%run ./01-config

In [0]:
class SetupHelper():
    """Create and drop the silver-layer schema and its tables for one catalog.

    The class relies on two names supplied by the notebook environment rather
    than by this cell: ``spark`` (the active Spark session) and ``Config``
    (loaded by ``%run ./01-config``).

    Attributes:
        landing_zone: ``Config().base_data_path`` with ``"/raw"`` appended.
            Not used by any method of this class.
        checkpoint_base: ``Config().base_dir_checkpoint`` with
            ``"/checkpoints"`` appended. Not used by any method of this class.
        catalog: Catalog name; the ``env`` value passed to the constructor.
        db_name: Schema name, fixed to ``"ecommerce_db_sv"``.
        initialized: ``True`` once ``create_db()`` has run and until
            ``cleanup()`` is called; every ``create_*_sv`` method refuses to
            run while it is ``False``.
    """

    # Message raised by every table-creation method when create_db() has not run.
    _DB_NOT_READY_MSG = (
        "Application database is not defined. "
        "Cannot create table in default database."
    )

    def __init__(self, env):
        """Store the target names; no Spark call is made here.

        Args:
            env: Name of the catalog that holds the schema (for example ``'dev'``).
        """
        conf = Config()
        self.landing_zone = conf.base_data_path + "/raw"
        self.checkpoint_base = conf.base_dir_checkpoint + "/checkpoints"
        self.catalog = env
        self.db_name = "ecommerce_db_sv"
        self.initialized = False

    def _qualified(self, table_name=None):
        """Return ``catalog.schema`` or, with a table name, ``catalog.schema.table``."""
        schema = f"{self.catalog}.{self.db_name}"
        return schema if table_name is None else f"{schema}.{table_name}"

    def _create_table(self, table_name, columns_ddl):
        """Run ``CREATE TABLE IF NOT EXISTS`` for one table of the schema.

        Args:
            table_name: Unqualified table name, e.g. ``"customer_sv"``.
            columns_ddl: Column and constraint definitions placed between the
                parentheses of the CREATE TABLE statement.

        Raises:
            ReferenceError: If ``create_db()`` has not been called yet (or the
                schema was dropped by ``cleanup()``).
        """
        if not self.initialized:
            raise ReferenceError(self._DB_NOT_READY_MSG)
        full_name = self._qualified(table_name)
        # Report the name that is actually created, including the _sv suffix.
        print(f"Creating the silver table {full_name}...", end='')
        spark.sql(f"CREATE TABLE IF NOT EXISTS {full_name} (\n{columns_ddl}\n)")
        print("Done")

    def create_db(self):
        """Create the schema if needed, make it current and mark the helper ready."""
        schema = self._qualified()
        print(f"Creating the database {schema}...", end='')
        spark.sql(f"CREATE DATABASE IF NOT EXISTS {schema}")
        spark.sql(f"USE {schema}")
        self.initialized = True
        print("Done")

    def create_customer_sv(self):
        """Create ``customer_sv`` (primary key ``customer_id``). Raises ReferenceError if not initialized."""
        self._create_table("customer_sv", """
            customer_id STRING,
            customer_unique_id STRING,
            customer_city STRING,
            customer_state STRING,
            primary key (customer_id)
        """)

    def create_orders_sv(self):
        """Create ``orders_sv`` (primary key ``order_id``). Raises ReferenceError if not initialized."""
        self._create_table("orders_sv", """
            order_id STRING,
            customer_id STRING,
            order_status STRING,
            order_purchase_timestamp TIMESTAMP,
            order_approved_at TIMESTAMP,
            order_delivered_carrier_date TIMESTAMP,
            order_delivered_customer_date TIMESTAMP,
            order_estimated_delivery_date TIMESTAMP,
            primary key (order_id)
        """)

    # NOTE(review): price/freight/payment amounts are stored as FLOAT, which is an
    # approximate type; DECIMAL may be preferable for currency. The schema is kept
    # as-is here because downstream notebooks are not visible from this file.
    def create_order_items_sv(self):
        """Create ``order_items_sv`` (no primary key declared). Raises ReferenceError if not initialized."""
        self._create_table("order_items_sv", """
            order_id STRING,
            order_item_id INT,
            product_id STRING,
            seller_id STRING,
            shipping_limit_date TIMESTAMP,
            price FLOAT,
            freight_value FLOAT
        """)

    def create_order_payment_sv(self):
        """Create ``order_payment_sv`` (no primary key declared). Raises ReferenceError if not initialized."""
        self._create_table("order_payment_sv", """
            order_id STRING,
            payment_sequential TINYINT,
            payment_type STRING,
            payment_installments TINYINT,
            payment_value FLOAT
        """)

    def create_order_reviews_sv(self):
        """Create ``order_reviews_sv`` (no primary key declared). Raises ReferenceError if not initialized."""
        self._create_table("order_reviews_sv", """
            review_id STRING,
            order_id STRING,
            review_score TINYINT,
            review_comment_title STRING,
            review_comment_message STRING,
            review_creation_date TIMESTAMP,
            review_answer_timestamp TIMESTAMP
        """)

    def create_products_sv(self):
        """Create ``products_sv`` (primary key ``product_id``). Raises ReferenceError if not initialized.

        The ``lenght`` spelling in two column names is kept exactly as in the
        original DDL so the table schema is unchanged.
        """
        self._create_table("products_sv", """
            product_id STRING,
            product_category_name STRING,
            product_name_lenght TINYINT,
            product_description_lenght INT,
            product_photos_qty TINYINT,
            product_weight_g INT,
            product_length_cm TINYINT,
            product_height_cm TINYINT,
            product_width_cm TINYINT,
            primary key (product_id)
        """)

    def create_sellers_sv(self):
        """Create ``sellers_sv`` (primary key ``seller_id``). Raises ReferenceError if not initialized."""
        self._create_table("sellers_sv", """
            seller_id STRING,
            seller_city STRING,
            seller_state STRING,
            primary key (seller_id)
        """)

    def create_date_table_sv(self):
        """Create ``date_lookup_sv`` (no primary key declared). Raises ReferenceError if not initialized."""
        self._create_table("date_lookup_sv", """
            DateKey int,
            Date timestamp,
            DayOfWeek tinyint,
            DayName string,
            DayOfMonth tinyint,
            DayOfYear smallint,
            MonthName string,
            MonthOfYear smallint,
            Quarter tinyint,
            QuarterName string,
            Year smallint,
            IsWeekday string
        """)

    def setup(self):
        """Create the schema and then every silver table, in a fixed order."""
        self.create_db()
        self.create_customer_sv()
        self.create_orders_sv()
        self.create_order_items_sv()
        self.create_order_payment_sv()
        self.create_order_reviews_sv()
        self.create_products_sv()
        self.create_sellers_sv()
        self.create_date_table_sv()

    def cleanup(self):
        """Drop the schema with CASCADE if it exists and mark the helper as not ready.

        Resetting ``initialized`` makes the ``create_*_sv`` methods raise
        ``ReferenceError`` again instead of targeting a schema that was just
        dropped; call ``create_db()`` or ``setup()`` to rebuild it.
        """
        schema = self._qualified()
        matches = (
            spark.sql(f"SHOW DATABASES IN {self.catalog}")
            .filter(f"databaseName == '{self.db_name}'")
            .count()
        )
        if matches == 1:
            print(f"Đang xóa database {schema}...", end='')
            spark.sql(f"DROP DATABASE {schema} CASCADE")
            print("Hoàn thành!")
        # Whether or not a drop happened, the schema is not known to exist any more.
        self.initialized = False

In [0]:
# Rebuild the silver schema in the 'dev' catalog from scratch.
# WARNING: cleanup() issues DROP DATABASE ... CASCADE when the schema exists, so
# every run of this cell removes the existing schema before setup() recreates
# the database and its tables with CREATE ... IF NOT EXISTS.
helper = SetupHelper('dev')
helper.cleanup()
helper.setup()