### Run initial setup code

In [0]:
%run /Workspace/Users/mbothe7@hotmail.com/databricks-end-to-end-project/01-initial-config

### Setting up initial objects

In [0]:
class InitialSetupHelper():
    """Create, validate and drop the catalog, schemas and raw tables of one environment.

    The class issues SQL through the global ``spark`` session, which is
    expected to be provided by the notebook runtime. Storage locations are
    read from ``InitialConfig`` (presumably defined by the notebook pulled in
    with ``%run`` -- verify against that notebook).
    """

    def __init__(self, env):
        """Read the storage paths from ``InitialConfig`` and remember the catalog.

        Args:
            env: Name of the catalog (one per environment) to operate on.
        """
        conf = InitialConfig()
        self.landing_zone = conf.landing_path + "/raw"
        self.checkpoint_path = conf.checkpoint_path
        self.root_path = conf.root_path
        self.silver_path = conf.silver_path
        self.bronze_path = conf.bronze_path
        self.gold_path = conf.gold_path
        # Historical misspelling, kept as an alias so existing callers still work.
        self.good_path = self.gold_path
        self.catalog = env
        #self.db_name      = Conf.db_name
        # Set to True by initial_setup() and back to False by environment_cleanup().
        self.initialized = False

    def _require_catalog(self):
        """Raise ``ReferenceError`` when no catalog name has been configured."""
        if not self.catalog:
            raise ReferenceError("Application database is not defined. Cannot create table in default database.")

    def _use_catalog(self):
        """Announce and switch the session to ``self.catalog``.

        The identifier is backtick-quoted so that names containing characters
        such as ``-`` are accepted.
        """
        print(f'Using {self.catalog} Catalog ')
        spark.sql(f""" USE CATALOG `{self.catalog}`""")

    def create_catalog(self):
        """Create the catalog if it does not exist, managed under ``root_path``."""
        print(f"Creating catalog {self.catalog} .....")
        # NOTE(review): the location is built as root_path + "root" with no
        # separator, so this assumes root_path ends with "/" -- confirm.
        spark.sql(f"""
            CREATE CATALOG IF NOT EXISTS `{self.catalog}`
            MANAGED LOCATION '{self.root_path}root'
        """)
        print(f"Catalog {self.catalog} created.")

    def create_bronze_Schema(self):
        """Create the ``bronze`` schema in the catalog, managed at ``bronze_path``."""
        self._use_catalog()
        print(f'Creating Bronze Schema in {self.catalog}')
        spark.sql(f"""CREATE SCHEMA IF NOT EXISTS `bronze` MANAGED LOCATION '{self.bronze_path}'""")
        print("Bronze Schema created.")

    def create_silver_Schema(self):
        """Create the ``silver`` schema in the catalog, managed at ``silver_path``."""
        self._use_catalog()
        print(f'Creating silver Schema in {self.catalog}')
        spark.sql(f"""CREATE SCHEMA IF NOT EXISTS `silver` MANAGED LOCATION '{self.silver_path}'""")
        print("Silver Schema created.")

    def create_gold_Schema(self):
        """Create the ``gold`` schema in the catalog, managed at ``gold_path``."""
        self._use_catalog()
        print(f'Creating gold Schema in {self.catalog}')
        spark.sql(f"""CREATE SCHEMA IF NOT EXISTS `gold` MANAGED LOCATION '{self.gold_path}'""")
        print("Gold Schema created.")

    def create_roads_table(self):
        """Create ``<catalog>.bronze.raw_roads`` if it does not exist.

        The table is addressed by its fully qualified name, so the session's
        current catalog is not changed here.

        Raises:
            ReferenceError: If no catalog name is configured.
        """
        print(f'Using {self.catalog} Catalog ')
        self._require_catalog()

        print(f"Creating roads table {self.catalog}.bronze.raw_roads .....")
        spark.sql(f"""CREATE TABLE IF NOT EXISTS `{self.catalog}`.`bronze`.`raw_roads`
                (
                    Road_ID INT,
                    Road_Category_Id INT,
                    Road_Category VARCHAR(255),
                    Region_ID INT,
                    Region_Name VARCHAR(255),
                    Total_Link_Length_Km DOUBLE,
                    Total_Link_Length_Miles DOUBLE,
                    All_Motor_Vehicles DOUBLE
                );"""
            )
        print(f"Table  {self.catalog}.bronze.raw_roads created.")

    def create_raw_traffic_table(self):
        """Create ``<catalog>.bronze.raw_traffic`` if it does not exist.

        Raises:
            ReferenceError: If no catalog name is configured.
        """
        # Validate before touching Spark: running USE CATALOG with an empty
        # name would fail with a SQL error instead of the intended exception.
        self._require_catalog()
        spark.sql(f""" USE CATALOG `{self.catalog}`""")

        print(f"Creating raw_traffic table {self.catalog}.bronze.raw_traffic .....")
        spark.sql(f"""CREATE TABLE IF NOT EXISTS `{self.catalog}`.`bronze`.`raw_traffic`
                    (
                        Record_ID INT,
                        Count_point_id INT,
                        Direction_of_travel VARCHAR(255),
                        Year INT,
                        Count_date VARCHAR(255),
                        hour INT,
                        Region_id INT,
                        Region_name VARCHAR(255),
                        Local_authority_name VARCHAR(255),
                        Road_name VARCHAR(255),
                        Road_Category_ID INT,
                        Start_junction_road_name VARCHAR(255),
                        End_junction_road_name VARCHAR(255),
                        Latitude DOUBLE,
                        Longitude DOUBLE,
                        Link_length_km DOUBLE,
                        Pedal_cycles INT,
                        Two_wheeled_motor_vehicles INT,
                        Cars_and_taxis INT,
                        Buses_and_coaches INT,
                        LGV_Type INT,
                        HGV_Type INT,
                        EV_Car INT,
                        EV_Bike INT,
                        create_at TIMESTAMP
                );"""
            )
        print(f"Table  {self.catalog}.bronze.raw_traffic created.")

    def initial_setup(self):
        """Create the catalog, the three schemas and both raw bronze tables, in order."""
        print(f"Initializing the environment. Please wait...")
        self.create_catalog()
        self.create_bronze_Schema()
        self.create_silver_Schema()
        self.create_gold_Schema()
        self.create_roads_table()
        self.create_raw_traffic_table()
        self.initialized = True
        print(f"Environment initialized.")

    def assert_table(self, table_name, schema):
        """Assert that exactly one table called ``table_name`` exists in ``schema``.

        Args:
            table_name: Table name as reported in the ``tableName`` column of
                ``SHOW TABLES``.
            schema: Schema of ``self.catalog`` to look in.

        Raises:
            AssertionError: If the table is not listed exactly once.
        """
        query = f"SHOW TABLES IN `{self.catalog}`.`{schema}`"
        print(query)
        matches = spark.sql(query).filter(f"tableName=='{table_name}'").count()
        assert matches == 1, f"Table {table_name} not found"
        print(f"Found {table_name} table in {self.catalog}.{schema} : Success")

    def validate_environment(self):
        """Assert that the three schemas and both raw bronze tables exist.

        Raises:
            AssertionError: On the first missing schema or table.
        """
        import time
        start = int(time.time())
        print(f"Validating the environment. Please wait...")

        # List the schemas once and check all three names against the result.
        rows = spark.sql(f"SHOW DATABASES IN `{self.catalog}`").collect()
        schema_names = [row["databaseName"] for row in rows]

        assert schema_names.count("bronze") == 1, "Bronze Schema not found"
        print(f"Catalog {self.catalog}.bronze validated.")

        assert schema_names.count("silver") == 1, "Silver Schema not found"
        print(f"Catalog {self.catalog}.'silver' validated.")

        assert schema_names.count("gold") == 1, "Gold Schema not found"
        print(f"Catalog {self.catalog}.'gold' validated.")

        self.assert_table("raw_roads", "bronze")
        self.assert_table("raw_traffic", "bronze")

        print(f"Environment validated in {int(time.time())-start} seconds.")

    def environment_cleanup(self):
        """Drop the catalog and everything in it (``CASCADE``) if it is listed."""
        print(f'Cleaning up the environment: {self.catalog}')
        listed = spark.sql(f"SHOW CATALOGS").filter(f"catalog == '{self.catalog}'").count()
        if listed == 1:
            print(f"Dropping the database {self.catalog}...", end='')
            spark.sql(f"DROP CATALOG IF EXISTS `{self.catalog}` CASCADE")
            print("Done")
        self.initialized = False
        






In [0]:
#spark.sql("SHOW CATALOGS").show()