#### Importing common settings

In [0]:
%run /Workspace/Users/mbothe7@hotmail.com/databricks-end-to-end-project/01-initial-config

In [0]:
class SilverLoader():
    """Load the bronze traffic and roads tables into cleaned silver Delta tables.

    Each ``load_silver_*`` entry point reads a bronze table as a stream,
    de-duplicates it, fills nulls, adds derived columns and appends the result
    to the matching table in the ``silver`` schema of ``self.catalog``.

    The class relies on two names that are not defined in this cell: the
    notebook-global ``spark`` session and ``InitialConfig`` (presumably
    provided by the ``%run`` of the initial-config notebook -- confirm).
    """

    def __init__(self, env):
        """Store the target catalog and the silver storage path.

        Args:
            env: Used verbatim as the catalog name in every table reference
                (for example ``"dev"`` -> ``dev.bronze.raw_traffic``).
        """
        conf = InitialConfig()
        self.catalog = env
        # Base location under which the streaming checkpoints are kept.
        self.silver_path = conf.silver_path

    def read_bronze_traffic(self):
        """Return a streaming DataFrame over ``<catalog>.bronze.raw_traffic``."""
        print(f'Reading bronze table {self.catalog}.bronze.raw_traffic')
        traffic = spark.readStream.table(f'{self.catalog}.bronze.raw_traffic')
        print(f'Reading bronze table {self.catalog}.bronze.raw_traffic completed')
        return traffic

    def read_bronze_roads(self):
        """Return a streaming DataFrame over ``<catalog>.bronze.raw_roads``."""
        print(f'Reading bronze table {self.catalog}.bronze.raw_roads')
        roads = spark.readStream.table(f'{self.catalog}.bronze.raw_roads')
        print(f'Reading bronze table {self.catalog}.bronze.raw_roads completed')
        return roads

    def road_Category(self, df):
        """Add ``Road_Category_Name`` by decoding the ``Road_Category`` code.

        Codes ``TA``, ``TM``, ``PA``, ``PM`` and ``M`` are mapped to a
        descriptive name; any other value (including null) becomes ``'NA'``.

        Args:
            df: DataFrame containing a ``Road_Category`` column.

        Returns:
            ``df`` with the extra ``Road_Category_Name`` column.
        """
        print('Creating Road Category Name Column: ', end='')
        from pyspark.sql.functions import when, col

        category = col('Road_Category')
        df_road = df.withColumn(
            "Road_Category_Name",
            when(category == 'TA', 'Class A Trunk Road')
            .when(category == 'TM', 'Class A Trunk Motor')
            .when(category == 'PA', 'Class A Principal road')
            .when(category == 'PM', 'Class A Principal Motorway')
            .when(category == 'M', 'Class B road')
            .otherwise('NA'),
        )
        print('Creating Road Category Name Column completed')
        return df_road

    def road_Type(self, df):
        """Add ``Road_Type`` derived from ``Road_Category_Name``.

        Names containing ``Class A`` are ``'Major'``, names containing
        ``Class B`` are ``'Minor'``, everything else is ``'NA'``. The input
        must therefore already have been through :meth:`road_Category`.

        Args:
            df: DataFrame containing a ``Road_Category_Name`` column.

        Returns:
            ``df`` with the extra ``Road_Type`` column.
        """
        print('Creating Road Type Name Column: ', end='')
        from pyspark.sql.functions import when, col

        category_name = col('Road_Category_Name')
        df_road_Type = df.withColumn(
            "Road_Type",
            when(category_name.like('%Class A%'), 'Major')
            .when(category_name.like('%Class B%'), 'Minor')
            .otherwise('NA'),
        )
        print('Creating Road Type Name Column completed')
        return df_road_Type

    def remove_duplicates(self, table_name):
        """Drop fully duplicated rows.

        Args:
            table_name: Despite its name this is the DataFrame to clean, not a
                table name; the log lines print the DataFrame's own repr.

        Returns:
            The de-duplicated DataFrame.
        """
        # NOTE(review): on a streaming DataFrame without a watermark,
        # dropDuplicates keeps every seen row as state -- confirm this is
        # acceptable for the bronze volumes involved.
        print(f'Removing duplicates from {table_name}')
        df = table_name.dropDuplicates()
        print(f'Removing duplicates from {table_name} completed')
        return df

    def remove_nulls(self, table_name, columns):
        """Replace nulls in ``columns`` with ``'Unknown'`` or ``0``.

        Args:
            table_name: Despite its name this is the DataFrame to clean.
            columns: Column names to which the replacement is restricted.

        Returns:
            The DataFrame with nulls filled.
        """
        print(f'Removing nulls from {table_name}')
        # fillna only touches columns whose type matches the fill value, so
        # the first pass covers string columns and the second numeric ones.
        table_name = table_name.fillna('Unknown', subset=columns)
        table_name = table_name.fillna(0, subset=columns)
        return table_name

    def ev_count_add(self, df):
        """Add ``Electric_Vehicles_Count`` as ``EV_Car + EV_Bike``.

        Args:
            df: DataFrame containing ``EV_Car`` and ``EV_Bike`` columns.

        Returns:
            ``df`` with the extra ``Electric_Vehicles_Count`` column.
        """
        print('Adding EV count to the traffic table')
        from pyspark.sql.functions import col

        df_ev = df.withColumn(
            'Electric_Vehicles_Count', col('EV_Car') + col('EV_Bike')
        )
        print('Adding EV count to the traffic table   completed')
        return df_ev

    def motor_count_add(self, df):
        """Add the ``motor_count_add`` column: the sum of all vehicle counts.

        The total is ``Electric_Vehicles_Count + Two_wheeled_motor_vehicles +
        Cars_and_taxis + Buses_and_coaches + LGV_Type + HGV_Type``, so the
        input must already have been through :meth:`ev_count_add`.

        Args:
            df: DataFrame containing the six columns listed above.

        Returns:
            ``df`` with the extra ``motor_count_add`` column.
        """
        # The log text previously said "EV count" (copied from ev_count_add).
        print('Adding motor vehicle count to the traffic table')
        from pyspark.sql.functions import col

        total = (
            col('Electric_Vehicles_Count')
            + col('Two_wheeled_motor_vehicles')
            + col('Cars_and_taxis')
            + col('Buses_and_coaches')
            + col('LGV_Type')
            + col('HGV_Type')
        )
        # The output column name is kept as-is because it defines the schema
        # of the silver table.
        df_motor = df.withColumn('motor_count_add', total)
        print('Adding motor vehicle count to the traffic table completed')
        return df_motor

    def create_transformed_time(self, df):
        """Add a ``Transformed_Time`` column holding the current timestamp.

        Args:
            df: Any DataFrame.

        Returns:
            ``df`` with the extra ``Transformed_Time`` column.
        """
        from pyspark.sql.functions import current_timestamp

        print('Creating Transformed Time column : ', end='')
        df_timestamp = df.withColumn('Transformed_Time', current_timestamp())
        print('Creating Transformed Time column completed')
        return df_timestamp

    def write_silver_traffic(self, df):
        """Append the stream to ``<catalog>.silver.traffic_data`` and wait.

        Uses an ``availableNow`` trigger and blocks on ``awaitTermination()``
        until the query ends.

        Args:
            df: Streaming DataFrame to persist.
        """
        print('Writing silver table')

        write_traffic = (
            df.writeStream
            .format('delta')
            .option("checkpointLocation", f'{self.silver_path}/silver_traffic/checkpt/')
            .outputMode('append')
            .queryName('SilverTrafficStream')
            .trigger(availableNow=True)
            .toTable(f'{self.catalog}.silver.traffic_data')
        )
        write_traffic.awaitTermination()
        print('Writing silver table completed')

    def write_silver_roads(self, df):
        """Append the stream to ``<catalog>.silver.roads_data`` and wait.

        Uses an ``availableNow`` trigger and blocks on ``awaitTermination()``
        until the query ends.

        Args:
            df: Streaming DataFrame to persist.
        """
        print('Writing silver roads table')

        write_roads = (
            df.writeStream
            .format('delta')
            .option("checkpointLocation", f'{self.silver_path}/silver_roads/checkpt/')
            .outputMode('append')
            .queryName('SilverRoadsStream')
            .trigger(availableNow=True)
            .toTable(f'{self.catalog}.silver.roads_data')
        )
        write_roads.awaitTermination()
        print('Writing silver roads table completed')

    def load_silver_traffic(self, table_name='silver.traffic_data'):
        """Run the full bronze -> silver pipeline for traffic data.

        Steps: read, de-duplicate, fill nulls, add EV count, add total motor
        count, stamp ``Transformed_Time``, write.

        Args:
            table_name: Only used in the progress messages.
        """
        print(f'Loading silver table {table_name}')
        df = self.read_bronze_traffic()
        df = self.remove_duplicates(df)
        columns = df.schema.names
        df = self.remove_nulls(df, columns)
        df_clean = self.ev_count_add(df)
        df_clean = self.motor_count_add(df_clean)
        df_clean = self.create_transformed_time(df_clean)
        self.write_silver_traffic(df_clean)
        print(f'Loading silver table {table_name} completed')

    def load_silver_roads(self, table_name='silver.roads_data'):
        """Run the full bronze -> silver pipeline for roads data.

        Steps: read, de-duplicate, fill nulls, stamp ``Transformed_Time``,
        add road category name, add road type, write.

        Args:
            table_name: Only used in the progress messages.
        """
        print(f'Loading silver table {table_name}')
        df = self.read_bronze_roads()
        df = self.remove_duplicates(df)
        columns = df.schema.names
        df = self.remove_nulls(df, columns)
        # The timestamped frame used to be assigned to an unused variable, so
        # Transformed_Time never reached the written table; keep it in the
        # chain so the roads output matches the traffic output.
        df_clean = self.create_transformed_time(df)
        df_clean = self.road_Category(df_clean)
        df_clean = self.road_Type(df_clean)
        self.write_silver_roads(df_clean)
        print(f'Loading silver table {table_name} completed')



In [0]:
# Build a loader whose catalog name is "dev".
m = SilverLoader("dev")
# The traffic load is switched off for this run; only roads are refreshed.
# traffic = m.load_silver_traffic()
# load_silver_roads has no return statement, so `roads` ends up as None.
roads = m.load_silver_roads()
