### Config catalog widget

In [0]:
# Register a free-text notebook widget called "env" (empty by default) and read
# its current value; `env` is used further down as the catalog name.
dbutils.widgets.text(
    name="env",
    defaultValue="",
    label="Enter the environment or catalog in lower case",
)
env = dbutils.widgets.get("env")
print(env)

### Defining required Location Paths

In [0]:
%run "./commons"


### Handling duplicate records

In [0]:
def remove_duplicates(df):
    """Return a copy of ``df`` with duplicate rows dropped.

    Rows are compared across all columns because ``drop_duplicates()`` is
    called without a column subset. Progress messages are printed before and
    after the call.

    Args:
        df: DataFrame exposing ``drop_duplicates()``.

    Returns:
        The DataFrame produced by ``df.drop_duplicates()``.
    """
    # NOTE(review): the pipeline cell passes a streaming DataFrame here;
    # streaming de-duplication without a watermark keeps growing state -
    # confirm that is acceptable for this table.
    print("Removing duplicates for dataframe..")
    deduplicated = df.drop_duplicates()
    print("Sucessful !!..")
    return deduplicated

### Handling null values

In [0]:
def remove_null(df,columns):
    """Fill nulls in ``columns`` with ``"Unknown"`` and then with ``0``.

    Two ``fillna`` passes run over the same column subset: the first with the
    string ``"Unknown"``, the second with the integer ``0``. Per PySpark's
    documented ``fillna`` behaviour, columns whose type does not match the
    replacement value are ignored, so each pass only affects columns of the
    matching kind.

    Args:
        df: DataFrame exposing ``fillna(value, subset=...)``.
        columns: Column names forwarded as ``subset`` to both passes.

    Returns:
        The DataFrame after both fill passes.
    """
    print("Replacing null values from string columns with 'Unknown'")
    strings_filled = df.fillna("Unknown", subset=columns)
    print('Successful replaced null in string cols !!')

    print("Replacing null values from numeric columns with 0")
    fully_filled = strings_filled.fillna(0, subset=columns)
    print('Successful replaced null in numeric cols !!')
    return fully_filled

### Getting count of Electric vehicles by creating a new column

In [0]:
def total_electric_vehicles_byHour(df):
    """Append an ``electric_vehicles_count`` column equal to EV_Car + EV_Bike.

    Args:
        df: DataFrame that has ``EV_Car`` and ``EV_Bike`` columns.

    Returns:
        A new DataFrame with the extra ``electric_vehicles_count`` column.
    """
    ev_total = df['EV_Car'] + df['EV_Bike']
    with_ev_total = df.withColumn('electric_vehicles_count', ev_total)
    print("Successfully generated total ev vehicle count")
    return with_ev_total

### Getting count of Motor vehicles by creating a new column

In [0]:
def total_motor_vehicles_byHour(df):
    """Append a ``motor_vehicles_count`` column summing all vehicle columns.

    The new column is the left-to-right sum of ``Two_wheeled_motor_vehicles``,
    ``EV_Car``, ``EV_Bike``, ``Cars_and_taxis``, ``Buses_and_coaches``,
    ``LGV_Type`` and ``HGV_Type``.

    Args:
        df: DataFrame that has all seven columns listed above.

    Returns:
        A new DataFrame with the extra ``motor_vehicles_count`` column.
    """
    motor_columns = (
        'Two_wheeled_motor_vehicles',
        'EV_Car',
        'EV_Bike',
        'Cars_and_taxis',
        'Buses_and_coaches',
        'LGV_Type',
        'HGV_Type',
    )
    # Accumulate in the listed order so the expression matches a plain
    # `a + b + c + ...` chain.
    total = df[motor_columns[0]]
    for column_name in motor_columns[1:]:
        total = total + df[column_name]

    df_withMotor = df.withColumn('motor_vehicles_count', total)
    # The previous message was copied from the EV helper and reported an
    # "ev vehicle count" here, which made the run log misleading.
    print("Successfully generated total motor vehicle count")
    return df_withMotor

### Create transformed timestamp

In [0]:
def createTransformedTime(df):
    """Append a ``Transformed_time`` column set to ``current_timestamp()``.

    Args:
        df: DataFrame exposing ``withColumn``.

    Returns:
        A new DataFrame with the extra ``Transformed_time`` column.
    """
    from pyspark.sql.functions import current_timestamp

    stamped = df.withColumn('Transformed_time', current_timestamp())
    print('Successfully added transformed timestamp !!')
    return stamped

### Write final transformed data to silver traffic table

In [0]:
def write_stream_Silver_traffic(catalog,df):
    """Stream ``df`` into the ``silver.traffic`` table of ``catalog`` and wait.

    Starts a Delta streaming write in append mode with an ``availableNow``
    trigger, named ``SilverTrafficWriteStream``, then blocks on
    ``awaitTermination()`` before printing a confirmation.

    Args:
        catalog: Catalog name interpolated into the target table identifier.
        df: Streaming DataFrame exposing ``writeStream``.
    """
    # `checkpoints` is not defined in this cell; presumably it comes from the
    # "./commons" notebook that is %run earlier - TODO confirm.
    checkpoint_path = f"{checkpoints}/SilverTrafficLoad/checkpts"
    target_table = f"`{catalog}`.`silver`.`traffic`"

    stream_writer = (
        df.writeStream
        .format('delta')
        .option('checkpointLocation', checkpoint_path)
        .outputMode('append')
        .queryName('SilverTrafficWriteStream')
        .trigger(availableNow=True)
    )
    streaming_query = stream_writer.toTable(target_table)

    # Block until the started query terminates.
    streaming_query.awaitTermination()

    print("Write successful to Silver traffic table..!!")

In [0]:
# End-to-end silver load: read the bronze stream, clean it, add derived
# columns, and write the result to the silver traffic table.

# Streaming read of the bronze raw_traffic table in the catalog named by `env`.
df_sample = spark.readStream.table(f"`{env}`.`bronze`.`raw_traffic`")

# Drop duplicate rows.
df_noDups = remove_duplicates(df_sample)

# Fill nulls across every column of the source table.
df_noNulls = remove_null(df_noDups,columns = df_sample.columns)

# Add electric_vehicles_count.
df_EVCounts = total_electric_vehicles_byHour(df_noNulls)

# Add motor_vehicles_count.
df_motorCounts = total_motor_vehicles_byHour(df_EVCounts)

# Add the Transformed_time timestamp column.
df_finalTransform = createTransformedTime(df_motorCounts)

# Write to `<env>`.silver.traffic and wait for the streaming query to finish.
write_stream_Silver_traffic(env,df_finalTransform)