### Get environment name from widget and assign to variable 

In [0]:
# Register the "env" text widget (blank default) and read its current value.
# `env` is the prefix used to build the `<env>_catalog` names in the cells below.
dbutils.widgets.text("env", "", "Enter the environment in lower case")
env = dbutils.widgets.get("env")


<br>

### Call common notebook to access shared variables and methods

In [0]:
%run "/Workspace/Users/yen#ext#@yenyahoo.onmicrosoft.com/3_Common"


### Reading the data from bronze raw_traffic Table

In [0]:

def read_BronzeTrafficTable(environment):
    """Open a streaming read on the bronze ``raw_traffic`` table.

    Args:
        environment: Environment prefix; the table is read from
            ``{environment}_catalog``.``bronze``.raw_traffic.

    Returns:
        Whatever ``spark.readStream.table`` returns for that table
        (a streaming DataFrame).
    """
    print('Reading the Bronze Table Data : ', end='')

    # `spark` is the session object supplied by the notebook runtime.
    bronze_table = f"`{environment}_catalog`.`bronze`.raw_traffic"
    bronze_stream = spark.readStream.table(bronze_table)

    print(f'Reading {environment}_catalog.bronze.raw_traffic Success!')
    return bronze_stream


### Adding the `Electric_Vehicles_Count` column (`EV_Car` + `EV_Bike`)


In [0]:
def ev_Count(df):
    """Add an ``Electric_Vehicles_Count`` column equal to ``EV_Car + EV_Bike``.

    Args:
        df: DataFrame that provides the ``EV_Car`` and ``EV_Bike`` columns.

    Returns:
        The DataFrame returned by ``df.withColumn`` with the extra column.
    """
    print('Creating Electric Vehicles Count Column : ', end='')

    from pyspark.sql.functions import col

    electric_total = col('EV_Car') + col('EV_Bike')
    with_ev_count = df.withColumn('Electric_Vehicles_Count', electric_total)

    print('Success!! ')
    return with_ev_count


### Adding the `Motor_Vehicles_Count` column (total of all motor vehicle types)


In [0]:
def Motor_Count(df):
    """Add a ``Motor_Vehicles_Count`` column summing every motor-vehicle column.

    The total is ``Electric_Vehicles_Count + Two_wheeled_motor_vehicles +
    Cars_and_taxis + Buses_and_coaches + LGV_Type + HGV_Type``, so ``df`` must
    already contain ``Electric_Vehicles_Count``.

    Args:
        df: DataFrame that provides the six columns listed above.

    Returns:
        The DataFrame returned by ``df.withColumn`` with the extra column.
    """
    print('Creating All Motor Vehicles Count Column : ', end='')

    from pyspark.sql.functions import col

    motor_columns = (
        'Electric_Vehicles_Count',
        'Two_wheeled_motor_vehicles',
        'Cars_and_taxis',
        'Buses_and_coaches',
        'LGV_Type',
        'HGV_Type',
    )

    # Accumulate left to right so the expression has the same shape as a + b + c + ...
    motor_total = col(motor_columns[0])
    for column_name in motor_columns[1:]:
        motor_total = motor_total + col(column_name)

    with_motor_count = df.withColumn('Motor_Vehicles_Count', motor_total)

    print('Success!! ')
    return with_motor_count


### Creating Transformed Time column

In [0]:
def create_TransformedTime(df):
    """Add a ``Transformed_Time`` column populated with ``current_timestamp()``.

    Args:
        df: DataFrame to extend.

    Returns:
        The DataFrame returned by ``df.withColumn`` with the extra column.
    """
    from pyspark.sql.functions import current_timestamp

    print('Creating Transformed Time column : ', end='')

    transformed_at = current_timestamp()
    with_transformed_time = df.withColumn('Transformed_Time', transformed_at)

    print('Success!!')
    return with_transformed_time


### Writing the Transformed data to Silver_Traffic Table

In [0]:
def write_Traffic_SilverTable(StreamingDF,environment):
    """Append a streaming DataFrame to the ``silver_traffic`` Delta table.

    Starts a write stream named ``SilverTrafficWriteStream`` in append mode
    with ``trigger(availableNow=True)`` and waits for it to terminate before
    returning.

    Args:
        StreamingDF: Streaming DataFrame to persist.
        environment: Environment prefix; the target table is
            ``{environment}_catalog``.``silver``.``silver_traffic``.

    Returns:
        None.
    """
    print('Writing the silver_traffic Data : ', end='')

    silver_table = f"`{environment}_catalog`.`silver`.`silver_traffic`"

    # `checkpoint` is not defined in this notebook - presumably it is a base
    # path provided by the common notebook run earlier; verify there.
    stream_writer = StreamingDF.writeStream.format('delta')
    stream_writer = stream_writer.option('checkpointLocation', checkpoint + "/SilverTrafficLoad/Checkpt/")
    stream_writer = stream_writer.outputMode('append')
    stream_writer = stream_writer.queryName("SilverTrafficWriteStream")
    stream_writer = stream_writer.trigger(availableNow=True)

    # toTable() starts the query; wait for it to finish before reporting success.
    silver_query = stream_writer.toTable(silver_table)
    silver_query.awaitTermination()

    print(f'Writing {silver_table} Success!')

### Row count before adding new data

In [0]:
# Row count of the silver table before this run's load.
# The table may not exist yet (e.g. on the very first run), so check first.
# Use the public `spark.catalog.tableExists` API rather than the private
# `spark._jsparkSession` py4j handle, which is an internal attribute and is
# not guaranteed to be available on every cluster type.
tableExists = spark.catalog.tableExists(f"{env}_catalog.silver.silver_traffic")

if tableExists:
    # Table is present: show how many rows it holds before new data is appended.
    query = f"SELECT count(*) as `Num of Rows` FROM `{env}_catalog`.`silver`.`silver_traffic`"
    df = spark.sql(query)
    display(df)
else:
    print("Table does not exist.")

Num of Rows
55638


## Calling all the functions

In [0]:
# Step 1: open the streaming read on the bronze table for the selected environment
df_trafficdata = read_BronzeTrafficTable(env)

# Step 2: drop duplicate rows
# (remove_Dups is not defined in this notebook - presumably it comes from the common notebook)
df_noDups = remove_Dups(df_trafficdata)

# Step 3: collect every column name so NULL handling can cover all of them
Allcolumns = df_noDups.columns

# Step 4: replace NULLs in string and numeric columns
# (handle_NULLs is likewise expected from the common notebook)
df_noNulls = handle_NULLs(df_noDups, Allcolumns)

# Step 5: add Electric_Vehicles_Count
df_ev = ev_Count(df_noNulls)

# Step 6: add Motor_Vehicles_Count (needs Electric_Vehicles_Count from step 5)
df_motor = Motor_Count(df_ev)

# Step 7: stamp each row with the transformation time
df_final = create_TransformedTime(df_motor)

# Step 8: append the result to the silver_traffic table
write_Traffic_SilverTable(df_final, env)

Reading the Bronze Table Data : Reading dev_catalog.bronze.raw_traffic Success!
Removing Duplicate values: Success!
Replacing NULLs of String column DataType with "Unknown": Success!
Replacing NULLs of Numeric column DataType with "0":  Success!
Creating Electric Vehicles Count Column : Success!! 
Creating All Motor Vehicles Count Column : Success!! 
Creating Transformed Time column : Success!!
Writing the silver_traffic Data : Writing `dev_catalog`.`silver`.`silver_traffic` Success!


### Row count after adding new data

In [0]:
 # Row count after the load. The catalog is derived from the `env` widget so this
# cell checks the same environment the pipeline wrote to (it was hard-coded to dev_catalog).
display(spark.sql(f"SELECT count(*) as `Num of Rows` FROM `{env}_catalog`.`silver`.`silver_traffic`"))

Num of Rows
55638


### Display small sample of data

In [0]:
# Preview five rows of the silver table. The catalog is derived from the `env` widget
# so the sample comes from the environment that was just loaded (it was hard-coded to dev_catalog).
display(spark.sql(f"SELECT * FROM `{env}_catalog`.`silver`.`silver_traffic` Limit 5"))

Record_ID,Count_point_id,Direction_of_travel,Year,Count_date,hour,Region_id,Region_name,Local_authority_name,Road_name,Road_Category_ID,Start_junction_road_name,End_junction_road_name,Latitude,Longitude,Link_length_km,Pedal_cycles,Two_wheeled_motor_vehicles,Cars_and_taxis,Buses_and_coaches,LGV_Type,HGV_Type,EV_Car,EV_Bike,Extract_Time,Electric_Vehicles_Count,Motor_Vehicles_Count,Transformed_Time
37258,83012,S,2014,6/24/2014 0:00,18,3,Scotland,Glasgow City,A728,4,A730,A749,55.83986935,-4.22892245,1.1,21,1,220,0,28,2,1,2,2024-09-28T03:16:27.027Z,3,254,2024-09-28T03:19:32.537Z
37291,84016,N,2014,10/2/2014 0:00,15,10,West Midlands,Warwickshire,M6(T),1,M6 Toll and M42 Interchange south of Coleshill,M6 Toll and M42 Interchange south of Gilson - Junction 8,52.49685234,-1.717616583,1.1,0,21,3424,9,620,172,34,38,2024-09-28T03:16:27.027Z,72,4318,2024-09-28T03:19:32.537Z
37297,930183,E,2014,9/9/2014 0:00,9,3,Scotland,City of Edinburgh,B6415,5,Unknown,Unknown,55.95072928,-3.106343827,0.0,4,2,305,13,35,5,0,0,2024-09-28T03:16:27.027Z,0,360,2024-09-28T03:19:32.537Z
37439,80849,W,2014,9/18/2014 0:00,9,9,South East,Portsmouth,A2030,4,MARKETWAY,HOLBROOK ROAD,50.80316638,-1.084491131,0.4,23,5,306,24,30,3,0,1,2024-09-28T03:16:27.027Z,1,369,2024-09-28T03:19:32.537Z
37656,56029,S,2014,7/4/2014 0:00,16,6,London,Redbridge,A406,4,Centre point M11 Slip roads,A12 redbridge r/abt,51.58130058,0.040520421,1.2,0,37,3229,20,695,177,13,17,2024-09-28T03:16:27.027Z,30,4188,2024-09-28T03:19:32.537Z
