### Get the environment name from the widget and assign it to a variable

In [0]:
# Create the "env" text widget (empty by default) so the target environment can
# be chosen per run.
dbutils.widgets.text(name="env", defaultValue='', label='Enter the environment in lower case')

# The value is used as the prefix of every catalog name in this notebook
# (`<env>_catalog`). The widget label asks for lower case, so normalise the
# input: surrounding whitespace or capital letters would otherwise produce a
# different catalog name.
env = dbutils.widgets.get("env").strip().lower()


<br>

### Call common notebook to access shared variables and methods

In [0]:
%run "./3_Common"


### Reading the data from the bronze `raw_roads` table

In [0]:

def read_BronzeRoadsTable(environment):
    """Open a streaming read over the bronze ``raw_roads`` table.

    Args:
        environment: Prefix of the catalog name; the table read is
            `<environment>_catalog`.`bronze`.raw_roads.

    Returns:
        The streaming DataFrame returned by ``spark.readStream.table``.
    """
    print('Reading the Bronze Table raw_roads Data : ',end='')

    # Build the reader step by step: stream reader first, then bind the table.
    stream_reader = spark.readStream
    bronze_stream = stream_reader.table(f"`{environment}_catalog`.`bronze`.raw_roads")

    print(f'Reading {environment}_catalog.bronze.raw_roads Success!')
    print("**********************************")
    return bronze_stream


### Creating the `Road_Category_Name` column

In [0]:
def road_Category(df):
    """Add a ``Road_Category_Name`` column derived from ``Road_Category``.

    The codes TA, TM, PA, PM and M are translated to descriptive names;
    every row that matches none of them gets 'NA'.

    Args:
        df: DataFrame containing a ``Road_Category`` column.

    Returns:
        A new DataFrame with the extra ``Road_Category_Name`` column.
    """
    print('Creating Road Category Name Column: ', end='')

    from pyspark.sql.functions import when, col

    category_code = col('Road_Category')

    # Ordered (code, label) pairs; they are chained in this order.
    code_labels = (
        ('TA', 'Class A Trunk Road'),
        ('TM', 'Class A Trunk Motor'),
        ('PA', 'Class A Principal road'),
        ('PM', 'Class A Principal Motorway'),
        ('M', 'Class B road'),
    )

    # Start the CASE expression with the first pair, then append the rest.
    (first_code, first_label), *remaining = code_labels
    category_name = when(category_code == first_code, first_label)
    for code, label in remaining:
        category_name = category_name.when(category_code == code, label)
    category_name = category_name.otherwise('NA')

    labelled_df = df.withColumn("Road_Category_Name", category_name)
    print('Success!! ')
    print('***********************')
    return labelled_df


### Creating the `Road_Type` column



In [0]:
def road_Type(df):
    """Add a ``Road_Type`` column based on ``Road_Category_Name``.

    Names matching the pattern '%Class A%' become 'Major', names matching
    '%Class B%' become 'Minor', and everything else becomes 'NA'.

    Args:
        df: DataFrame containing a ``Road_Category_Name`` column.

    Returns:
        A new DataFrame with the extra ``Road_Type`` column.
    """
    print('Creating Road Type Name Column: ', end='')

    from pyspark.sql.functions import when, col

    category_name = col('Road_Category_Name')

    # Name the two LIKE predicates, then assemble the CASE expression.
    is_major = category_name.like('%Class A%')
    is_minor = category_name.like('%Class B%')
    road_type = when(is_major, 'Major').when(is_minor, 'Minor').otherwise('NA')

    typed_df = df.withColumn("Road_Type", road_type)
    print('Success!! ')
    print('***********************')
    return typed_df

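**OR** — an alternative version of the same step that uses `contains` instead of `like` (the pipeline below calls `road_Type`):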

In [0]:
def road_Type_Alt(df):
    """Alternative to ``road_Type`` that uses ``contains`` for the matching.

    Names containing 'Class A' become 'Major', names containing 'Class B'
    become 'Minor', and everything else becomes 'NA'.

    Args:
        df: DataFrame containing a ``Road_Category_Name`` column.

    Returns:
        A new DataFrame with the extra ``Road_Type`` column.
    """
    print('Creating Road Type Name Column: ', end='')

    from pyspark.sql.functions import when, col

    name_col = col('Road_Category_Name')

    # Build the CASE expression one branch at a time.
    type_expr = when(name_col.contains('Class A'), 'Major')
    type_expr = type_expr.when(name_col.contains('Class B'), 'Minor')
    type_expr = type_expr.otherwise('NA')

    result_df = df.withColumn("Road_Type", type_expr)
    print('Success!! ')
    print('***********************')
    return result_df



### Writing data to silver_roads in Silver schema

In [0]:
def write_Roads_SilverTable(StreamingDF, environment):
    """Stream ``StreamingDF`` into the silver ``silver_roads`` Delta table.

    The query runs in append mode with an ``availableNow`` trigger, and this
    function blocks on ``awaitTermination()`` before printing the success
    message.

    Args:
        StreamingDF: Streaming DataFrame to write.
        environment: Prefix of the catalog name; the target table is
            `<environment>_catalog`.`silver`.`silver_roads`.

    Note:
        Reads the global ``checkpoint`` base path, which is not defined in
        this notebook (presumably provided by ./3_Common — verify).
    """
    print('Writing the silver_roads Data : ',end='')

    # Configure the writer one setting at a time, in the same order as before.
    stream_writer = StreamingDF.writeStream
    stream_writer = stream_writer.format('delta')
    stream_writer = stream_writer.option('checkpointLocation', checkpoint+ "/SilverRoadsLoad/Checkpt/")
    stream_writer = stream_writer.outputMode('append')
    stream_writer = stream_writer.queryName("SilverRoadsWriteStream")
    stream_writer = stream_writer.trigger(availableNow=True)

    # Start the query and wait for it to terminate.
    silver_query = stream_writer.toTable(f"`{environment}_catalog`.`silver`.`silver_roads`")
    silver_query.awaitTermination()

    print(f'Writing `{environment}_catalog`.`silver`.`silver_roads` Success!')

### Row count before adding new data

In [0]:
# Count the rows already in silver_roads (when the table exists) so the figure
# can be compared with the count taken after the load.
# The existence check goes through the public Catalog API instead of the
# private `_jsparkSession` JVM handle.
tableExists = spark.catalog.tableExists(f"{env}_catalog.silver.silver_roads")

if tableExists:
    # The table exists: run the count query and show the result
    query = f"SELECT count(*) as `Num of Rows` FROM `{env}_catalog`.`silver`.`silver_roads`"
    df = spark.sql(query)
    display(df)
else:
    # Nothing to count yet (e.g. before the first load)
    print("Table does not exist.")


### Calling all the functions

In [0]:
# Open a streaming read over the bronze raw_roads table for the selected environment
df_roads = read_BronzeRoadsTable(env)

# Remove duplicates (remove_Dups is not defined in this notebook; presumably it comes from ./3_Common — verify)
df_noDups = remove_Dups(df_roads)

# Collect the DataFrame's column names into a list
AllColumns = df_noDups.schema.names

# Handle null values in those columns (handle_NULLs is also defined outside this notebook — presumably ./3_Common)
df_clean = handle_NULLs(df_noDups, AllColumns)

# Add the Road_Category_Name column
df_roadCat = road_Category(df_clean)

# Add the Road_Type column (it is derived from Road_Category_Name, so this must run after road_Category)
df_type = road_Type(df_roadCat)

# Write the result to the silver_roads table and wait for the stream to finish
write_Roads_SilverTable(df_type,env)

### Row count after adding new data

In [0]:
# Row count after the load. The catalog name is built from the `env` widget
# value rather than hard-coded, so the count comes from the same catalog the
# pipeline wrote to.
display(spark.sql(f"SELECT count(*) as `Num of Rows` FROM `{env}_catalog`.`silver`.`silver_roads`"))

### Display small sample of data

In [0]:
# Show a five-row sample of silver_roads. The catalog name is built from the
# `env` widget value rather than hard-coded, so the sample comes from the
# environment that was just loaded.
display(spark.sql(f"SELECT * FROM `{env}_catalog`.`silver`.`silver_roads` Limit 5"))