In [0]:
import pyspark.sql.types as T
import pyspark.sql.functions as F
import yaml

In [0]:
def create_update_column_metadata(table_name, file_path):
    """Set column comments on a table from a YAML metadata file.

    The YAML document must contain a top-level ``columns`` mapping of
    column name -> comment text. One
    ``ALTER TABLE ... ALTER COLUMN ... COMMENT`` statement is issued per
    entry through the notebook's global ``spark`` session.

    Args:
        table_name: Name of the table whose columns receive the comments.
        file_path: Path of the YAML file holding the ``columns`` mapping.

    Returns:
        None. Any exception (unreadable file, missing ``columns`` key, failing
        SQL statement) is caught and reported with ``print``; processing stops
        at the first failure and the remaining columns are left untouched.
    """
    try:
        # Explicit UTF-8 so non-ASCII comment text does not depend on the
        # platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            column_comments = yaml.safe_load(file)["columns"]

        # The file is already closed here; only the parsed mapping is needed
        # while the SQL statements run.
        for column, comment in column_comments.items():
            # Escape backslashes and single quotes so the comment cannot
            # terminate or alter the single-quoted SQL string literal.
            # NOTE(review): assumes Spark's default backslash-escape handling
            # of string literals (spark.sql.parser.escapedStringLiterals=false).
            escaped_comment = str(comment).replace("\\", "\\\\").replace("'", "\\'")
            spark.sql(
                f"ALTER TABLE {table_name} ALTER COLUMN {column} COMMENT '{escaped_comment}'"
            )
    except Exception as e:
        print(f"Error during update column metadata: {e}")

In [0]:
# Bronze inputs read by this notebook.
source_table_name = "ifood_case.bronze.fhv_202301"
source_table_name_after = "ifood_case.bronze.fhv_after_202301"

# Silver output table written by this notebook.
silver_table_name = "ifood_case.silver.fhv"

In [0]:
# Load both bronze tables (same order as the names listed below).
df_after_202301, df_202301 = (
    spark.read.table(bronze_name)
    for bronze_name in (source_table_name_after, source_table_name)
)

In [0]:
# Boolean flag expression: True only when SR_Flag equals 1; every other value
# (including null) falls through to False.
column_sr_flag = F.when(F.col("SR_Flag") == F.lit(1), F.lit(True)).otherwise(F.lit(False))

In [0]:
# Single definition of the silver projection, applied to both bronze inputs so
# the two frames always share the same column order, names and types (the
# positional union below depends on that). Each column is cast first and
# aliased last, so the output name is set explicitly instead of relying on the
# alias being carried through the cast.
silver_columns = [
    F.col("dispatching_base_num").cast(T.StringType()).alias("ds_dispatching_base_number"),
    F.col("pickup_datetime").alias("ts_pickup"),
    F.col("dropOff_datetime").alias("ts_dropoff"),
    F.col("PULocationID").cast(T.IntegerType()).alias("id_pickup_location"),
    F.col("DOLocationID").cast(T.IntegerType()).alias("id_dropoff_location"),
    F.col("fl_shared_trip").cast(T.BooleanType()).alias("fl_shared_trip"),
    F.col("Affiliated_base_number").cast(T.StringType()).alias("ds_affiliated_base_number"),
]

# NOTE: both input names are rebound to their projected versions, so this cell
# cannot be re-run on its own; re-run the cell that reads the bronze tables
# first (the projected frames no longer contain SR_Flag or the source columns).
df_202301 = (
    df_202301
        .withColumn("fl_shared_trip", column_sr_flag)
        .select(*silver_columns)
)

df_after_202301 = (
    df_after_202301
        .withColumn("fl_shared_trip", column_sr_flag)
        .select(*silver_columns)
)

# union() matches columns by position, which is safe here because both frames
# come from the same projection list.
# NOTE(review): coalesce(1) collapses the unioned frame into a single
# partition; confirm this is intended given the data volume.
df_silver = df_after_202301.union(df_202301).coalesce(1)

In [0]:
# Remove exact duplicate rows, then discard rows in which every column is null.
df_silver = df_silver.dropDuplicates().dropna(how="all")

In [0]:
# Persist the silver frame as a Delta table, replacing any existing contents.
(
    df_silver.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable(silver_table_name)
)

In [0]:
# Apply the column comments from the YAML file to the silver table.
# NOTE(review): the path is relative, so it resolves against the notebook's
# working directory — confirm that is where ./metadata lives.
create_update_column_metadata(
    table_name=silver_table_name,
    file_path="./metadata/fhv.yaml",
)