Notebook responsável por criar a tabela com os dados de táxi na camada gold.

In [0]:
import sys
import os
sys.path.append('../utils') 

from utils import create_update_table_metadata
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [0]:
# Workspace path of the running notebook, read from the notebook context
# (`dbutils` is presumably the Databricks-provided global -- confirm runtime).
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
notebook_path = notebook_context.notebookPath().get()
# Directory holding this notebook; used below to locate the metadata YAML.
folder_path = os.path.dirname(notebook_path)

# Silver-layer source tables, one per taxi type.
source_table_name_green = "ifood_case.silver.tb_ft_trip_green"
source_table_name_yellow = "ifood_case.silver.tb_ft_trip_yellow"

# Gold-layer target: short name (also used for the metadata file name)
# and the fully qualified table name.
table_name = "tb_taxi_trip_data"
gold_table_name = f"ifood_case.gold.{table_name}"

In [0]:
# Load both silver trip tables as DataFrames (green first, then yellow).
df_green, df_yellow = (
    spark.read.table(source_name)
    for source_name in (source_table_name_green, source_table_name_yellow)
)

In [0]:
# Columns exposed by the gold table, in output order. Keeping a single
# definition guarantees the green and yellow projections cannot drift apart.
_GOLD_COLUMNS = (
    "id_vendor",
    "ts_pickup",
    "ts_dropoff",
    "nb_passenger_count",
    "vl_total_amount",
    "cd_taxi_type",
)


def _to_gold_projection(df_trips, taxi_type):
    """Tag trips with their taxi type and keep only the gold columns.

    Args:
        df_trips: DataFrame of trips that contains every column listed in
            ``_GOLD_COLUMNS`` except ``cd_taxi_type``.
        taxi_type: Literal value stored in the ``cd_taxi_type`` column.

    Returns:
        A DataFrame with exactly the ``_GOLD_COLUMNS`` columns, in that order.
    """
    return (
        df_trips
        .withColumn("cd_taxi_type", F.lit(taxi_type))
        .select([F.col(column_name) for column_name in _GOLD_COLUMNS])
    )


df_green_gold = _to_gold_projection(df_green, "green")
df_yellow_gold = _to_gold_projection(df_yellow, "yellow")

In [0]:
# Stack green and yellow trips into a single DataFrame. Columns are matched
# by name rather than by position, so a change in column order on either side
# cannot silently merge unrelated columns; a missing column raises instead.
df_gold = df_green_gold.unionByName(df_yellow_gold)

In [0]:
# Persist the combined trips as a Delta table, using "overwrite" save mode.
(
    df_gold.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(gold_table_name)
)

In [0]:
# YAML metadata file stored in the `metadata` folder next to this notebook,
# named after the gold table.
metadata_file_path = f"/Workspace/{folder_path}/metadata/{table_name}.yaml"

# Hand the table and its metadata file to the project helper from `utils`
# (presumably it creates/updates the table's metadata from the YAML -- confirm
# against the helper's implementation).
create_update_table_metadata(
    spark,
    gold_table_name,
    metadata_file_path,
)