Notebook responsável pela criação da tabela de dimensão dos tipos de tarifa (RateCodeID).

Informações retiradas da documentação oficial da TLC (dicionários de dados dos táxis yellow e green):

https://www.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_yellow.pdf
https://www.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_green.pdf

In [0]:
import sys
import os
sys.path.append('../utils') 

from utils import create_update_table_metadata
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [0]:
# Ask the Databricks runtime for the context of the notebook being executed
# and read this notebook's path from it.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
notebook_path = _notebook_context.notebookPath().get()

# Directory portion of the notebook path; used later to locate the
# metadata YAML file that sits next to this notebook.
folder_path = os.path.dirname(notebook_path)

In [0]:
# Destination of the dimension: <catalog>.<schema>.<table>.
table_name = "tb_dim_rate_code"
silver_table_name = f"ifood_case.silver.{table_name}"

# Two columns: a non-nullable integer key and a nullable text description.
schema = T.StructType(
    [
        T.StructField("id_rate_code", T.IntegerType(), nullable=False),
        T.StructField("ds_rate_code_name", T.StringType(), nullable=True),
    ]
)

# Static rate-code lookup, expressed as id -> name and flattened into the
# list of (id, name) tuples that createDataFrame consumes.
# NOTE(review): newer revisions of the TLC data dictionary appear to also
# list code 99 (null/unknown) -- confirm whether it should be added here.
rate_code_data = list(
    {
        1: "Standard rate",
        2: "JFK",
        3: "Newark",
        4: "Nassau or Westchester",
        5: "Negotiated fare",
        6: "Group ride",
    }.items()
)

# Materialize the lookup rows as a DataFrame with the explicit schema above.
df_silver = spark.createDataFrame(rate_code_data, schema)

In [0]:
# Persist the dimension as a Delta table, replacing any existing contents
# on every run.
(
    df_silver.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(silver_table_name)
)

In [0]:
# The metadata YAML is looked up in a "metadata" folder next to this
# notebook, in a file named after the table.
metadata_file_path = f"/Workspace/{folder_path}/metadata/{table_name}.yaml"

# Hand the table name and the YAML path to the shared helper imported from
# utils.
create_update_table_metadata(
    spark,
    silver_table_name,
    metadata_file_path,
)