In [0]:
from pyspark.sql import functions as f
from pyspark.sql.types import *
from datetime import datetime
import pytz
import re

In [0]:
%run ../structs/nb_schemas

In [0]:

# Resolve the configuration entry chosen through the "ingestion_object" widget.
# NOTE(review): `_schemas` is not defined in this notebook; presumably it is
# provided by the `%run ../structs/nb_schemas` cell — confirm.
schema_config = _schemas[dbutils.widgets.get("ingestion_object")]

# Schema applied when reading the source file, plus the ingestion parameters.
schema = schema_config["schema"]
params = schema_config["parameters"]

# Source-side parameters.
src_api = params["src_api"]
src_api_dataset = params["src_api_dataset"]
src_file_name_contains = params["src_file_name_contains"]
src_file_format = params["src_file_format"]

# Destination parameters (catalog, schema and table names).
dest_catalog_name = params["dest_catalog_name"]
dest_schema_name = params["dest_schema_name"]
dest_table_name = params["dest_table_name"]

# Echo the resolved configuration so it is visible in the cell output.
# `end="\n\n"` emits the blank separator line between the groups.
print(f"schema: {schema}", end="\n\n")

print(f"src_api:                  '{src_api}'")
print(f"src_api_dataset:          '{src_api_dataset}'")
print(f"src_file_name_contains:   '{src_file_name_contains}'")
print(f"src_file_format:          '{src_file_format}'", end="\n\n")

print(f"dest_catalog_name:        '{dest_catalog_name}'")
print(f"dest_schema_name:         '{dest_schema_name}'")
print(f"dest_table_name:          '{dest_table_name}'")


In [0]:
# Capture the current time in the America/Sao_Paulo time zone and keep it as a
# compact digits-only string (year, month, day, hour, minute, second).
sao_paulo_tz = pytz.timezone('America/Sao_Paulo')
ingestion_timestamp = datetime.now(sao_paulo_tz).strftime('%Y%m%d%H%M%S')

print(f"ingestion_timestamp: {ingestion_timestamp}")
# strftime returns text, so this prints <class 'str'>.
print(type(ingestion_timestamp))

In [0]:
# Locate the landing-zone file to ingest: any entry whose name contains
# `src_file_name_contains` qualifies.
landing_zone_path = '/Volumes/workspace/default/landing_zone/'

# Strip the "dbfs:" scheme so the path can be handed to the reader as-is.
matching_files = [
    entry.path.replace("dbfs:", "")
    for entry in dbutils.fs.ls(landing_zone_path)
    if src_file_name_contains in entry.name
]

# Fail here with a clear message instead of letting a later cell hit a
# NameError (fresh kernel) or silently reuse a stale `source_file` left over
# from a previous run.
if not matching_files:
    raise FileNotFoundError(
        f"No file in '{landing_zone_path}' has a name containing "
        f"'{src_file_name_contains}'"
    )

# Print every candidate so the choice is visible in the cell output.
for matched_path in matching_files:
    print(matched_path)

# When several files match, the last one listed is the one that gets loaded.
source_file = matching_files[-1]

In [0]:
# Configure the reader: the format comes from the ingestion parameters and the
# explicit schema is applied; the 'sep' and 'header' options are always set.
source_reader = (
    spark.read
    .format(src_file_format)
    .option('sep', ',')
    .option('header', 'true')
    .schema(schema)
)
df = source_reader.load(source_file)

# Lower-case every column name and replace each space, comma, semicolon, brace,
# parenthesis, newline, tab or equals sign with an underscore.
# NOTE(review): presumably these are the characters Delta rejects in column
# names — confirm.
for original_name in df.columns:
    sanitized_name = re.sub(r"[ ,;{}\(\)\n\t=]", "_", original_name.lower())
    if sanitized_name == original_name:
        continue
    df = df.withColumnRenamed(original_name, sanitized_name)

# Stamp every row with the ingestion timestamp, cast to a long.
ingestion_ts_column = f.lit(ingestion_timestamp).cast("long")
df = df.withColumn("ingestion_timestamp", ingestion_ts_column)

# Show the first records
# display(df.limit(10))

In [0]:
# Make sure the destination catalog exists before anything is written to it.
create_catalog_sql = f"CREATE CATALOG if NOT EXISTS {dest_catalog_name}"
spark.sql(create_catalog_sql)

In [0]:
# Make sure the destination schema exists inside the destination catalog.
create_schema_sql = f"CREATE SCHEMA if not EXISTS {dest_catalog_name}.{dest_schema_name}"
spark.sql(create_schema_sql)

In [0]:
# Append the prepared DataFrame to the destination Delta table, addressed by
# its three-part name (catalog.schema.table).
destination_table = f"{dest_catalog_name}.{dest_schema_name}.{dest_table_name}"
delta_writer = df.write.format("delta").mode("append")
delta_writer.saveAsTable(destination_table)

In [0]:
# display(
#   spark
#   .table(f"{dest_catalog_name}.{dest_schema_name}.{dest_table_name}")
#   .limit(10)
# )

In [0]:
# Remove the source file (currently disabled)
#dbutils.fs.rm(source_file)


In [0]:
#spark.sql( 'DROP TABLE IF EXISTS bronze.ingestion.video_games_sales' )