In [0]:
from pyspark.sql import functions as f
from pyspark.sql.types import *
from datetime import datetime
import pytz

In [0]:
# Fully-qualified destination of the ingested data: <catalog>.<database>.<table>.
dest_catalog_name = 'bronze'
dest_database_name = 'ingestion'
dest_table_name = 'video_games_sales'

# Echo the resolved target so it is visible in the run output.
print(
    f" > dest_catalog_name:   {dest_catalog_name}\n"
    f" > dest_database_name:  {dest_database_name}\n"
    f" > dest_table_name:     {dest_table_name}"
)

In [0]:
# Stamp identifying this ingestion run: the current time in the
# America/Sao_Paulo timezone, rendered as a yyyyMMddHHmmss string.
sao_paulo_tz = pytz.timezone('America/Sao_Paulo')
ingestion_timestamp = datetime.now(sao_paulo_tz).strftime('%Y%m%d%H%M%S')

print(f"ingestion_timestamp: {ingestion_timestamp}")
print(type(ingestion_timestamp))

In [0]:
# Locate the landing-zone file to ingest: any entry whose name contains
# `file_name_contains`. Each match is printed; when several files match, the
# last one listed is the one that ends up in `source_file`.
file_name_contains = 'Video_Games_Sales'
landing_zone_path = '/Volumes/workspace/default/landing_zone/'
dbfs_scheme = 'dbfs:'

matching_paths = [
    # Strip the scheme only when it is a leading prefix, never from the middle
    # of the path.
    entry.path[len(dbfs_scheme):] if entry.path.startswith(dbfs_scheme) else entry.path
    for entry in dbutils.fs.ls(landing_zone_path)
    if file_name_contains in entry.name
]

# Fail here, with a clear message, instead of leaving `source_file` undefined
# (or stale from an earlier run of this kernel) for the cells below.
if not matching_paths:
    raise FileNotFoundError(
        f"No file containing '{file_name_contains}' found in {landing_zone_path}"
    )

for matching_path in matching_paths:
    print(matching_path)

source_file = matching_paths[-1]

In [0]:
# Explicit schema for the source CSV. Every column is declared as a nullable
# string, in the order listed here.
source_column_names = [
    'Name',
    'Platform',
    'Year_of_Release',
    'Genre',
    'Publisher',
    'NA_Sales',
    'EU_Sales',
    'JP_Sales',
    'Other_Sales',
    'Global_Sales',
    'Critic_Score',
    'Critic_Count',
    'User_Score',
    'User_Count',
    'Developer',
    'Rating',
]

struct = StructType(
    [StructField(column_name, StringType(), True) for column_name in source_column_names]
)

In [0]:
# Read the landing file as comma-separated text with a header row, using the
# explicit all-string schema defined above.
raw_sales = (
    spark.read.format("csv")
    .option('sep', ',')
    .option('header', 'true')
    .schema(struct)
    .load(source_file)
)

# Tag every row with this run's ingestion stamp, stored as a long.
ingestion_stamp_col = f.lit(ingestion_timestamp).cast('long')
df = raw_sales.withColumn("ingestion_timestamp", ingestion_stamp_col)

# Uncomment to preview the result: display(df.limit(10))

In [0]:
# Make sure the destination catalog exists; nothing happens if it already does.
create_catalog_sql = f"CREATE CATALOG if NOT EXISTS {dest_catalog_name}"
spark.sql(create_catalog_sql)

In [0]:
# Make sure the destination schema exists inside the catalog; nothing happens
# if it already does.
create_schema_sql = f"CREATE SCHEMA if not EXISTS {dest_catalog_name}.{dest_database_name}"
spark.sql(create_schema_sql)

In [0]:
# Append this batch to the destination Delta table, addressed by its
# three-part name <catalog>.<database>.<table>.
dest_table_fqn = f"{dest_catalog_name}.{dest_database_name}.{dest_table_name}"
df.write.format("delta").mode("append").saveAsTable(dest_table_fqn)

In [0]:
# display(
#   spark
#   .table(f"{dest_catalog_name}.{dest_database_name}.{dest_table_name}")
#   .limit(10)
# )

In [0]:
# Remove the ingested file from the landing zone. The table write above uses
# append mode, so leaving the file in place would load the same rows again the
# next time this notebook runs.
dbutils.fs.rm(source_file)


In [0]:
# spark.sql( 'DROP TABLE IF EXISTS bronze.ingestion.video_games_sales' )