# INTRODUCTION

In [None]:

# Import PySpark and the Delta Lake session-configuration helper
import pyspark
from delta import configure_spark_with_delta_pip

# Session builder for the "STREAMING_DWH" app with the Delta SQL extension
# and the Delta catalog registered as the default spark_catalog.
builder = (
    pyspark.sql.SparkSession.builder
    .appName("STREAMING_DWH")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# Pass the builder through the Delta pip helper, then create (or reuse) the session.
spark = configure_spark_with_delta_pip(builder).getOrCreate()


In [None]:
# Enable schema inference so streaming file sources can be read without an explicit schema
spark.conf.set("spark.sql.streaming.schemaInference", True)

# Streaming DataFrame over the product input directory, at most one file per trigger
streaming_df = (
    spark.readStream
    .format("json")
    .option("maxFilesPerTrigger", 1)
    .load("data/product/")
)

# Show the schema of the product stream
streaming_df.printSchema()

In [None]:
# Create the bronze schema if it does not exist yet, then make it the session's current schema
spark.sql("CREATE SCHEMA IF NOT EXISTS bronze;")
spark.sql("USE SCHEMA bronze;")

In [None]:
from pyspark.sql.functions import current_timestamp, input_file_name

def bronze_ingestion_query(table_name, schema_name="bronze"):
    """Start a streaming ingestion from ``data/<table_name>`` into a Delta table.

    JSON files under ``data/<table_name>`` are read as a stream (at most one
    file per trigger), two metadata columns are added, and the rows are
    appended to the Delta table ``<schema_name>.<table_name>``.

    Uses the notebook-level ``spark`` session. The JSON source is read without
    an explicit schema, so ``spark.sql.streaming.schemaInference`` must already
    be enabled on that session.

    Args:
        table_name: Name of the source directory under ``data/`` and of the
            target table.
        schema_name: Schema that holds the target table; also used in the
            checkpoint path. Defaults to ``"bronze"``.

    Returns:
        The streaming query handle returned by ``toTable``.
    """
    source_stream = (
        spark.readStream
        .format("json")
        .option("maxFilesPerTrigger", 1)
        .load(f"data/{table_name}")
        # Ingestion metadata: processing time and originating file of each row.
        .withColumn("meta_timestamp", current_timestamp())
        .withColumn("meta_filename", input_file_name())
    )

    query = (
        source_stream.writeStream
        .outputMode("append")
        .format("delta")
        .option("checkpointLocation", f"spark-warehouse/_checkpoints/{schema_name}.{table_name}")
        # Write to the requested schema so the target table and the checkpoint
        # path always refer to the same schema (previously hard-coded to bronze).
        .toTable(f"{schema_name}.{table_name}")
    )
    return query

# Start one bronze ingestion stream per source table
query1 = bronze_ingestion_query("inventory")
query2 = bronze_ingestion_query("product")
query3 = bronze_ingestion_query("purchase")

# To block until any of the running streams terminates, uncomment the line below:
# spark.streams.awaitAnyTermination()


In [None]:
# The ingestion streams run asynchronously. Wait until every active query has
# processed the input that is available right now; otherwise bronze.product
# may still be missing or empty when this cell runs right after the streams
# are started (e.g. on Restart & Run All).
for active_query in spark.streams.active:
    active_query.processAllAvailable()

# Preview the first 15 rows of the ingested product table
spark.sql("SELECT * FROM bronze.product").show(15)

In [None]:
# Inspect the catalog: current schema, details of the bronze schema, and its tables
for statement in (
    "SELECT current_schema()",
    "DESCRIBE SCHEMA EXTENDED bronze;",
    "SHOW TABLES IN bronze;",
):
    spark.sql(statement).show()