
# Bronze Notebook - Ingestion

In [0]:
# Notebook-wide configuration used by the cells below.
# Qualified prefix under which the Bronze tables are registered
# (table names are built as f"{bronze_db_path}.<table>").
bronze_db_path = "r2de_project.bronze"
# Root S3 URI: raw inputs are read from "<bucket>/raw/..." and the Bronze
# Delta files are written to "<bucket>/bronze/...".
s3_bucket = "s3://r2de-bucket"


## Extract Data From S3
- CSV File
- JSON File

In [0]:
# Load the four raw source files from S3 into DataFrames.
# The three CSV files are read with header=True (first row supplies the column
# names); no schema is passed, so nothing is cast at this stage.
raw_customer = spark.read.csv(
    path=f"{s3_bucket}/raw/customer.csv",
    header=True,
)
raw_product = spark.read.csv(
    path=f"{s3_bucket}/raw/product.csv",
    header=True,
)
raw_transaction = spark.read.csv(
    path=f"{s3_bucket}/raw/transaction.csv",
    header=True,
)
# The currency feed is JSON and is parsed in multi-line mode rather than as
# one record per line.
raw_currency = spark.read.json(
    path=f"{s3_bucket}/raw/currency.json",
    multiLine=True,
)

In [0]:
# Preview every raw DataFrame, printing a heading before each one.
# Order matches the load order: customer, product, transaction, currency.
for raw_heading, raw_frame in (
    ("Customer Raw Data:", raw_customer),
    ("Product Raw Data:", raw_product),
    ("Transaction Raw Data:", raw_transaction),
    ("Currency Raw Data:", raw_currency),
):
    print(raw_heading)
    display(raw_frame)


## Ingest Raw Data into Bronze Delta Tables

In [0]:
# Persist each raw DataFrame as a Bronze Delta table.
#
# One mapping drives both the storage location and the registered table name,
# so the two cannot drift apart the way they can in copy-pasted statements.
# Dict insertion order preserves the write order:
# customer -> product -> transaction -> currency.
bronze_sources = {
    "customer": raw_customer,
    "product": raw_product,
    "transaction": raw_transaction,
    "currency": raw_currency,
}

for bronze_table, source_df in bronze_sources.items():
    (
        source_df.write
        .format("delta")
        # "overwrite" replaces the table contents on every run, which keeps
        # the cell safe to re-run from a fresh kernel.
        .mode("overwrite")
        # An explicit path stores the Delta files under "<bucket>/bronze/<table>".
        .option("path", f"{s3_bucket}/bronze/{bronze_table}")
        .saveAsTable(f"{bronze_db_path}.{bronze_table}")
    )

In [0]:
# Read the Bronze tables back from the catalog to confirm the writes landed.
# spark.table() returns the whole table, the same rows and columns as
# "SELECT * FROM <table>", without building SQL text by string interpolation.
bronze_customer = spark.table(f"{bronze_db_path}.customer")
bronze_product = spark.table(f"{bronze_db_path}.product")
bronze_transaction = spark.table(f"{bronze_db_path}.transaction")
bronze_currency = spark.table(f"{bronze_db_path}.currency")

In [0]:
# Preview every Bronze DataFrame, printing a heading before each one.
# Order: customer, product, transaction, currency.
for bronze_heading, bronze_frame in (
    ("Customer Bronze Data:", bronze_customer),
    ("Product Bronze Data:", bronze_product),
    ("Transaction Bronze Data:", bronze_transaction),
    ("Currency Bronze Data:", bronze_currency),
):
    print(bronze_heading)
    display(bronze_frame)