In [0]:
# Make sure the lab database exists, then switch the session over to it.
create_db_sql = "CREATE DATABASE IF NOT EXISTS delta_lab"
use_db_sql = "USE delta_lab"

spark.sql(create_db_sql)
spark.sql(use_db_sql)

In [0]:
from pyspark.sql.functions import *
from datetime import datetime, timedelta
import random

def generate_ecommerce_data(num_records=10000, seed=None):
    """Build a Spark DataFrame of synthetic e-commerce orders.

    Each row carries a sequential ``order_id`` ("ORD000000", "ORD000001", ...),
    a random ``customer_id`` ("CUST0001".."CUST1000"), a random
    ``product_category``, ``region`` and ``status``, an integer
    ``order_amount`` in [1, 10000], an integer ``quantity`` in [1, 10] and an
    ``order_date`` string ("YYYY-MM-DD") at most 365 days before today.

    Args:
        num_records: Number of order rows to generate. Zero yields an empty
            DataFrame that still carries the full column schema.
        seed: Optional seed. When given, the random fields are drawn from a
            private ``random.Random(seed)`` so repeated calls produce the same
            rows (``order_date`` stays relative to the current day). When
            ``None`` (the default) the module-level ``random`` generator is
            used, so existing callers, including ones that call
            ``random.seed`` themselves, see unchanged behavior.

    Returns:
        The DataFrame produced by the notebook's global ``spark`` session.

    Raises:
        ValueError: If ``num_records`` is negative.
    """
    if num_records < 0:
        raise ValueError(f"num_records must be non-negative, got {num_records}")

    categories = ["Electronics", "Clothing", "Books", "Home & Garden", "Sports"]
    regions = ["North", "South", "East", "West", "Central"]
    statuses = ["Completed", "Pending", "Cancelled"]

    # Explicit schema so an empty record list still produces a DataFrame
    # (schema inference needs at least one row). Columns are listed in
    # alphabetical order with bigint integers, which is what PySpark infers
    # from dict records, so the resulting layout is unchanged.
    order_schema = (
        "customer_id string, order_amount bigint, order_date string, "
        "order_id string, product_category string, quantity bigint, "
        "region string, status string"
    )

    # Fall back to the shared module-level generator when no seed is supplied.
    rng = random if seed is None else random.Random(seed)

    # Captured once so every record is dated relative to the same instant,
    # even if generation happens to straddle midnight.
    now = datetime.now()

    data = []
    for i in range(num_records):
        # The draw order below matches the original field order, so a caller
        # that seeds the global generator still gets the same values.
        data.append({
            "order_id": f"ORD{i:06d}",
            "customer_id": f"CUST{rng.randint(1, 1000):04d}",
            "product_category": rng.choice(categories),
            "region": rng.choice(regions),
            "order_amount": rng.randint(1, 10000),
            "quantity": rng.randint(1, 10),
            "order_date": (now - timedelta(days=rng.randint(0, 365))).strftime("%Y-%m-%d"),
            "status": rng.choice(statuses),
        })

    return spark.createDataFrame(data, schema=order_schema)

# Generate the 10,000-row sample order set used throughout the lab and
# render it in the notebook.
df_orders = generate_ecommerce_data(num_records=10000)
display(df_orders)

In [0]:
# Root storage prefix under which this lab keeps its tables.
base_path = "s3://databricks-miraj/c8e900d7-335a-4b8c-bb1f-98b7562c6e72/lab1/"

# Clean up anything a previous run left under the prefix
# (the second argument, True, asks for a recursive delete).
dbutils.fs.rm(base_path, True)

# Location of the orders table; base_path already ends with "/".
delta_table_path = f"{base_path}orders_delta"

# Write the generated orders in Delta format at that location, overwriting
# existing contents, and save the result as the table delta_lab.orders.
(
    df_orders.write
    .format("delta")
    .mode("overwrite")
    .option("path", delta_table_path)
    .saveAsTable("delta_lab.orders")
)

print(f"Created Delta table at: {delta_table_path}")

In [0]:
# Show what is stored at the top level of the Delta table directory.
table_files = dbutils.fs.ls(delta_table_path)
display(table_files)

# Show the contents of the table's _delta_log directory.
delta_log_path = f"{delta_table_path}/_delta_log/"
display(dbutils.fs.ls(delta_log_path))

# Load the first commit file (version 00000000000000000000) as JSON and
# display its entries.
first_commit_path = f"{delta_log_path}00000000000000000000.json"
first_commit = spark.read.json(first_commit_path)
display(first_commit)

In [0]:
# Extended table description: print up to 50 rows without truncating values.
table_details = spark.sql("DESCRIBE EXTENDED delta_lab.orders")
table_details.show(n=50, truncate=False)

# Table history: print up to 10 rows without truncating values.
table_history = spark.sql("DESCRIBE HISTORY delta_lab.orders")
table_history.show(n=10, truncate=False)