In [0]:
import pandas as pd

# Create sample sales data
data = {
    "OrderID": [1, 2, 3, 4],
    "OrderDate": ["2024-01-01 10:00:00", "2024-01-02 11:00:00", "2024-01-03 12:00:00", "2024-01-04 13:00:00"],
    "CustomerID": ["C001", "C002", "C003", "C004"],
    "Product": ["ProductA", "ProductB", "ProductC", "ProductD"],
    "Quantity": [10, 20, 15, 5],
    "Price": [100.0, 200.0, 150.0, 50.0]
}

# Convert to DataFrame 
df_sales = pd.DataFrame(data)

# Save as CSV
csv_path = "/dbfs/FileStore/sales_data.csv" 
df_sales.to_csv(csv_path, index=False)

# Save as Parquet
parquet_path = "/dbfs/FileStore/sales_data.parquet"
df_sales.to_parquet(parquet_path, index=False)

print(f"Sample data saved to {csv_path} and {parquet_path}")

Sample data saved to /dbfs/FileStore/sales_data.csv and /dbfs/FileStore/sales_data.parquet


In [0]:
# Initialize SparkSession
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_timestamp

spark = SparkSession.builder.appName("DeltaLiveTableExample").getOrCreate()

# Load data from CSV
df_sales = spark.read.format("csv").option("header", "true").load("/FileStore/sales_data.csv")

# Transform the data: Add a new column for total amount
df_transformed = df_sales.withColumn("TotalAmount", col("Quantity").cast("int") * col("Price").cast("double"))

# Write transformed data to Delta table
delta_table_path="/delta/sales_data"
df_transformed.write.format("delta").mode("overwrite").save(delta_table_path)

print("Delta table created and data written successfully.")

Delta table created and data written successfully.


In [0]:
import dlt

@dlt.table
def sales_data():
    df = spark.read.format("delta").load(delta_table_path)
    return df.select(
        col("OrderID"),
        col("OrderDate"),
        col("CustomerID"),
        col("Product"),
        col("Quantity"),
        col("Price"),
        (col("Quantity").cast("int") * col("Price").cast("double")).alias ("TotalAmount")
    )
print("Delta Live Table created.")

Delta Live Table created.


Name,Type
OrderID,string
OrderDate,string
CustomerID,string
Product,string
Quantity,string
Price,string
TotalAmount,double
