In [1]:
# Move data from Bronze to Silver
from pyspark.sql.types import IntegerType
from pyspark.sql import functions as F
from pyspark.sql.functions import when

# Bronze layer: load the raw car-price records from the Bronze lakehouse.
# NOTE(review): csv_file_path is not referenced anywhere in this cell; it is
# kept only in case another cell reads it.
csv_file_path = "Raw.Lakehouse/Files/car_prices.csv"
df_Bronze = spark.read.parquet(
    "abfss://Test@onelake.dfs.fabric.microsoft.com/Bronze.Lakehouse/Files/CarPrices.Parquet"
)

# Parse the condition score as a double once, so that both branches of the
# CASE expression below (and the comparison itself) are numeric. Without the
# explicit cast the result type depends on Spark's implicit coercion between
# the raw column and the divided value.
condition_num = F.col("condition").cast("double")

# Silver transformations: rename, type, and fix data.
# Assumes the session uses Spark's default case-insensitive column resolution,
# so withColumn("Make", ...) replaces the existing "make" column in place
# rather than adding a second column -- TODO confirm if spark.sql.caseSensitive
# is ever enabled for this workspace.
df_Bronze = (
    df_Bronze
    .withColumn("ModelYear", F.col("year").cast(IntegerType()))
    .drop("year")
    .withColumn("Make", F.initcap(F.col("make")))
    .withColumn("Model", F.initcap(F.col("model")))
    .withColumn("Trim", F.initcap(F.col("trim")))
    .withColumn("Body", F.initcap(F.col("body")))
    .withColumn("Transmission", F.initcap(F.col("transmission")))
    # VINs are identifiers, not words: initcap would lower-case everything
    # after the first character, so normalise them to upper case instead.
    .withColumn("Vin", F.upper(F.col("vin")))
    .withColumn("SalesState", F.upper(F.col("state")))
    .drop("state")
    # Scores of 5 or less are kept as-is; larger scores are divided by 10.
    # Presumably this folds a 10-50 scale onto a 1-5 scale -- verify against
    # the data dictionary.
    .withColumn(
        "Condition",
        F.when(condition_num <= 5, condition_num).otherwise(
            F.col("condition").cast("decimal(4,1)") / 10.0
        ),
    )
    .withColumn("Miles", F.col("odometer").cast(IntegerType()))
    .drop("odometer")
    .withColumn("BodyColor", F.initcap(F.col("color")))
    .drop("color")
    .withColumn("InteriorColor", F.initcap(F.col("interior")))
    .drop("interior")
    .withColumn("Seller", F.initcap(F.col("seller")))
    # NOTE(review): "Manheir" looks like a misspelling of "Manheim"; the
    # column name is left unchanged because downstream queries may use it.
    .withColumn("ManheirMarketReportValue", F.col("mmr").cast(IntegerType()))
    .drop("mmr")
    .withColumn("SellingPrice", F.col("sellingprice").cast(IntegerType()))
    # Lake bookkeeping columns are not carried into Silver.
    .drop("LakePath", "LakeDate")
    # Take the 11 characters starting at position 5 of saledate, parse them
    # as "MMM dd yyyy", and store the date as an integer key in yyyyMMdd form.
    # Presumably saledate looks like "Tue Dec 16 2014 ..." -- TODO confirm.
    .withColumn(
        "SalesDate",
        F.date_format(
            F.to_date(F.substring(F.col("saledate"), 5, 11), "MMM dd yyyy"),
            "yyyyMMdd",
        ).cast(IntegerType()),
    )
    .drop("saledate")
)

# Specify the location path for the "silver" lakehouse
silver_path = "abfss://Test@onelake.dfs.fabric.microsoft.com/Silver.Lakehouse/Tables/"

# silver_path already ends with "/", so strip it before appending the table
# folder; otherwise the location contains a doubled slash ("Tables//CarPrices").
carprices_path = silver_path.rstrip("/") + "/CarPrices"

# Save the DataFrame as a Delta table in the specified path, replacing both
# the data and the schema of any existing CarPrices table.
(
    df_Bronze.write.format("delta")
    .mode("overwrite")
    .option("path", carprices_path)
    .option("overwriteSchema", "true")
    .saveAsTable("CarPrices")
)


StatementMeta(, 51e3b748-efa9-49a9-824f-120451bc8b0b, 3, Finished, Available, Finished)

In [2]:
%%sql
-- Preview the CarPrices table written by the previous cell.
SELECT *
FROM carprices


StatementMeta(, 51e3b748-efa9-49a9-824f-120451bc8b0b, 4, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 16 fields>