In [ ]:
from pyspark.sql import SparkSession
from delta.tables import DeltaTable

In [ ]:
# Obtain the Spark session used by every later cell (getOrCreate reuses an
# already-running session instead of starting a second one).
spark = (
    SparkSession.builder
    .appName("CSV to Delta")
    .getOrCreate()
)

In [ ]:
# Notebook inputs. Both default to empty strings.
# NOTE(review): presumably these are overridden by the caller (e.g. pipeline
# parameters) before the cells below run -- confirm.
# "/"-separated location string; segments 2, 4 and 6 are read from it later.
source_location = ""
# Path of the input CSV, relative to the lakehouse root used in the ABFSS URI.
relative_file_path = ""

In [ ]:
# Break the source location into its "/"-separated segments and pick out the
# identifiers by position.
# NOTE(review): indices 2/4/6 imply a layout such as
# "/<label>/<tenant>/<label>/<workspace>/<label>/<lakehouse>" -- confirm
# against whatever supplies source_location.
parts = source_location.split("/")
if len(parts) < 7:
    # Fail with an actionable message instead of a bare IndexError when the
    # parameter was not supplied or is malformed.
    raise ValueError(
        "source_location must contain at least 7 '/'-separated segments "
        f"(tenant at index 2, workspace at 4, lakehouse at 6); got {source_location!r}"
    )

tenant_id = parts[2]
# Spaces are removed from the ids and the file path before they are embedded
# in the ABFSS URIs.
workspace_id = parts[4].replace(" ", "")
lakehouse_id = parts[6].replace(" ", "")
relative_file_path = relative_file_path.replace(" ", "")

if not workspace_id or not lakehouse_id:
    # An empty id would yield a malformed "abfss://@..." URI that only fails
    # later, inside Spark, with a much less obvious error.
    raise ValueError(
        f"source_location yielded an empty workspace or lakehouse id: {source_location!r}"
    )

# Lakehouse-relative location of the target Delta table.
delta_table_path = "Tables/shipmentevents"


In [ ]:
# Both the input CSV and the Delta output live under the same OneLake
# workspace/lakehouse root; only the trailing relative path differs.
onelake_uri_template = "abfss://{workspace}@onelake.dfs.fabric.microsoft.com/{lakehouse}/{path}"
csv_path = onelake_uri_template.format(
    workspace=workspace_id,
    lakehouse=lakehouse_id,
    path=relative_file_path,
)
delta_output_path = onelake_uri_template.format(
    workspace=workspace_id,
    lakehouse=lakehouse_id,
    path=delta_table_path,
)

In [ ]:
# Record the row count before loading so the effect of the append can be
# reported. A missing table counts as zero rows; the append below creates it.
table_exists = DeltaTable.isDeltaTable(spark, delta_output_path)
if table_exists:
    delta_table = DeltaTable.forPath(spark, delta_output_path)
    before_count = delta_table.toDF().count()
    print(f"Rows in Delta table before load: {before_count}")
else:
    before_count = 0
    print("Delta table does not exist. It will be created.")

# Read the CSV with a header row and inferred column types. Quote and escape
# are both set to '"' so that doubled quotes inside a quoted field (as in
# embedded JSON) are parsed as literal quote characters.
df = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("quote", "\"")
    .option("escape", "\"")
    .csv(csv_path)
)

# NOTE: count() is a separate Spark action, so the CSV is scanned here and
# again by the write below.
csv_count = df.count()
print(f"CSV rows read: {csv_count}")

# Write to Delta in append mode: existing rows are kept and the CSV rows are
# added (the table is created on first load).
df.write.format("delta").mode("append").save(delta_output_path)
print("CSV data loaded into Delta table.")

# Count rows after load. The table handle is re-resolved because the table
# may not have existed before the write.
delta_table = DeltaTable.forPath(spark, delta_output_path)
after_count = delta_table.toDF().count()
print(f"Rows in Delta table after load: {after_count}")

# Reconcile the three counts. Only warn: a concurrent writer to the same
# table could legitimately change the total between the two counts.
expected_count = before_count + csv_count
if after_count != expected_count:
    print(
        f"WARNING: expected {expected_count} rows after load "
        f"({before_count} existing + {csv_count} from CSV) but found {after_count}."
    )