In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

# 1. ADLS Configuration
# The storage account key is a credential and must not live in source control,
# so it is read from the ADLS_ACCOUNT_KEY environment variable at run time.
# On Databricks a secret scope (dbutils.secrets.get) is a common alternative.
# NOTE(review): the key that used to be hard-coded here has been exposed in
# the file history and should be rotated.
import os

adls_account_key = os.environ.get("ADLS_ACCOUNT_KEY")
if not adls_account_key:
    # Fail early with a clear message instead of a late authentication error
    # from the first read against the storage account.
    raise RuntimeError(
        "ADLS_ACCOUNT_KEY is not set; provide the storage account key "
        "through the environment before running this job."
    )

spark.conf.set(
    "fs.azure.account.key.hospitalstorge17.dfs.core.windows.net",
    adls_account_key,
)

# 2. Define Paths (the dfs endpoint is used so the host matches the account-key setting)
# Source (bronze) and destination (silver) Delta table locations.
bronze_path, silver_path = (
    "abfss://bronze@hospitalstorge17.dfs.core.windows.net/bronze_real",
    "abfss://silver@hospitalstorge17.dfs.core.windows.net/silver_real",
)

# 3. Read from Bronze
# Open the Delta table at bronze_path as a streaming source.
bronze_df = spark.readStream.format("delta").load(bronze_path)

# 4. Define Schema
# Fields expected in each raw JSON record. The two time fields are declared
# as strings here and converted to timestamps in the cleaning step.
schema = StructType([
    StructField(field_name, field_type())
    for field_name, field_type in (
        ("patient_id", StringType),
        ("gender", StringType),
        ("age", IntegerType),
        ("department", StringType),
        ("admission_time", StringType),
        ("discharge_time", StringType),
        ("bed_id", IntegerType),
        ("hospital_id", IntegerType),
    )
])

# 5. Parse JSON
# Decode the raw_json column with the schema and promote the parsed struct's
# fields to top-level columns; no other bronze columns are carried forward.
parsed_df = (
    bronze_df
    .select(from_json(col("raw_json"), schema).alias("data"))
    .select("data.*")
)

# 6. Clean Data & Transformations
# Convert both time columns from string to timestamp.
clean_df = (
    parsed_df
    .withColumn("admission_time", to_timestamp("admission_time"))
    .withColumn("discharge_time", to_timestamp("discharge_time"))
)

# Handle invalid admission_times
# Keep an admission_time only when it is present and not in the future;
# a null or future value is replaced with the current timestamp.
clean_df = clean_df.withColumn(
    "admission_time",
    when(
        col("admission_time").isNotNull()
        & (col("admission_time") <= current_timestamp()),
        col("admission_time"),
    ).otherwise(current_timestamp()),
)

# Handle Invalid Age
# Ages above 100 or below 0 are treated as invalid and replaced with a random
# integer in 1..90 (rand() is in [0, 1), so floor(rand() * 90 + 1) is 1..90).
# The original rule only caught ages above 100, so negative ages slipped
# through unchanged. A null age matches neither comparison and stays null.
clean_df = clean_df.withColumn(
    "age",
    when(
        (col("age") > 100) | (col("age") < 0),
        floor(rand() * 90 + 1).cast("int"),
    ).otherwise(col("age")),
)

# 7. Schema Evolution Handling
# Guarantee that every expected column exists before the write. A missing
# column is added as a *typed* null: a bare lit(None) yields an untyped
# (NullType) column, which is not a usable column type for the Delta sink and
# would not match the types the silver table expects.
# The types mirror the parsing schema, with the two time columns as
# timestamps because they are converted in the cleaning step.
expected_col_types = {
    "patient_id": "string",
    "gender": "string",
    "age": "int",
    "department": "string",
    "admission_time": "timestamp",
    "discharge_time": "timestamp",
    "bed_id": "int",
    "hospital_id": "int",
}
expected_cols = list(expected_col_types)

# Read the column list once; each .columns access re-resolves the schema.
existing_cols = set(clean_df.columns)
for col_name, col_type in expected_col_types.items():
    if col_name not in existing_cols:
        clean_df = clean_df.withColumn(col_name, lit(None).cast(col_type))

# 8. Write to Silver Table
# Start the streaming query that appends the cleaned rows to the Delta table
# at silver_path. The expression is left unassigned so the notebook shows the
# returned query handle.
(
    clean_df.writeStream
    .outputMode("append")
    .format("delta")
    # Keeping the checkpoint in its own folder makes it easier to manage
    .option("checkpointLocation", "abfss://silver@hospitalstorge17.dfs.core.windows.net/_checkpoints/silver_processing")
    .option("mergeSchema", "true")
    .start(silver_path)
)

<pyspark.sql.connect.streaming.query.StreamingQuery at 0x7f9111836e40>