##### 1. Import and load table

In [8]:
from delta.tables import DeltaTable
from pyspark.sql import functions as F
from pyspark.sql import functions as F, Window

# Load the Bronze game-shifts Delta table from the attached Lakehouse into a Spark DataFrame
bronze_path = "Tables/game_shifts_bronze"
delta_reader = spark.read.format("delta")
df = delta_reader.load(bronze_path)

StatementMeta(, d87e3f4c-0736-4b4b-90e5-6f5e89675dbf, 10, Finished, Available, Finished)

##### 2. Clean and transform data

In [9]:
# 1. Cast every column to its target Spark type.
#    withColumn on an existing column name replaces that column, so the
#    column order of the DataFrame is left untouched.
column_types = {
    "game_id": "string",
    "player_id": "string",
    "period": "int",
    "shift_start": "int",
    "shift_end": "int",
}
for column_name, spark_type in column_types.items():
    df = df.withColumn(column_name, F.col(column_name).cast(spark_type))

# 2. Remove rows that are exact duplicates across all columns
df = df.distinct()

# 3. Count rows and columns in cleaned data
#rows_final = df.count()
#cols_final = len(df.columns)

#print("\nNumber of rows:", rows_final)
#print("Number of columns:", cols_final)
#df.show()

# 4. Function to convert camelCase or PascalCase to snake_case
def to_snake_case(name):
    """Convert a camelCase / PascalCase identifier to snake_case.

    An underscore is inserted at word boundaries only, so runs of capitals
    (acronyms) stay together and existing underscores are not doubled:

        gameId         -> game_id
        PlayerId       -> player_id
        playerID       -> player_id        (not player_i_d)
        HTTPServerName -> http_server_name
        Game_Id        -> game_id          (not game__id)
        shift_start    -> shift_start      (already snake_case: unchanged)

    Args:
        name: Column / identifier name to convert.

    Returns:
        The lower-cased snake_case name with any leading underscores stripped.
    """
    import re  # local import keeps this cell self-contained

    # Boundary between a lowercase letter or digit and the capital that follows: "gameId" -> "game_Id"
    with_boundaries = re.sub(r'(?<=[a-z0-9])(?=[A-Z])', '_', name)
    # Boundary between an acronym and the next capitalised word: "HTTPServer" -> "HTTP_Server"
    with_boundaries = re.sub(r'(?<=[A-Z])(?=[A-Z][a-z])', '_', with_boundaries)
    # Leading underscores are stripped, as the previous implementation did.
    return with_boundaries.lower().lstrip('_')

# Rename every column to its snake_case form; toDF assigns the new names positionally
snake_case_names = map(to_snake_case, df.columns)
df = df.toDF(*snake_case_names)

# 5. Show schema and sample
#df.printSchema()
#display(df.limit(5))

StatementMeta(, d87e3f4c-0736-4b4b-90e5-6f5e89675dbf, 11, Finished, Available, Finished)

##### 3. Load data to silver table

In [10]:
# Incoming cleaned Bronze DataFrame and the column used to detect already-loaded data
source_df = df
key_col = "game_id"

# Path to Silver Lakehouse Delta table
target_path = "abfss://dc478dd4-e53e-4f21-add0-2e376dc173fe@onelake.dfs.fabric.microsoft.com/ce7ef0e9-78af-44db-b5ee-839dcf1c9e98/Tables/game_shifts_silver"

if DeltaTable.isDeltaTable(spark, target_path):
    # Distinct keys that already exist in the Silver table
    existing_df = spark.read.format("delta").load(target_path).select(key_col).distinct()

    # Keep only source rows whose key is not in Silver yet.
    # NOTE(review): the key is game_id alone, so rows that arrive later for a game
    # that is already present in Silver are never appended — confirm this is intended.
    new_rows_df = source_df.join(existing_df, on=key_col, how="left_anti")

    # Count once, BEFORE writing. new_rows_df is lazy: counting it after the append
    # re-runs the whole pipeline, and the anti-join may then see the keys that were
    # just written, so the reported number could be 0 or otherwise wrong.
    new_row_count = new_rows_df.count()

    if new_row_count > 0:
        (new_rows_df.write
            .format("delta")
            .mode("append")
            .save(target_path))
        print(f"✅ Appended {new_row_count} new rows to game_shifts_silver in Lakehouse_Silver.")
    else:
        print("No new rows to append. game_shifts_silver is already up to date.")
else:
    # First load → create the Silver table
    (source_df.write
        .format("delta")
        .mode("overwrite")
        .save(target_path))
    print(f"✅ Initial load complete: created game_shifts_silver in Lakehouse_Silver with {source_df.count()} rows.")


StatementMeta(, d87e3f4c-0736-4b4b-90e5-6f5e89675dbf, 12, Submitted, Running, Running)

In [2]:
# NOTE: everything between the triple quotes below is a single string literal, so
# none of it executes. It appears to be an earlier, disabled MERGE-based version
# of the Bronze -> Silver load (read, cast, de-duplicate, rename, upsert).
# NOTE(review): in the disabled code, `except Exception` treats ANY failure of
# DeltaTable.forPath / merge (not only a missing table) as "table not found" and
# then overwrites the target path — confirm and narrow this before re-enabling.
"""

import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from delta.tables import DeltaTable  # ✅ Needed for DeltaTable

spark = SparkSession.builder.getOrCreate()

# 1. Load the data into df
df = spark.read.format("delta").load("Tables/game_shifts_bronze")
display(df.limit(5))

# 2. Cast data types
df_clean = (
    df
    .withColumn("game_id", F.col("game_id").cast("string"))
    .withColumn("player_id", F.col("player_id").cast("string"))
    .withColumn("period", F.col("period").cast("int"))
    .withColumn("shift_start", F.col("shift_start").cast("int"))
    .withColumn("shift_end", F.col("shift_end").cast("int"))
)

# 3. Drop duplicates
df_clean = df_clean.dropDuplicates()

# 4. Count rows and columns in cleaned data
rows_final = df_clean.count()
cols_final = len(df_clean.columns)

print("\nNumber of rows:", rows_final)
print("Number of columns:", cols_final)
df_clean.show()

# 5. Function to convert camelCase or PascalCase to snake_case
def to_snake_case(name):
    return ''.join(['_' + c.lower() if c.isupper() else c for c in name]).lstrip('_')

# 6. Rename all columns to snake_case
df_clean = df_clean.toDF(*[to_snake_case(c) for c in df_clean.columns])  # ⚠️ Changed from df to df_clean

# 7. Show schema and sample
df_clean.printSchema()
display(df_clean.limit(5))

# 8. Write to Delta table (Lakehouse Silver path)
target_path = "abfss://dc478dd4-e53e-4f21-add0-2e376dc173fe@onelake.dfs.fabric.microsoft.com/ce7ef0e9-78af-44db-b5ee-839dcf1c9e98/Tables/game_shifts_silver"

try:
    # If table exists, perform upsert/merge
    delta_tbl = DeltaTable.forPath(spark, target_path)
    
    # Example merge logic (based on primary keys)
    delta_tbl.alias("tgt").merge(
        df_clean.alias("src"),
        "tgt.game_id = src.game_id AND tgt.player_id = src.player_id AND tgt.period = src.period"
    ).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()

    print("Delta table updated successfully.")

except Exception as e:
    # If table does not exist, write as new
    print("Delta table not found. Writing new table...")
    df_clean.write.format("delta").mode("overwrite").save(target_path)
    print("Delta table written successfully.")

"""

StatementMeta(, cd970548-7e5b-485d-b29b-53541558373a, 4, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, e3bdf8a0-7f2f-49d3-983f-e6db4dd872ab)


Number of rows: 9900705
Number of columns: 5
+----------+---------+------+-----------+---------+
|   game_id|player_id|period|shift_start|shift_end|
+----------+---------+------+-----------+---------+
|2012030152|  8466148|     2|       1844|     1871|
|2012030152|  8473504|     3|       2706|     2755|
|2012030152|  8465058|     1|        801|      851|
|2012030153|  8462129|     2|       1935|     1947|
|2012030153|  8470803|     1|        267|      288|
|2012030153|  8473604|     1|        657|      697|
|2012030153|  8466148|     1|        901|      954|
|2012030154|  8476463|     1|        181|      210|
|2012030155|  8469665|     1|        755|      781|
|2012030155|  8473504|     3|       2568|     2604|
|2012030155|  8466148|     2|       2214|     2250|
|2012030155|  8474141|     1|        384|      443|
|2012030155|  8475745|     3|       3407|     3475|
|2014030311|  8471958|     3|       3106|     3115|
|2014030311|  8474151|     1|        904|      928|
|2014030311|  8466

SynapseWidget(Synapse.DataFrame, a09c64a8-1751-414a-a427-bf457158cb6b)

Delta table not found. Writing new table...
Delta table written successfully.
