In [0]:
# Register a text widget named "layer" whose default value is "silver"
dbutils.widgets.text("layer", "silver")

# Read the widget's current value and echo it for confirmation
layer = dbutils.widgets.get("layer")
print("Current layer:", layer)

Current layer: silver


In [0]:
# Import required libraries
from pyspark.sql import functions as F

# Build the silver layer: aggregate cleaned bronze events into per-user features
def create_user_feature_table(input_table):
    """Build the per-user feature (silver layer) DataFrame from a bronze table.

    The bronze table is read through the active ``spark`` session, cleaned,
    and then aggregated to one row per ``user_id``.

    Cleaning steps:
      * keep only rows whose ``price`` is greater than 0;
      * keep only rows with a non-null ``user_id``;
      * remove rows that are exact duplicates across every column.

    Args:
        input_table: Name of the table to read with ``spark.table``. The code
            references the columns ``user_id``, ``price`` and ``event_type``.

    Returns:
        A DataFrame with the columns ``user_id``, ``total_events``,
        ``total_views``, ``total_purchases``, ``avg_spent`` and
        ``total_spent``.
    """
    # Load saved bronze layer data
    bronze_df = spark.table(input_table)

    clean_df = (
        bronze_df
        .filter(F.col("price") > 0)
        .filter("user_id IS NOT NULL")
        # De-duplicate on the whole row, not on user_id alone: collapsing to
        # one row per user before the groupBy would leave every aggregate
        # computed over a single arbitrary event (total_events always 1).
        .dropDuplicates()
    )

    # Conditional sums count the rows matching each event type
    view_flag = F.when(F.col("event_type") == "view", 1).otherwise(0)
    purchase_flag = F.when(F.col("event_type") == "purchase", 1).otherwise(0)

    # Return clean data: silver layer
    return clean_df.groupBy("user_id").agg(
        F.count("*").alias("total_events"),
        F.sum(view_flag).alias("total_views"),
        F.sum(purchase_flag).alias("total_purchases"),
        F.avg("price").alias("avg_spent"),
        F.sum("price").alias("total_spent"),
    )

    
# Read the October events CSV (first row is the header, column types inferred)
october_df = spark.read.csv(
    "/Volumes/workspace/advecom/advecom_data/2019-Oct.csv",
    header=True,
    inferSchema=True,
)

# Remove any existing table first so a stale schema cannot conflict with the write
spark.sql("DROP TABLE IF EXISTS oct_events_delta")

# Persist the loaded events as a Delta table
(
    october_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("oct_events_delta")
)

# Only the silver layer is handled here; any other widget value is reported
if layer != "silver":
    print("Incorrect layer detected")
else:
    silver_df = create_user_feature_table("oct_events_delta")

    # Persist the per-user features as a Delta table
    (
        silver_df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable("user_feature_table_silver")
    )

    print("Silver layer: user feature table has been created successfully")


Silver layer: user feature table has been created successfully
