# Brand level conversion analysis

In [0]:
events = spark.read.table("workspace.ecommerce.df_october")

Discriptive Stats for each brand

In [0]:
from pyspark.sql import functions as F

Discriptive_stats =(
    events.filter(F.col("brand").isNotNull())
    .groupBy("brand")
    .agg(
        F.round(F.stddev("price"),0).alias("stddev"),
        F.round(F.mean("price"),0).alias("mean"),
        F.round(F.mode("price"),0).alias("mode"),
        F.round(F.max("price"),0).alias("max"),
        F.round(F.min("price"),0).alias("min")
    )
)

In [0]:
display(Discriptive_stats)

In [0]:
events = events.withColumn("date",F.col("event_time").cast("date"))

In [0]:
display(events.printSchema())

Overall Weekday vs Weekend conversion ratio

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window


events = events.withColumn(
    "is_weekend",
    F.when(F.dayofweek("date").isin([1, 7]), "Weekend").otherwise("Weekday")
)
display(
    events.groupBy("is_weekend", "event_type")
    .count()
    .withColumn(
        "ratio",
        F.col("count") / F.sum("count").over(Window.partitionBy("event_type"))
    )
)

Weekend & conversion ratio for each brand

In [0]:
agg = events.groupBy("brand", "is_weekend").agg(
    F.sum(F.when(F.col("event_type") == "view", 1).otherwise(0)).alias("total_views"),
    F.sum(F.when(F.col("event_type") == "cart", 1).otherwise(0)).alias("total_carts"),
    F.sum(F.when(F.col("event_type") == "purchase", 1).otherwise(0)).alias("total_purchases")
)
agg = agg.withColumn(
    "view_to_purchase_rate",
    F.round(F.try_divide(F.col("total_purchases"), F.col("total_views")), 2)
).withColumn(
    "cart_to_purchase_rate",
    F.round(F.try_divide(F.col("total_purchases"), F.col("total_carts")), 2)
)

brand_totals = agg.groupBy("brand").agg(
    F.sum("total_purchases").alias("Overall_purchases")
)

conversion = agg.join(brand_totals, "brand", "left").withColumn(
    "Weekly_purchase_ratio",
    F.round(F.try_divide(F.col("Overall_purchases"), F.col("total_purchases")), 2)
)



In [0]:
conversion.orderBy("brand").display()


Calculating Overall sales for each brand

In [0]:
total_price = events.filter((F.col("brand").isNotNull()) & (F.col("event_type") == "purchase")) \
    .groupBy("brand") \
    .agg(F.round(F.sum("price"), 0).alias("total_price"))

Joining Sale price + conversion & is_weekend ratio

In [0]:
Derived_Features = (total_price
                   .join(conversion, "brand","outer")
                   #.dropDuplicates(["brand"])
)

display(Derived_Features.orderBy("brand"))

Adding Discriptive stats to the Derived features

In [0]:
Derived_Features = Derived_Features.join(Discriptive_stats, "brand","inner")

In [0]:
display(Derived_Features.orderBy("brand").where(F.col("brand").isNotNull()))

Correlation

In [0]:

print(
    "Correlation between total_price and cart_to_purchase_rate:",
    Derived_Features.stat.corr("total_price", "cart_to_purchase_rate")
)
print(
    "Correlation between total_price and view_to_purchase_rate:",
    Derived_Features.stat.corr("total_price", "view_to_purchase_rate")
)
print(
    "Correlation between cart_to_purchase_rate and view_to_purchase_rate:",
    Derived_Features.stat.corr("cart_to_purchase_rate", "view_to_purchase_rate")
)

Loading the Gold data to Delat table

In [0]:
Derived_Features.filter(F.col("brand").isNotNull()).write.format("delta").mode("overwrite").option("overwriteSchema", "true").saveAsTable("ecom.gold.Derived_Features")

spark.sql("""
    ALTER TABLE ecom.gold.Derived_Features 
    ALTER COLUMN brand SET NOT NULL
""")

spark.sql("""
    ALTER TABLE ecom.gold.Derived_Features
    ADD CONSTRAINT pk_derived_features PRIMARY KEY (brand)
""")

Filtering & Visualizing

In [0]:
TopVCWDay = spark.read.table("ecom.gold.Derived_Features").filter("is_weekend == 'Weekday'").orderBy(F.col("view_to_purchase_rate").desc()).limit(10)
TopVCWEnd =  spark.read.table("ecom.gold.Derived_Features").filter("is_weekend == 'Weekend'").orderBy(F.col("view_to_purchase_rate").desc()).limit(10)
TopCCWDay =  spark.read.table("ecom.gold.Derived_Features").filter("is_weekend == 'Weekday'").orderBy(F.col("cart_to_purchase_rate").desc()).limit(10)
TopCCWEnd =  spark.read.table("ecom.gold.Derived_Features").filter("is_weekend == 'Weekend'").orderBy(F.col("cart_to_purchase_rate").desc()).limit(10)
TopSWDay =  spark.read.table("ecom.gold.Derived_Features").filter("is_weekend == 'weekday'").orderBy(F.col("total_price").desc()).limit(10)
TopSWEnd =  spark.read.table("ecom.gold.Derived_Features").filter("is_weekend == 'Weekend'").orderBy(F.col("total_price").desc()).limit(10)

In [0]:
TopSWEnd.display()