In [0]:
import pandas as pd

# Base URL of the folder that holds the final data files, and the file to read from it
base_url = "https://raw.githubusercontent.com/sriram1105-m/Customer-Intelligence-Platform/main/data/final/"
file_name = "customer_360.csv"

# Load customer 360 table: read the remote CSV into pandas first, then convert
# the pandas frame into a Spark DataFrame for the downstream cells.
# NOTE(review): `spark` is not created in this notebook; presumably the session
# is injected by the notebook runtime -- confirm before running elsewhere.
pdf = pd.read_csv(f"{base_url}{file_name}")
customer_360_df = spark.createDataFrame(pdf)

In [0]:
from pyspark.sql import functions as F

# Step 1: Defining Thresholds
# Three ascending cut-off values per RFM metric; the rules below pick the
# cut-off they need by position.
RFM_THRESHOLDS = {
    "recency_days": [30, 60, 120],
    "frequency": [2, 5, 10],
    "monetary_value": [500, 1000, 5000],
}

# Step 2: Apply Segmentation Logic
# Pull out the threshold lists and column references once so each rule reads
# as a plain comparison.
recency_cuts = RFM_THRESHOLDS["recency_days"]
frequency_cuts = RFM_THRESHOLDS["frequency"]
monetary_cuts = RFM_THRESHOLDS["monetary_value"]

recency_col = F.col("recency_days")
frequency_col = F.col("frequency")
monetary_col = F.col("monetary_value")

# "High Value": lowest recency cut-off combined with the highest frequency
# and monetary cut-offs.
is_high_value = (
    (recency_col <= recency_cuts[0])
    & (frequency_col >= frequency_cuts[2])
    & (monetary_col >= monetary_cuts[2])
)

# "Loyal": the middle cut-off on all three metrics.
is_loyal = (
    (recency_col <= recency_cuts[1])
    & (frequency_col >= frequency_cuts[1])
    & (monetary_col >= monetary_cuts[1])
)

# "Churn Risk": recency beyond the largest recency cut-off, regardless of the
# other two metrics.
is_churn_risk = recency_col > recency_cuts[2]

# Rules are evaluated in order and the first match wins; rows matching none of
# them are labelled "Regular".
segment_expr = (
    F.when(is_high_value, "High Value")
    .when(is_loyal, "Loyal")
    .when(is_churn_risk, "Churn Risk")
    .otherwise("Regular")
)

customer_segments_df = customer_360_df.withColumn("segment", segment_expr)



In [0]:
# Quick summary check: number of rows assigned to each segment label
segment_counts_df = customer_segments_df.groupBy("segment").count()
display(segment_counts_df)

segment,count
Regular,5348
Churn Risk,7114
High Value,116
Loyal,3416


In [0]:
# Saving the Customer Segmentation Table
# Collect the Spark DataFrame into a pandas frame, then write it out as CSV
# (without the pandas index column) to a path relative to the working directory.
customer_segments_pdf = customer_segments_df.toPandas()
customer_segments_pdf.to_csv("customer_segments.csv", index=False)