In [1]:
# Load Simulation Dataset

import pandas as pd
import numpy as np

# Render every column when a frame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

# Read the processed marketing simulation extract; the path is relative to this notebook.
df = pd.read_csv(
    "../01_data/processed/credit_card_marketing_simulation.csv"
)
df.shape


(30000, 35)

In [2]:
# Create Segmentation Data Frame
# Work on an independent copy of just the columns the segmentation rules read,
# so adding tier columns below does not touch `df`.

segmentation_columns = ["engagement_score", "utilization_proxy", "default", "adopted"]
seg_df = df.loc[:, segmentation_columns].copy()


In [3]:
# Establish Engagement Tiers
# Quantile-based binning: one bin per label, ordered from lowest to highest score.

engagement_labels = ["Low", "Mid-Low", "Mid-High", "High"]
seg_df["engagement_tier"] = pd.qcut(
    seg_df["engagement_score"],
    q=len(engagement_labels),
    labels=engagement_labels,
)


In [4]:
# Establish Utilization Tiers
# Quantile-based binning: one bin per label, ordered from lowest to highest utilization.

utilization_labels = ["Low Utilization", "Medium Utilization", "High Utilization"]
seg_df["utilization_tier"] = pd.qcut(
    seg_df["utilization_proxy"],
    q=len(utilization_labels),
    labels=utilization_labels,
)


In [5]:
# Define Customer Segments

def assign_segment(row):
    """Map one customer record to its marketing segment label.

    Parameters
    ----------
    row : mapping-like
        Must support ``row["default"]`` and ``row["engagement_tier"]``;
        ``row["utilization_tier"]`` is read only for the two upper
        engagement tiers.

    Returns
    -------
    str
        One of "High Risk / Deprioritize", "High-Value Growth",
        "Core Engaged", "Low Engagement" or "Other".

    Rules, in priority order:
      1. default == 1                              -> "High Risk / Deprioritize"
      2. High engagement + Low Utilization         -> "High-Value Growth"
      3. Mid-High/High engagement + Medium Util.   -> "Core Engaged"
      4. Low/Mid-Low engagement                    -> "Low Engagement"
      5. anything else                             -> "Other"
    """
    # Defaulted customers are set aside regardless of their tiers.
    if row["default"] == 1:
        return "High Risk / Deprioritize"

    engagement = row["engagement_tier"]

    # Utilization only matters for the two upper engagement tiers.
    if engagement in ("Mid-High", "High"):
        utilization = row["utilization_tier"]
        if engagement == "High" and utilization == "Low Utilization":
            return "High-Value Growth"
        if utilization == "Medium Utilization":
            return "Core Engaged"
        return "Other"

    if engagement in ("Low", "Mid-Low"):
        return "Low Engagement"

    # Reached when the engagement tier matches none of the four labels.
    return "Other"

# Label every row with its segment, then show how many customers land in each.
seg_df = seg_df.assign(
    customer_segment=seg_df.apply(assign_segment, axis=1)
)

seg_df["customer_segment"].value_counts()

customer_segment
Low Engagement              10978
High Risk / Deprioritize     6636
High-Value Growth            5474
Core Engaged                 4280
Other                        2632
Name: count, dtype: int64

In [6]:
# Attach Segments to Dataset
# seg_df was copied from df, so the labels are matched back on the shared row index.

df = df.assign(customer_segment=seg_df["customer_segment"])

In [7]:
# Build Segment Summary Table
# One row per segment: head count plus the mean of each tracked metric,
# ordered from the highest to the lowest adoption rate.

segment_metrics = {
    "customers": ("customer_segment", "count"),
    "adoption_rate": ("adopted", "mean"),
    "avg_engagement": ("engagement_score", "mean"),
    "avg_utilization": ("utilization_proxy", "mean"),
    "default_rate": ("default", "mean"),
}

segment_summary = df.groupby("customer_segment").agg(**segment_metrics)
segment_summary = segment_summary.sort_values("adoption_rate", ascending=False)

segment_summary


Unnamed: 0_level_0,customers,adoption_rate,avg_engagement,avg_utilization,default_rate
customer_segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
High-Value Growth,5474,0.213555,0.831264,0.01908,0.0
Other,2632,0.170213,0.468435,0.030227,0.0
Core Engaged,4280,0.160748,0.495529,0.180934,0.0
Low Engagement,10978,0.08517,0.181735,0.659037,0.0
High Risk / Deprioritize,6636,0.007836,0.334751,0.449,1.0


In [8]:
# Calculate Segment Share of Total Adopters
# Numerator: adopter rows per segment. Denominator: sum of the `adopted` column.

adopters = df.loc[df["adopted"] == 1]
total_adopted = df["adopted"].sum()

adopter_distribution = adopters.groupby("customer_segment").size().div(total_adopted)

adopter_distribution.sort_values(ascending=False)

customer_segment
High-Value Growth           0.355103
Low Engagement              0.284022
Core Engaged                0.208991
Other                       0.136087
High Risk / Deprioritize    0.015796
dtype: float64

## Segment Summary

- High-Value Growth customers are low risk, highly engaged, and have the highest adoption rate, making them the primary target segment.
- Core Engaged customers demonstrate solid adoption and engagement rates, making them suitable for general marketing campaigns.
- Low Engagement customers respond at a low rate, which suggests a lower ROI on campaigns aimed at them.
- High Risk / Deprioritize customers have elevated default risk and should be excluded from marketing campaigns.

In [10]:
# Save Segmented Dataset
# Persist df, including the customer_segment column, without writing the row index.

output_path = "../01_data/processed/credit_card_segmented.csv"
df.to_csv(path_or_buf=output_path, index=False)
