In [1]:
# Load & Inspect Processed Dataset

import pandas as pd
import numpy as np

# Show every column when a frame is displayed; the feature table is wide.
pd.set_option("display.max_columns", None)

df = pd.read_csv("../01_data/processed/credit_card_features.csv")

# The file comes back with an "Unnamed: 0" column that looks like a row index
# written out by an earlier to_csv call. It carries no information, so drop any
# such column here instead of propagating it into everything derived from df.
leaked_index_cols = [col for col in df.columns if str(col).startswith("Unnamed:")]
df = df.drop(columns=leaked_index_cols)

df.head()


Unnamed: 0,id,credit_limit,gender,education_level,marital_status,age,pay_0,pay_2,pay_3,pay_4,pay_5,pay_6,bill_amt1,bill_amt2,bill_amt3,bill_amt4,bill_amt5,bill_amt6,pay_amt1,pay_amt2,pay_amt3,pay_amt4,pay_amt5,pay_amt6,default,avg_bill_amt,max_bill_amt,avg_payment_amt,payment_ratio,utilization_proxy,engagement_score,risk_flag
0,1,20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0,1,1284.0,3913,114.833333,0.089434,0.0642,0.410386,High Risk
1,2,120000,2,2,2,26,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0,1000,1000,1000,0,2000,1,2846.166667,3455,833.333333,0.292791,0.023718,0.508278,High Risk
2,3,90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000,0,16942.166667,29239,1836.333333,0.108388,0.188246,0.371919,Low Risk
3,4,50000,2,2,1,37,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000,0,38555.666667,49291,1398.0,0.036259,0.771113,0.114848,Low Risk
4,5,50000,1,2,1,57,-1,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000,36681,10000,9000,689,679,0,18223.166667,35835,9841.5,0.540054,0.364463,0.474391,Low Risk


In [2]:
# Validation Check

# Fail fast with a readable message if the columns the simulation cells rely on
# are missing, rather than hitting a KeyError part-way through the notebook.
required_columns = {"default", "engagement_score", "utilization_proxy"}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, f"missing expected columns: {sorted(missing_columns)}"
assert len(df) > 0, "processed dataset is empty"

df.shape

(30000, 32)

## Product & Marketing Assumptions

- Product: Limited-time 5% cash-back category offer.
- Goal: Increase spending and engagement among qualified (low-risk) customers.
- Channels: Email and app notifications.
- Constraint: Limit targeting of high-risk customers (those who have defaulted).
- Most likely adoption by:
    - Low-risk customers.
    - Moderate utilization levels.
    - Highly engaged customers.

In [3]:
# Simulate Offer Exposure

# Fixed-seed generator local to this cell, so the exposure draw is identical
# after a kernel restart and when this cell is re-run on its own.
exposure_rng = np.random.default_rng(42)

# Base exposure probability
base_exposure_prob = 0.45

# Exposure probability for customers who defaulted (limited, not zero)
high_risk_exposure_prob = 0.10

# Reduce exposure for high-risk customers
exposure_prob = np.where(
    df["default"] == 1,
    high_risk_exposure_prob,  # high-risk: limited exposure
    base_exposure_prob        # low-risk: normal exposure
)

# One Bernoulli draw per customer using that customer's exposure probability.
df["offer_exposed"] = exposure_rng.binomial(1, exposure_prob)

df["offer_exposed"].value_counts(normalize=True)

offer_exposed
0    0.6251
1    0.3749
Name: proportion, dtype: float64

In [4]:
# Assign Marketing Channels

# Fixed-seed generator local to this cell, so channel assignment is identical
# after a kernel restart and when this cell is re-run on its own.
channel_rng = np.random.default_rng(43)

channels = ["email", "app", "both"]
channel_weights = [0.4, 0.4, 0.2]  # sampling probability for each entry in `channels`

# A channel is sampled for every row, but only exposed customers keep it;
# everyone else is labelled "none".
df["channel"] = np.where(
    df["offer_exposed"] == 1,
    channel_rng.choice(channels, size=len(df), p=channel_weights),
    "none"
)

df["channel"].value_counts()


channel
none     18753
app       4558
email     4419
both      2270
Name: count, dtype: int64

In [5]:
# Simulate Adoption

# Fixed-seed generator local to this cell, so the adoption draw is identical
# after a kernel restart and when this cell is re-run on its own.
adoption_rng = np.random.default_rng(44)

# Normalize engagement score into 0-1 range
eng_min = df["engagement_score"].min()
eng_max = df["engagement_score"].max()
eng_range = eng_max - eng_min
if eng_range > 0:
    engagement_norm = (df["engagement_score"] - eng_min) / eng_range
else:
    # A constant score would make the min-max division 0/0 (NaN probabilities);
    # treat it as carrying no engagement signal instead.
    engagement_norm = pd.Series(0.0, index=df.index)

# Build adoption probability (before exposure constraint)
# NOTE(review): the assumptions section lists "moderate utilization" as the most
# likely adopters, but the utilization term below rises steadily as utilization
# falls - confirm which behaviour is intended.
adoption_prob = (
    0.03 +                         # base
    0.55 * engagement_norm +       # engagement effect
    0.15 * (1 - df["utilization_proxy"]) -  # lower utilization -> higher likelihood
    0.25 * df["default"]           # high-risk reduces likelihood
)

# Only exposed customers can adopt
adoption_prob = np.where(df["offer_exposed"] == 1, adoption_prob, 0)

# Keep probabilities in a realistic range (also removes negative values
# produced by the penalty terms above)
adoption_prob = np.clip(adoption_prob, 0, 0.60)

# One Bernoulli draw per customer using that customer's adoption probability.
df["adopted"] = adoption_rng.binomial(1, adoption_prob)

df["adopted"].mean()


np.float64(0.10973333333333334)

In [6]:
# Validation Check
# No Adoption Without Exposure

# Enforce the invariant rather than relying on someone reading the table:
# a customer who was never exposed must not be marked as having adopted.
adopted_without_exposure = (df["offer_exposed"] == 0) & (df["adopted"] == 1)
assert not adopted_without_exposure.any(), "found adopters who were never exposed"

# Row-normalized table: share of adopters within each exposure group.
pd.crosstab(df["offer_exposed"], df["adopted"], normalize="index")

adopted,0,1
offer_exposed,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1.0,0.0
1,0.7073,0.2927


In [7]:
# Adoption by Risk

# Average of the adoption flag within each value of `default`,
# i.e. the adoption rate per risk group.
df["adopted"].groupby(df["default"]).mean()

default
0    0.138675
1    0.007836
Name: adopted, dtype: float64

In [8]:
# Adoption by Channel

# Adoption rate within each channel label, listed from highest to lowest.
channel_adoption_rate = df["adopted"].groupby(df["channel"]).mean()
channel_adoption_rate.sort_values(ascending=False)

channel
email    0.294863
both     0.293392
app      0.290259
none     0.000000
Name: adopted, dtype: float64

In [9]:
# Validation Check

# Only the last expression of a cell is rendered, so print the two overall
# rates explicitly; as bare expressions their values were silently discarded.
print(f"exposure rate: {df['offer_exposed'].mean():.4f}")
print(f"adoption rate: {df['adopted'].mean():.4f}")

# Row-normalized table: share of adopters within each exposure group.
pd.crosstab(df["offer_exposed"], df["adopted"], normalize="index")


adopted,0,1
offer_exposed,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1.0,0.0
1,0.7073,0.2927


## Marketing Simulation Summary

This project uses publicly available credit card data, and real campaign logs are proprietary and not publicly available. Marketing exposure and adoption are therefore simulated for a product-launch scenario.

- For this model, exposure is risk-aware, meaning customers identified as defaulted are less likely to be targeted.
- Probability of adoption is behavior-driven through engagement, utilization, and risk status.
- The simulated fields (`offer_exposed`, `channel`, `adopted`) support segmentation, propensity, and decision analysis for the offer launch.

In [11]:
# Save Simulated Dataset

# Written next to the input features file. index=False keeps the row index
# out of the CSV.
output_path = "../01_data/processed/credit_card_marketing_simulation.csv"
df.to_csv(path_or_buf=output_path, index=False)

# Echo the destination so the cell output records where the file was written.
output_path


'../01_data/processed/credit_card_marketing_simulation.csv'