In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm

In [2]:
# Synthetic per-customer features. The legacy global NumPy RNG is seeded once
# and the draws below happen in a fixed order; reordering them would change
# every generated value.
np.random.seed(42)

N = 500
tenure = np.random.randint(low=1, high=60, size=N)  # integers in [1, 59]
orders = np.clip(np.random.poisson(6, N), 0, 30)
avg_spend = np.clip(np.random.normal(loc=40, scale=15, size=N), 5, 120)
cancel_rate = np.random.beta(2, 8, N)
cs = np.clip(np.random.poisson(1.2, N), 0, 10)
discount = np.random.beta(2.5, 4, N)

In [3]:
# Ground-truth churn score: a linear combination of the synthetic features.
# Negative weights lower the score, positive weights raise it. The terms are
# accumulated in the same left-to-right order as one chained expression, so
# the floating-point result is unchanged.
logit = -0.03 * tenure
logit = logit - 0.10 * orders
logit = logit - 0.01 * avg_spend
logit = logit + 2.8 * cancel_rate
logit = logit + 0.30 * cs
logit = logit + 1.3 * discount
logit = logit - 0.1

In [4]:
# Logistic function turns the score into a per-customer churn probability;
# one uniform draw per customer then yields a 0/1 label.
prob = 1.0 / (1.0 + np.exp(-logit))
churn = np.less(np.random.rand(N), prob).astype(int)

In [5]:
# Assemble the synthetic dataset (one row per customer, ids starting at 1),
# persist it as CSV without the index, and display the first rows.
df = pd.DataFrame(
    dict(
        customer_id=np.arange(1, N + 1),
        tenure_months=tenure,
        orders_30d=orders,
        avg_spend=avg_spend.round(2),
        cancel_rate=cancel_rate.round(4),
        cs_tickets_30d=cs,
        discount_use_rate=discount.round(4),
        churn_30d=churn,
    )
)
df.to_csv("coupang_churn.csv", encoding="utf-8", index=False)
df.head()

Unnamed: 0,customer_id,tenure_months,orders_30d,avg_spend,cancel_rate,cs_tickets_30d,discount_use_rate,churn_30d
0,1,39,2,32.97,0.2906,2,0.2307,1
1,2,52,4,22.68,0.1561,0,0.2693,1
2,3,29,3,23.57,0.1974,2,0.4031,0
3,4,15,5,44.48,0.3113,2,0.4955,1
4,5,43,9,39.17,0.1083,3,0.4507,0


In [6]:
# Reload the dataset from disk and convert it to float32 tensors:
# X holds one column per entry of feature_cols, y is a single-column target.
df = pd.read_csv("coupang_churn.csv")

target_col = "churn_30d"
feature_cols = [
    "tenure_months",
    "orders_30d",
    "avg_spend",
    "cancel_rate",
    "cs_tickets_30d",
    "discount_use_rate",
]

X = torch.tensor(df.loc[:, feature_cols].to_numpy(), dtype=torch.float32)
# Double brackets keep the target two-dimensional (one column).
y = torch.tensor(df.loc[:, [target_col]].to_numpy(), dtype=torch.float32)

In [7]:
# Shuffle the rows and split them 80/20 into a training set and a held-out set.
# The permutation comes from a dedicated, explicitly seeded generator so the
# split (and the churn rate printed below) is identical after every kernel
# restart; an unseeded torch.randperm would yield a different split each run.
N = len(df)
idx = torch.randperm(N, generator=torch.Generator().manual_seed(42))
train_size = int(N * 0.8)
train_idx = idx[:train_size]
test_idx = idx[train_size:]

# Every row must land in exactly one of the two partitions.
assert len(train_idx) + len(test_idx) == N

X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

print("Train/Test:", X_train.shape, X_test.shape)
print("Churn rate(train):", float(y_train.mean()))

Train/Test: torch.Size([400, 6]) torch.Size([100, 6])
Churn rate(train): 0.38749998807907104


In [8]:
model = nn.Sequential(
    nn.Linear(len(feature_cols), 7776),
    nn.ReLU(),

    nn.Linear(7776, 216),
    nn.ReLU(),

    nn.Linear(216, 36),
    nn.ReLU(),

    nn.Linear(36, 6),
    nn.ReLU(),

    nn.Linear(6, 1),
    nn.Sigmoid()
)

loss_fn = nn.BCEWithLogitsLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)

In [10]:
for epoch in range(20000):
  logits = model(X_train)
  loss = loss_fn(logits, y_train)

  opt.zero_grad()
  loss.backward()
  opt.step()

  if (epoch + 1) % 20 ==0:
    with torch.no_grad():
      v_logits = model(X_test)
      v_pred = (v_logits >= 0.5).float()
      v_acc = (v_pred == y_test).float().mean().item()
      v_loss = loss_fn(v_logits, y_test).item()
    print(f"epoch {epoch+1:3d} | train_loss {loss.item(): .4f} | val_loss {v_loss: .4f} | val_acc {v_acc}")

epoch  20 | train_loss  0.6931 | val_loss  0.6932 | val_acc 0.6700000166893005
epoch  40 | train_loss  0.6931 | val_loss  0.6932 | val_acc 0.6700000166893005
epoch  60 | train_loss  0.6930 | val_loss  0.6932 | val_acc 0.6700000166893005
epoch  80 | train_loss  0.6900 | val_loss  0.6961 | val_acc 0.6700000166893005
epoch 100 | train_loss  0.6847 | val_loss  0.7176 | val_acc 0.6399999856948853
epoch 120 | train_loss  0.6818 | val_loss  0.7220 | val_acc 0.6499999761581421
epoch 140 | train_loss  0.6808 | val_loss  0.7218 | val_acc 0.6499999761581421
epoch 160 | train_loss  0.6801 | val_loss  0.7224 | val_acc 0.6499999761581421
epoch 180 | train_loss  0.6797 | val_loss  0.7222 | val_acc 0.6499999761581421
epoch 200 | train_loss  0.6793 | val_loss  0.7221 | val_acc 0.6499999761581421
epoch 220 | train_loss  0.6784 | val_loss  0.7217 | val_acc 0.6499999761581421
epoch 240 | train_loss  0.6776 | val_loss  0.7215 | val_acc 0.6499999761581421
epoch 260 | train_loss  0.6765 | val_loss  0.7213 | 

In [12]:
# Score one hand-written customer.
# NOTE(review): the six values are positional and presumably follow the order
# of feature_cols (tenure, orders, avg spend, cancel rate, CS tickets,
# discount rate) - confirm before changing either.
one = torch.tensor([[12.0, 3.0, 35.0, 0.20, 1.0, 0.40]], dtype=torch.float32)

# Use dedicated names for the result: rebinding the notebook-level `logit` and
# `churn` arrays to scalars would silently corrupt the data-generation cells
# if they were re-run afterwards.
with torch.no_grad():
  one_logit = model(one).item()
# A threshold of 0 is only right when the model emits raw logits (which is
# what nn.BCEWithLogitsLoss expects) - TODO confirm the model has no final
# Sigmoid layer.
one_pred = 1 if one_logit > 0 else 0
print("logit", one_logit)
print("predict:", "Churn(1)" if one_pred == 1 else "Stay(0)")

logit 0.0
predict: Stay(0)
