In [9]:
## Data Generation 

import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import random

# Seed NumPy's global RNG so that every draw in this cell is repeatable.
np.random.seed(42)

# Synthetic customer base.
# The frame is filled one column at a time. The random draws must stay in
# this order (age, gender, tenure, usage, churn): with a fixed seed, changing
# the order changes the generated values.
n_customers = 2000

# Evenly spaced sign-up timestamps from 2022-01-01 through 2023-12-01.
signup_dates = pd.date_range(start="2022-01-01", end="2023-12-01", periods=n_customers)

customer_data = pd.DataFrame({"customer_id": range(1, n_customers + 1)})
# randint's upper bound is exclusive, so ages fall in 18..69.
customer_data["age"] = np.random.randint(18, 70, n_customers)
customer_data["gender"] = np.random.choice(["Male", "Female"], n_customers)
# Upper bound exclusive again: tenure falls in 1..23.
# NOTE(review): tenure is drawn independently of signup_date, so the two
# columns are not guaranteed to agree with each other -- confirm intended.
customer_data["tenure_months"] = np.random.randint(1, 24, n_customers)
# Normal(400, 100) truncated toward zero by the integer cast.
customer_data["monthly_usage_minutes"] = (
    np.random.normal(loc=400, scale=100, size=n_customers).astype(int)
)
customer_data["signup_date"] = signup_dates
# Churn flag: 1 with probability 0.15, otherwise 0.
customer_data["is_churned"] = np.random.choice([0, 1], n_customers, p=[0.85, 0.15])

# Monthly revenue.
# Two candidate values are drawn for every customer -- first from the
# churned profile (mean 25, sd 5), then from the retained profile
# (mean 30, sd 8) -- and the churn flag picks one per row. Both arrays are
# always drawn in full, in this order, so the RNG stream is unchanged.
# NOTE(review): a normal draw has no lower bound, so a negative revenue is
# possible in principle; nothing here clips it.
customer_data["monthly_revenue"] = np.round(
    np.where(
        customer_data["is_churned"].eq(1),
        np.random.normal(loc=25, scale=5, size=n_customers),
        np.random.normal(loc=30, scale=8, size=n_customers),
    ),
    2,
)

# Campaign simulation.
# Each customer is assigned to "control" or "treatment" independently with
# equal probability, so the split is approximately (not exactly) 50/50.
customer_data["campaign_group"] = np.random.choice(["control", "treatment"], n_customers)

# Conversion outcome.
# Customers who are in the treatment group AND have not churned convert with
# probability 0.4; everyone else converts with probability 0.2. Both outcome
# arrays are drawn for all customers (treated-rate first) and the mask
# selects per row.
customer_data["converted"] = np.where(
    customer_data["campaign_group"].eq("treatment") & customer_data["is_churned"].eq(0),
    np.random.choice([0, 1], n_customers, p=[0.6, 0.4]),
    np.random.choice([0, 1], n_customers, p=[0.8, 0.2]),
)

# Save to CSV
# The file is written relative to the current working directory (no "data/"
# sub-folder). The confirmation message is built from the same path variable
# so that it always reports the location that was actually written.
output_path = "synthetic_customer_data.csv"
customer_data.to_csv(output_path, index=False)  # index=False: omit the row index column
print(f"Synthetic customer data saved to {output_path}")

Synthetic customer data saved to data/synthetic_customer_data.csv


In [7]:
# Shell escape: print the current working directory of the notebook kernel.
!pwd

/Users/sarrahabdalla/Marketing Analytics


In [8]:
# Shell escape: list the files in the current working directory.
!ls

ABTest.ipynb                 DataGen.ipynb
ABTest.txt                   synthetic_customer_data.csv
Churn prediction model.ipynb
