In [None]:
"""
Hierarchical Bayesian Model for Price Elasticity

📌 **Objective**:
- Create a synthetic dataset for a retail pricing problem where we analyze the causal effects of a price change on customer demand
- Understand relationship between product price, advertising spend, competitor pricing, and customer demand

🔍 **Key Takeaways**:
- **BLAH**: 
- **Next Steps**: 
    - 

📌 **Methodology**:
1. **Create a causal graph** using NetworkX
2. **Perform causal discovery** using DoWhy


✍ **Author**: Justin Wall
📅 **Date**: 02/13/2025
"""

In [1]:
# ================================= #
# Generate Fake Retail Pricing Data #
# ================================= #
#%%
import numpy as np
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

# Define possible values for hierarchies
customer_segments = ["Young", "Middle-aged", "Senior"]
regions = ["North", "South", "East", "West"]
seasons = ["Winter", "Spring", "Summer", "Fall"]

# Generate synthetic dataset
n_samples = 1000  # Number of observations

data = {
    "price": np.random.uniform(10, 100, n_samples),  # Random prices between $10 and $100
    "customer_segment": np.random.choice(customer_segments, n_samples),
    "region": np.random.choice(regions, n_samples),
    "season": np.random.choice(seasons, n_samples)
}

# Define base demand and elasticity per segment, region, and season
segment_elasticity = {"Young": -2.0, "Middle-aged": -1.5, "Senior": -1.2}
region_elasticity = {"North": -1.8, "South": -1.3, "East": -1.5, "West": -1.6}
season_elasticity = {"Winter": -1.4, "Spring": -1.7, "Summer": -2.0, "Fall": -1.5}

# Compute sales based on price elasticity and some random noise
sales = []
for i in range(n_samples):
    base_demand = 500  # Base demand level
    segment = data["customer_segment"][i]
    region = data["region"][i]
    season = data["season"][i]
    price = data["price"][i]

    # Compute sales using price elasticity
    elasticity = (segment_elasticity[segment] +
                  region_elasticity[region] +
                  season_elasticity[season] +
                  np.random.normal(0, 0.2))  # Adding some noise

    predicted_sales = base_demand * (price ** elasticity)  # Simple demand function
    sales.append(max(0, predicted_sales))  # Ensure sales are non-negative

# Add sales to dataset
data["sales"] = sales

# Convert to DataFrame
df = pd.DataFrame(data)

# Display first few rows
df.head()
#%%

Unnamed: 0,price,customer_segment,region,season,sales
0,43.708611,Senior,South,Summer,6.406332e-06
1,95.564288,Middle-aged,West,Spring,3.292181e-07
2,75.879455,Senior,North,Summer,2.753521e-07
3,63.879264,Senior,West,Summer,5.09961e-07
4,24.041678,Young,South,Summer,1.37931e-05


In [2]:
# ================================= #
# Create Features for Model         #
# ================================= #
#%%
import pymc as pm
import arviz as az

# Encode categorical variables
df["customer_segment_code"] = df["customer_segment"].astype("category").cat.codes
df["region_code"] = df["region"].astype("category").cat.codes
df["season_code"] = df["season"].astype("category").cat.codes

# Standardize price for better numerical stability
df["price_std"] = (df["price"] - df["price"].mean()) / df["price"].std()

# Log-transform sales to handle large variations and skewness
df["log_sales"] = np.log1p(df["sales"])
#%%

In [3]:
# ================================= #
# Run Hierarchical Bayesian Model   #
# ================================= #
#%%
# Build hierarchical Bayesian model
with pm.Model() as hierarchical_model:
    # Hyperpriors for group-level effects
    mu_segment = pm.Normal("mu_segment", mu=0, sigma=1)
    sigma_segment = pm.Exponential("sigma_segment", 1.0)

    mu_region = pm.Normal("mu_region", mu=0, sigma=1)
    sigma_region = pm.Exponential("sigma_region", 1.0)

    mu_season = pm.Normal("mu_season", mu=0, sigma=1)
    sigma_season = pm.Exponential("sigma_season", 1.0)

    # Group-level effects
    segment_effects = pm.Normal("segment_effects", mu=mu_segment, sigma=sigma_segment, shape=len(customer_segments))
    region_effects = pm.Normal("region_effects", mu=mu_region, sigma=sigma_region, shape=len(regions))
    season_effects = pm.Normal("season_effects", mu=mu_season, sigma=sigma_season, shape=len(seasons))

    # Price elasticity coefficient
    beta_price = pm.Normal("beta_price", mu=-1.5, sigma=0.5)  # Prior centered around typical elasticity

    # Likelihood model
    mu = (
        segment_effects[df["customer_segment_code"].values]
        + region_effects[df["region_code"].values]
        + season_effects[df["season_code"].values]
        + beta_price * df["price_std"].values
    )

    sigma = pm.Exponential("sigma", 1.0)
    sales_obs = pm.Normal("sales_obs", mu=mu, sigma=sigma, observed=df["log_sales"].values)

    # Sample from the posterior
    trace = pm.sample(2000, tune=1000, target_accept=0.9, return_inferencedata=True)
#%%

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_segment, sigma_segment, mu_region, sigma_region, mu_season, sigma_season, segment_effects, region_effects, season_effects, beta_price, sigma]


Output()

ValueError: Not enough samples to build a trace.

In [None]:
# ================================= #
# Summarize Model Results           #
# ================================= #
#%%
# Summarize results
summary = az.summary(trace, var_names=["beta_price", "segment_effects", "region_effects", "season_effects"])
summary
#%%