In [0]:
import pymc as pm
import pytensor.tensor as pt
import pytensor.scan as scan
import numpy as np
import pandas as pd


# Simulate Data

In [0]:

# Set random seed for reproducibility
np.random.seed(42)

# Time settings
n_days = 90
dates = pd.date_range(start="2024-01-01", periods=n_days, freq="D")

# Channel structure
channel_structure = {
    "Display": ["Publisher A", "Publisher B"],
    "Linear": ["Network X", "Network Y", "Network Z"]
}

# Flatten subchannels into a list with tuples (channel, subchannel)
channel_subchannel_pairs = [(ch, sub) for ch, subs in channel_structure.items() for sub in subs]

# Create base dataset
data = []
for day in dates:
    for channel, subchannel in channel_subchannel_pairs:
        spend = np.random.gamma(shape=2.0, scale=30.0)  # smaller scale
        data.append({
            "date": day,
            "channel": channel,
            "subchannel": subchannel,
            "spend": spend,
        })

df_mini = pd.DataFrame(data)

# Assign ground-truth beta per subchannel (with higher variability)
unique_subchannels = df_mini["subchannel"].unique()
true_betas = {sub: np.random.normal(loc=0.4, scale=0.3) for sub in unique_subchannels}  # Higher scale for more variability

# Assign channel-level priors for alpha
unique_channels = df_mini["channel"].unique()
mu_alpha_channels = {
    "Display": 0.3,  # Channel 1 (Display) mean alpha
    "Linear": 0.8   # Channel 2 (Linear) mean alpha
}
sigma_alpha_channels = {channel: np.random.uniform(0.05, 0.1) for channel in unique_channels}  # Channel-level std dev for alphas

# Assign subchannel alphas using channel-level priors
true_alphas = {}
for subchannel in unique_subchannels:
    channel = df_mini[df_mini["subchannel"] == subchannel]["channel"].iloc[0]
    mu_alpha = mu_alpha_channels[channel]
    sigma_alpha = sigma_alpha_channels[channel]
    true_alphas[subchannel] = np.random.normal(loc=mu_alpha, scale=.1)

# Print the ground-truth parameters for reference
print("true subchannel_betas:", true_betas)
print("true channel_mu_alphas:", mu_alpha_channels)
print("true channel_sigma_alphas:", sigma_alpha_channels)
print("true subchannel_alphas:", true_alphas)

# Adstock transformation function
def geometric_adstock(spend, alpha):
    """
    Applies geometric adstock to a time series of spend.
    
    Parameters:
        spend (array-like): A time series of media spend.
        alpha (float): Adstock decay parameter (0 < alpha < 1). Higher alpha means slower decay.
        
    Returns:
        numpy.ndarray: Adstock-transformed spend.
    """
    if not (0 < alpha < 1):
        raise ValueError("Alpha must be between 0 and 1 (exclusive).")
    
    adstocked = np.zeros_like(spend, dtype=np.float64)
    carryover = 0
    for t in range(len(spend)):
        adstocked[t] = spend[t] + alpha * carryover
        carryover = adstocked[t]
    return adstocked
  
true_intercept = 50

# Simulate adstock and Y
y_by_day = {}
for day in dates:
    daily_df = df_mini[df_mini["date"] <= day]
    signal = 0
    for sub in unique_subchannels:
        sub_spend = daily_df[daily_df["subchannel"] == sub].sort_values("date")["spend"].values
        alpha = true_alphas[sub]  # Use subchannel alpha (partially pooled by channel)
        adstocked = geometric_adstock(sub_spend, alpha)
        beta = true_betas[sub]
        if len(adstocked) > 0:
            signal += beta * adstocked[-1]
    noise = np.random.normal(0, 0.1)
    y = true_intercept + signal + noise
    y_by_day[day] = y

# Merge y into daily subchannel data
y_df = pd.DataFrame({"date": list(y_by_day.keys()), "y": list(y_by_day.values())})
df_mini = df_mini.merge(y_df, on="date", how="left")

# Display the updated dataframe
display(df_mini)

true subchannel_betas: {'Publisher A': 0.5288854657397758, 'Publisher B': 0.46230630614893337, 'Network X': 0.48147365115861207, 'Network Y': 0.016975427253907216, 'Network Z': 0.07568303787752134}
true channel_mu_alphas: {'Display': 0.3, 'Linear': 0.8}
true channel_sigma_alphas: {'Display': 0.09621980976882652, 'Linear': 0.0985529122582684}
true subchannel_alphas: {'Publisher A': 0.40531528533329036, 'Publisher B': 0.29604448461433447, 'Network X': 0.8681500697372625, 'Network Y': 0.8028318376130461, 'Network Z': 0.8029756139495746}


date,channel,subchannel,spend,y
2024-01-01T00:00:00Z,Display,Publisher A,71.8103816960771,140.02739387148964
2024-01-01T00:00:00Z,Display,Publisher B,44.83394190646763,140.02739387148964
2024-01-01T00:00:00Z,Linear,Network X,41.46850753112861,140.02739387148964
2024-01-01T00:00:00Z,Linear,Network Y,41.46906882995402,140.02739387148964
2024-01-01T00:00:00Z,Linear,Network Z,139.49143236691953,140.02739387148964
2024-01-02T00:00:00Z,Display,Publisher A,86.00118692892667,196.2085904684536
2024-01-02T00:00:00Z,Display,Publisher B,33.93234005222697,196.2085904684536
2024-01-02T00:00:00Z,Linear,Network X,74.0944342029364,196.2085904684536
2024-01-02T00:00:00Z,Linear,Network Y,59.96880792193941,196.2085904684536
2024-01-02T00:00:00Z,Linear,Network Z,6.477448304717562,196.2085904684536


In [0]:
unique_subchannels

array(['Publisher A', 'Publisher B', 'Network X', 'Network Y',
       'Network Z'], dtype=object)

In [0]:

# Split spend by channel for adstock transformation
channel_names = df_mini["channel"].unique()
channel_to_idx = {name: i for i, name in enumerate(channel_names)}
df_mini["channel_idx"] = df_mini["channel"].map(channel_to_idx).astype(int)

subchannel_names = df_mini["subchannel"].unique()
subchannel_to_idx = {name: i for i, name in enumerate(subchannel_names)}
df_mini["subchannel_idx"] = df_mini["subchannel"].map(subchannel_to_idx).astype(int)


In [0]:
# Group spend data by subchannel
x_by_subchannel = []
obs_idx_by_subchannel = []

# Loop through unique subchannels to extract spend data and corresponding indices
for subchannel in df_mini["subchannel"].unique():
    subchannel_spend = df_mini[df_mini["subchannel"] == subchannel]["spend"].values  # Spend for this subchannel
    subchannel_indices = df_mini[df_mini["subchannel"] == subchannel].index.values  # Row indices in df_mini

    x_by_subchannel.append(subchannel_spend)
    obs_idx_by_subchannel.append(subchannel_indices)

In [0]:

# Prepare final inputs
y = df_mini["y"].values
subchannel_idx = df_mini["subchannel_idx"].values
channel_idx = df_mini["channel_idx"].values
n_channels = len(channel_names)
print("n_channels:", n_channels)
n_subchannels = len(subchannel_names)
print("n_subchannels:", n_subchannels)

n_channels: 2
n_subchannels: 5


# Functionality for Geometric Adstock Transform

In [0]:
# Define a custom PyTensor-based adstock transformation
def adstock_transform(x, alpha):
    """
    Perform Geometric Adstock Transformation.
    Args:
        x: The spend vector for a channel (1D array).
        alpha: The adstock decay factor (scalar).
    Returns:
        A PyTensor symbolic vector with the adstocked values.
    """
    def step(x_t, adstocked_t_minus_1, alpha):
        return x_t + alpha * adstocked_t_minus_1

    adstocked, _ = scan(
        fn=step,
        sequences=[x],
        outputs_info=pt.zeros_like(x[0]),  # Initialize adstocked values with zero
        non_sequences=[alpha],
    )
    return adstocked


# Run Model

In [0]:
# Create mapping from subchannel to its parent channel index
subchannel_to_channel = (
    df_mini.drop_duplicates("subchannel")[["subchannel", "channel_idx"]]
    .set_index("subchannel")["channel_idx"]
    .to_dict()
)

# Create an array where each element is the channel_idx for the corresponding subchannel
channel_idx_for_each_subchannel = np.array([
    subchannel_to_channel[sub] for sub in subchannel_names
])

print("subchannel_to_channel:", subchannel_to_channel)
print("channel_idx_for_each_subchannel:", channel_idx_for_each_subchannel)

subchannel_to_channel: {'Publisher A': 0, 'Publisher B': 0, 'Network X': 1, 'Network Y': 1, 'Network Z': 1}
channel_idx_for_each_subchannel: [0 0 1 1 1]


In [0]:
coords = {
    "channel": ["Display", "Linear"],
    "subchannel": ["Publisher A", "Publisher B", "Network X", "Network Y", "Network Z"],
    "obs_id": np.arange(len(df_mini))
}

with pm.Model(coords=coords) as model:
    pm.MutableData("subchannel_idx", subchannel_idx, dims="obs_id")
    pm.MutableData("channel_idx_for_each_subchannel", channel_idx_for_each_subchannel, dims="subchannel")

    mu_alpha = pm.Normal("mu_alpha", mu=[0.3, 0.8], sigma=0.05, dims="channel")
    sigma_alpha = pm.HalfNormal("sigma_alpha", sigma=0.1, dims="channel")

    alpha = pm.Normal(
        "adstock_alpha",
        mu=mu_alpha[channel_idx_for_each_subchannel],
        sigma=sigma_alpha[channel_idx_for_each_subchannel],
        dims="subchannel"
    )

    # Subchannel-specific betas (NO PARTIAL POOLING)
    beta = pm.TruncatedNormal("beta", mu=0.4, sigma=0.3, lower=0, upper=1, dims="subchannel")


    # Global intercept term
    intercept = pm.Normal("intercept", mu=50, sigma=5)

    # Noise term (likelihood scale)
    sigma = pm.HalfNormal("sigma", sigma=.1)

    # ---- APPLY ADSTOCK TRANSFORMATION PER SUBCHANNEL ----

    # Preallocate a vector for the full adstocked spend,
    # aligned to each row of the original dataset (df_mini)
    adstocked_full = pt.zeros_like(df_mini["spend"].values.astype(float))

    # Loop through each subchannel and apply adstock transformation
    for sub_idx, (x_sub, idx_sub) in enumerate(zip(x_by_subchannel, obs_idx_by_subchannel)):
        # Convert raw spend array for current subchannel to a PyTensor variable
        x_sub_tensor = pt.as_tensor_variable(x_sub.astype(float))

        # Apply adstock to this subchannel's spend using its specific alpha
        adstocked_s = adstock_transform(x_sub_tensor, alpha[sub_idx])

        # Insert the transformed values back into the correct positions
        # in the global adstocked_full vector
        adstocked_full = pt.set_subtensor(adstocked_full[idx_sub], adstocked_s)

    # ---- LINEAR MODEL ----

    # Compute the predicted response using:
    # - intercept
    # - subchannel-specific betas
    # - adstock
    mu = intercept + beta[subchannel_idx] * adstocked_full

    # Likelihood function: observed y values modeled as Normal(mu, sigma)
    likelihood = pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    # ---- SAMPLING ----

    # Run MCMC to sample from the posterior distribution
    trace = pm.sample(200, tune=200, return_inferencedata=True)

Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc.sampling.mcmc:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu_alpha, sigma_alpha, adstock_alpha, beta, intercept, sigma]
INFO:pymc.sampling.mcmc:NUTS: [mu_alpha, sigma_alpha, adstock_alpha, beta, intercept, sigma]


Output()

In [0]:
import arviz as az # Output Results


# 1. Posterior Summary
print("Posterior Summary:")
summary = az.summary(trace, round_to=2).reset_index()
display(summary)

Posterior Summary:


index,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
adstock_alpha[Publisher A],-0.05,0.44,-0.52,0.68,0.21,0.11,4.36,4.35,18.49
adstock_alpha[Publisher B],0.39,0.42,-0.15,1.04,0.2,0.1,4.36,4.35,18.49
adstock_alpha[Network X],1.42,0.31,0.93,1.77,0.15,0.07,4.55,12.05,6.01
adstock_alpha[Network Y],1.45,0.31,0.94,1.77,0.15,0.07,4.55,12.05,4.75
adstock_alpha[Network Z],1.39,0.28,0.92,1.63,0.14,0.07,4.58,12.05,5.41
beta[Publisher A],0.39,0.1,0.28,0.49,0.05,0.01,4.36,4.35,18.16
beta[Publisher B],0.48,0.12,0.28,0.61,0.06,0.03,4.36,4.35,18.49
beta[Network X],0.46,0.15,0.32,0.71,0.07,0.04,4.37,4.35,13.15
beta[Network Y],0.44,0.11,0.31,0.6,0.05,0.02,4.55,12.05,5.89
beta[Network Z],0.42,0.05,0.37,0.47,0.02,0.0,4.47,11.52,6.01


In [0]:
print(trace['posterior'].keys())

com.databricks.backend.common.rpc.CommandSkippedException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:134)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:129)
	at scala.collection.immutable.Range.foreach(Range.scala:158)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:129)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:715)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:435)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:435)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.cancelExecution(ExecutionContextManagerV1.scala:466)
	at com.databricks.spark.chauffeur.ChauffeurState.$anonfun$process$1(ChauffeurState.scala:757)
	at com.data

In [0]:


# Pass the dictionary to `var_names` to relabel the plots
print("Plotting posterior distributions...")
az.plot_posterior(trace)
plt.show()

com.databricks.backend.common.rpc.CommandSkippedException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:134)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:129)
	at scala.collection.immutable.Range.foreach(Range.scala:158)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:129)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:715)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:435)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:435)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.cancelExecution(ExecutionContextManagerV1.scala:466)
	at com.databricks.spark.chauffeur.ChauffeurState.$anonfun$process$1(ChauffeurState.scala:757)
	at com.data

In [0]:
# 3. Trace Plots
print("Plotting trace plots...")
az.plot_trace(trace)
plt.show()

com.databricks.backend.common.rpc.CommandSkippedException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:134)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:129)
	at scala.collection.immutable.Range.foreach(Range.scala:158)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:129)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:715)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:435)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:435)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.cancelExecution(ExecutionContextManagerV1.scala:466)
	at com.databricks.spark.chauffeur.ChauffeurState.$anonfun$process$1(ChauffeurState.scala:757)
	at com.data

In [0]:
# Extract and Label Beta Coefficients
print("Beta Coefficients:")
beta_mean = trace.posterior["beta"].mean(dim=("chain", "draw"))
beta_std = trace.posterior["beta"].std(dim=("chain", "draw"))

# Map subchannel indices back to subchannel names
subchannel_mapping = {v: k for k, v in subchannel_to_idx.items()}

# Print labeled beta coefficients
for sub_idx, (mean, std) in enumerate(zip(beta_mean.values, beta_std.values)):
    subchannel_name = subchannel_mapping[sub_idx]
    print(f"Subchannel '{subchannel_name}': Mean = {mean:.3f}, Std = {std:.3f}")

# Save labeled beta coefficients to CSV
labeled_beta = pd.DataFrame({
    "Subchannel": [subchannel_mapping[i] for i in range(len(beta_mean))],
    "Beta_Mean": beta_mean.values,
    "Beta_Std": beta_std.values
})

display(labeled_beta)

com.databricks.backend.common.rpc.CommandSkippedException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:134)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:129)
	at scala.collection.immutable.Range.foreach(Range.scala:158)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:129)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:715)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:435)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:435)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.cancelExecution(ExecutionContextManagerV1.scala:466)
	at com.databricks.spark.chauffeur.ChauffeurState.$anonfun$process$1(ChauffeurState.scala:757)
	at com.data