In [13]:
%load_ext autoreload
%autoreload 2

from pymc_marketing.mmm import MMM, GeometricAdstock, LogisticSaturation
from mmm_eval.data.synth_data_generator import generate_data
from mmm_eval.configs import PyMCConfig
from pymc_marketing.prior import Prior

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Generate data

In [14]:
# Build the dataset with the project's synthetic-data generator and keep a CSV copy.
data = generate_data()
data.to_csv("data.csv", index=False)

# Target is the quantity column; the features are everything except the two outcome columns.
y = data["quantity"]
X = data.drop(columns=["revenue", "quantity"])

# Fit a PyMC Model

In [15]:
# Baseline MMM: two media channels, geometric adstock (l_max=4), logistic saturation.
base_model = MMM(
    date_column="date_week",
    channel_columns=["channel_1", "channel_2"],
    adstock=GeometricAdstock(l_max=4),
    saturation=LogisticSaturation(),
)

# Sample the posterior with 4 chains.
base_model.fit(X=X, y=y, chains=4, target_accept=0.85)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [intercept, adstock_alpha, saturation_lam, saturation_beta, y_sigma]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 5 seconds.
There were 1 divergences after tuning. Increase `target_accept` or reparameterize.


# Create a config

To run the evaluation suite, we need to store the configuration used to create the original model.

To do this, we create a `PyMCConfig` object.

In [16]:
# X and y are deliberately left out of the fit kwargs - they come from the input data.
fit_kwargs = dict(chains=4, target_accept=0.85)

# Bundle the model together with its sampler settings and the response/revenue column names.
base_config = PyMCConfig(
    base_model,
    fit_kwargs=fit_kwargs,
    response_column="quantity",
    revenue_column="revenue",
)

# Check that we can use the config to fit a PyMC MMM model

In [17]:
# Rebuild an MMM purely from the kwargs stored on the config object.
model_kwargs = base_config.model_config.config
m1 = MMM(**model_kwargs)

# Fit it with the sampler settings stored on the same config.
sampler_kwargs = base_config.fit_config.config
_ = m1.fit(X=X, y=y, **sampler_kwargs)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [intercept, adstock_alpha, saturation_lam, saturation_beta, y_sigma]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 6 seconds.
There were 7 divergences after tuning. Increase `target_accept` or reparameterize.


# Test a more complex config

In [18]:
# Single source of truth for the media channels, so the spend-share prior and
# the model definition cannot drift apart.
channel_columns = ["channel_1", "channel_2"]

# Each channel's share of the total spend across all channels.
total_spend_per_channel = data[channel_columns].sum(axis=0)
spend_share = total_spend_per_channel / total_spend_per_channel.sum()

# Derived from the channel list rather than hard-coded.
n_channels = len(channel_columns)
# One HalfNormal scale per channel: n_channels * spend share.
prior_sigma = n_channels * spend_share.to_numpy()

# Custom priors passed to the MMM through `model_config`.
model_config = {
    "intercept": Prior("Normal", mu=0.5, sigma=0.2),
    "saturation_beta": Prior("HalfNormal", sigma=prior_sigma),
    "gamma_control": Prior("Normal", mu=0, sigma=0.05),
    "gamma_fourier": Prior("Laplace", mu=0, b=0.2),
}

# Keyword arguments for the MMM constructor.
config = {
    "date_column": "date_week",
    "channel_columns": channel_columns,
    "control_columns": ["price", "event_1", "event_2"],
    "adstock": GeometricAdstock(l_max=4),  # or DelayedAdstock(), etc.
    "saturation": LogisticSaturation(),
    "yearly_seasonality": 2,
    "model_config": model_config,
}

# Sampler settings forwarded to `fit`.
fit_kwargs = {
    "chains": 4,
    "target_accept": 0.85,
}

# fit another base model
m2 = MMM(**config)
_ = m2.fit(X=X, y=y, **fit_kwargs)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [intercept, adstock_alpha, saturation_lam, saturation_beta, gamma_control, gamma_fourier, y_sigma]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 14 seconds.


## Ensure we can run the core methods of the PyMCAdapter

In [19]:
from mmm_eval.adapters.experimental.pymc import PyMCAdapter

# NOTE: this rebinds `config` - it was the plain kwargs dict used to build m2,
# and from here on it is a PyMCConfig wrapping the fitted m2 model.
config = PyMCConfig(
    m2,
    fit_kwargs=fit_kwargs,
    response_column="quantity",
    revenue_column="revenue",
)

# Wrap the config in the adapter and fit it on the full frame
# (the adapter takes the whole DataFrame, not separate X / y).
adapter = PyMCAdapter(config)
adapter.fit(data)

Control column 'price' has values outside [0, 1] range: min=5.0003, max=6.9575. Consider scaling this column to 0-1 range as per PyMC best practices.
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [intercept, adstock_alpha, saturation_lam, saturation_beta, gamma_control, gamma_fourier, y_sigma]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 15 seconds.


In [20]:
# Predict on the same frame that is passed to adapter.fit, then show how many
# predictions came back.
preds = adapter.predict(data)
len(preds)

Control column 'price' has values outside [0, 1] range: min=5.0003, max=6.9575. Consider scaling this column to 0-1 range as per PyMC best practices.
Sampling: [y]


Output()

179

In [21]:
# Per-channel ROI as reported by the adapter (exact definition lives in PyMCAdapter - TODO confirm).
# NOTE(review): the recorded output shows channel_2 at ~2761 versus ~13.5 for channel_1;
# worth sanity-checking before relying on these numbers.
adapter.get_channel_roi()

channel_1      13.521711
channel_2    2760.934510
dtype: float64

# Save the config to JSON

In [22]:
# Persist the config under the name "config1" in the current directory;
# the load step reads it back as "config1.json".
config.save_config(save_path="./", file_name="config1")

<mmm_eval.configs.configs.PyMCConfig at 0x2bd5afa10>

# Load it into a new `PyMCConfig` instance

In [24]:
# Rebuild a PyMCConfig from the saved file, then display the model kwargs it carries.
loaded_config = PyMCConfig.load_config("config1.json")
loaded_config.model_config.config

{'date_column': 'date_week',
 'channel_columns': ['channel_1', 'channel_2'],
 'adstock': GeometricAdstock(prefix='adstock', l_max=4, normalize=True, mode='After', priors={'alpha': Prior("Beta", alpha=1, beta=3, dims="channel")}),
 'saturation': LogisticSaturation(prefix='saturation', priors={'lam': Prior("Gamma", alpha=3, beta=1, dims="channel"), 'beta': Prior("HalfNormal", sigma=[1.31263903 0.68736097], dims="channel")}),
 'time_varying_intercept': False,
 'time_varying_media': False,
 'sampler_config': {},
 'validate_data': True,
 'control_columns': ['price', 'event_1', 'event_2'],
 'yearly_seasonality': 2,
 'adstock_first': True,
 'dag': None,
 'treatment_nodes': None,
 'outcome_node': None}

# Check if we can run the model from the loaded config

In [25]:
# Build a fresh adapter from the reloaded config alone and fit it.
# NOTE: this rebinds `adapter`, replacing the instance created from the in-memory config.
adapter = PyMCAdapter(loaded_config)
adapter.fit(data)

Control column 'price' has values outside [0, 1] range: min=5.0003, max=6.9575. Consider scaling this column to 0-1 range as per PyMC best practices.
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [intercept, adstock_alpha, saturation_lam, saturation_beta, gamma_control, gamma_fourier, y_sigma]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 14 seconds.


## Scratch

In [35]:
import numpy as np
import pandas as pd
import pytest
from pymc_marketing.mmm import GeometricAdstock, LogisticSaturation
from pymc_marketing.prior import Prior

# TODO: update this import once PyMCAdapter is promoted out of experimental
from mmm_eval.adapters.experimental.pymc import (
    PyMCAdapter,
    _check_columns_in_data,
    _validate_start_end_dates,
)
from mmm_eval.adapters.experimental.schemas import PyMCFitSchema, PyMCModelSchema
from mmm_eval.configs import PyMCConfig

# Priors handed to the model schema; saturation_beta carries one sigma per channel.
model_config = {
    "intercept": Prior("Normal", mu=0.5, sigma=0.2),
    "saturation_beta": Prior("HalfNormal", sigma=[0.321, 0.123]),
    "gamma_control": Prior("Normal", mu=0, sigma=0.05),
    "gamma_fourier": Prior("Laplace", mu=0, b=0.2),
}

# Sampler settings: deliberately tiny (1 chain, 10 draws, 5 tuning steps) and seeded.
fit_config = PyMCFitSchema(
    target_accept=0.9,
    chains=1,
    draws=10,
    tune=5,
    random_seed=42,
)

# Model definition: same columns and transforms as the earlier complex config.
pymc_model_config = PyMCModelSchema(
    date_column="date_week",
    channel_columns=["channel_1", "channel_2"],
    control_columns=["price", "event_1", "event_2"],
    adstock=GeometricAdstock(l_max=4),
    saturation=LogisticSaturation(),
    yearly_seasonality=2,
    model_config=model_config,
)

# NOTE(review): PyMCConfig is built here from schema objects via the
# pymc_model_config / fit_config keywords, while earlier cells pass a fitted MMM
# plus fit_kwargs - confirm which call style the current API supports.
# This also rebinds `config`.
config = PyMCConfig(
    pymc_model_config=pymc_model_config,
    fit_config=fit_config,
    response_column="quantity",
    revenue_column="revenue",
)

def realistic_test_data(n_periods: int = 20, seed: int = 42) -> pd.DataFrame:
    """Create more realistic test data for PyMC integration tests.

    Builds a weekly (Monday-anchored) frame with two spend channels, a response
    driven by trend, seasonality and both channels plus noise, a price series,
    revenue (unscaled price * quantity) and two rare binary event flags. The
    control columns (price, event_1, event_2) are then divided by their maximum
    absolute value.

    Args:
        n_periods: Number of weekly rows to generate.
        seed: Seed for the private random number generator.

    Returns:
        DataFrame with columns date_week, channel_1, channel_2, quantity,
        price, revenue, event_1 and event_2.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level samplers, but leaves the global RNG untouched.
    rng = np.random.RandomState(seed)
    dates = pd.date_range("2023-01-01", periods=n_periods, freq="W-MON")
    n_rows = len(dates)

    # Channel spend: two independent uniform draws
    channel_1 = rng.uniform(50, 200, n_rows)
    channel_2 = rng.uniform(30, 150, n_rows)

    # Create response with some correlation to channels
    base_response = 1000
    trend = np.linspace(0, 50, n_rows)
    seasonality = 20 * np.sin(2 * np.pi * np.arange(n_rows) / 52)

    quantity = (
        base_response + trend + seasonality + 0.3 * channel_1 + 0.2 * channel_2 + rng.normal(0, 30, n_rows)
    )

    # Revenue is computed from the raw price, before price is rescaled below.
    price = 10 + 0.1 * np.arange(n_rows) + rng.normal(0, 0.5, n_rows)
    revenue = price * quantity

    # Create DataFrame
    df = pd.DataFrame(
        {
            "date_week": dates,
            "channel_1": channel_1,
            "channel_2": channel_2,
            "quantity": quantity,
            "price": price,
            "revenue": revenue,
            "event_1": rng.choice([0, 1], n_rows, p=[0.9, 0.1]),
            "event_2": rng.choice([0, 1], n_rows, p=[0.95, 0.05]),
        }
    )

    # Scale control columns to 0-1 range using maxabs scaling
    control_columns = ["price", "event_1", "event_2"]
    for col in control_columns:
        max_abs = df[col].abs().max()
        # Leave all-zero columns untouched to avoid dividing by zero.
        if max_abs > 0:
            df[col] = df[col] / max_abs

    return df

# NOTE: rebinds `data` - from here on it is the small frame returned by
# realistic_test_data(), not the generate_data() output used by the earlier cells.
data = realistic_test_data()

In [36]:
adapter = PyMCAdapter(config)

# Derive the zero-spend variant under its own name instead of rebinding `data`,
# so the source frame stays intact for any other cell that reads it.
data_zero_spend = data.assign(channel_1=0)  # channel_1 has zero spend in every row

# Fit the model on the zero-spend frame.
# NOTE(review): the recorded output for this cell ends in
# "NameError: name 'stop' is not defined". No name `stop` appears in this cell,
# so it presumably comes from inside the adapter code - confirm and remove.
adapter.fit(data_zero_spend)

{'date_column': 'date_week', 'channel_columns': ['channel_1', 'channel_2'], 'adstock': GeometricAdstock(prefix='adstock', l_max=4, normalize=True, mode='After', priors={'alpha': Prior("Beta", alpha=1, beta=3)}), 'saturation': LogisticSaturation(prefix='saturation', priors={'lam': Prior("Gamma", alpha=3, beta=1), 'beta': Prior("HalfNormal", sigma=2)}), 'time_varying_intercept': False, 'time_varying_media': False, 'sampler_config': None, 'validate_data': True, 'control_columns': ['price', 'event_1', 'event_2'], 'yearly_seasonality': 2, 'adstock_first': True, 'dag': None, 'treatment_nodes': None, 'outcome_node': None, 'model_config': {'intercept': Prior("Normal", mu=0.5, sigma=0.2), 'saturation_beta': Prior("HalfNormal", sigma=[0.321 0.123]), 'gamma_control': Prior("Normal", mu=0, sigma=0.05), 'gamma_fourier': Prior("Laplace", mu=0, b=0.2)}}
{'date_column': 'date_week', 'channel_columns': ['channel_2'], 'adstock': GeometricAdstock(prefix='adstock', l_max=4, normalize=True, mode='After', p

NameError: name 'stop' is not defined