# Meridian Geo-Level MMM

In this post, we build a geo-level Marketing Mix Model (MMM) using `meridian` on a large-scale dataset to evaluate its performance in estimating true marketing contributions. We'll compare the model's inferred contributions against known ground truth, providing a realistic assessment of its accuracy. Additionally, we profile the model's runtime and memory usage to understand its scalability in production-like settings.

In [1]:
import time
from pympler import asizeof

from statsmodels.stats.stattools import durbin_watson

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import arviz as az
import seaborn as sns
import matplotlib.pyplot as plt

import IPython

from meridian import constants
from meridian.data import load
from meridian.data import test_utils
from meridian.data import input_data
from meridian.data import data_frame_input_data_builder
from meridian.model import model
from meridian.model import spec
from meridian.model import prior_distribution
from meridian.analysis import optimizer
from meridian.analysis import analyzer
from meridian.analysis import visualizer
from meridian.analysis import summarizer
from meridian.analysis import formatter

from pymc_marketing.paths import data_dir

import warnings
warnings.simplefilter("ignore")

In [2]:
seed: int = sum(map(ord, "mmm_multidimensional"))
rng: np.random.Generator = np.random.default_rng(seed=seed)

### The Data

To be replaced with a larger dataset, generated by Luca.

In [3]:
data_path = data_dir / "mmm_example.csv"

raw_data_df = pd.read_csv(data_path, parse_dates=["date_week"]).rename(columns={"date_week": "date"})

a_data_df = raw_data_df.copy().assign(geo="geo_a")
b_data_df = raw_data_df.copy().assign(geo="geo_b")

# Add noise to the target variable for the second geo
b_data_df["y"] = b_data_df["y"] + 500 * rng.normal(size=len(b_data_df))

# Concatenate the two datasets
data_df = pd.concat([a_data_df, b_data_df])

# We want all geos scaled equally.
data_df['population'] = 1

# Naming convention for Meridian
data_df = data_df.rename(columns = {"date": "time"})

data_df.head()

Unnamed: 0,time,y,x1,x2,event_1,event_2,dayofyear,t,geo,population
0,2018-04-02,3984.662237,0.31858,0.0,0.0,0.0,92,0,geo_a,1
1,2018-04-09,3762.871794,0.112388,0.0,0.0,0.0,99,1,geo_a,1
2,2018-04-16,4466.967388,0.2924,0.0,0.0,0.0,106,2,geo_a,1
3,2018-04-23,3864.219373,0.071399,0.0,0.0,0.0,113,3,geo_a,1
4,2018-04-30,4441.625278,0.386745,0.0,0.0,0.0,120,4,geo_a,1


In [4]:
builder = (
    data_frame_input_data_builder.DataFrameInputDataBuilder(kpi_type='revenue')
    .with_kpi(data_df, kpi_col="y")
    .with_population(data_df)
    .with_controls(
        data_df, control_cols=["event_1", "event_2"]
    )
    .with_media(
    data_df,
    media_cols=["x1", "x2"],
    media_spend_cols=["x1", "x2"],
    media_channels=["x1", "x2"],
    )
)

data = builder.build()

### Prior Specification 

- Using spend shares as prior for beta parameters, independent across all geo's.
- Hierarchical structure accross the saturation parameters - Meridian does by default.
- Setting knots to occur every 26 weeks, to best align with seasonality of order 2.

In [5]:
channel_columns = ["x1", "x2"]
n_channels = len(channel_columns)

sum_spend_per_channel = data_df[channel_columns].sum(axis = 0)

spend_share = (
    sum_spend_per_channel / sum_spend_per_channel.sum()
).to_numpy()

prior_sigma = n_channels * spend_share

In [6]:
n_time = len(data.time)
knots = np.arange(0, n_time, 26).tolist() # ~ seasonality of order 2

In [7]:
prior = prior_distribution.PriorDistribution(
    beta_m = tfp.distributions.HalfNormal(
        prior_sigma, 
        name = constants.BETA_M,
    )
)

**Model Specification**

This is a geo-level model with the following specification:

$$
y_{g,t} = \mu_{t} +\tau_{g} + \sum_{i=1}^{N_C} \gamma^{[C]}_{g,i} z_{g,t,i} 
+ \sum_{i=1}^{N_M} \beta^{[M]}_{g,i} \text{HillAdstock} \left( \{ x^{[M]}_{g,t-s,i} \}_{s=0}^{L} ; \alpha^{[M]}_i, ec^{[M]}_{i}, slope^{[M]}_{i} \right) + \epsilon_{g,t}
$$

Where:

- $\mu_{t}$ represents the intercept at time $t$.
- $\tau_{g}$ represents the intercept for geo $g$.
- $\sum_{i=1}^{N_C} \gamma^{[C]}_{g,i} z_{g,t,i}$ represents the control contribution at time $t$ for geo $g$.
- $\sum_{i=1}^{N_M} \beta^{[M]}_{g,i} \text{HillAdstock} \left( \{ x^{[M]}_{g,t-s,i} \}_{s=0}^{L} ; \alpha^{[M]}_i, ec^{[M]}_{i}, slope^{[M]}_{i} \right)$ represents the adstocked saturated media contribution at time $t$ for geo $g$.
- $\epsilon_{g,t}$ represents the error at time $t$ for geo $g$

In [8]:
# MMM Model Specification

model_spec = spec.ModelSpec(
    prior=prior_distribution.PriorDistribution(),
    media_effects_dist='log_normal',
    hill_before_adstock=False,
    max_lag=8,
    unique_sigma_for_each_geo=True,
    roi_calibration_period=None,
    rf_roi_calibration_period=None,
    knots=knots,
    baseline_geo=None,
    holdout_id=None,
    control_population_scaling_id=None,
    media_prior_type = 'coefficient',
    rf_prior_type = 'coefficient',
)

### Model Building

In [9]:
meridian = model.Meridian(input_data=data, model_spec=model_spec)

I0000 00:00:1751366267.113806 9639098 service.cc:148] XLA service 0x6000021a8c00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1751366267.113836 9639098 service.cc:156]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1751366267.123202 9639098 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


### Sample the Prior

Meridian doesn't have funcitonality to sample or plot the prior predictive.

In [24]:
meridian.sample_prior(1000)

### Model Fitting
- Inspects convergence.
- Calculates run speed.
- Calculates memory footprint.

**Note**: No progress bar.

In [25]:
start = time.perf_counter()

meridian.sample_posterior(
    n_chains=4, 
    n_adapt=1000, 
    n_burnin=500, 
    n_keep=1000)

end = time.perf_counter()
print(f"Sampling took {end - start:.2f} seconds")

W0000 00:00:1751367027.800706 9639098 assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert


Sampling took 104.76 seconds


In [26]:
divergences = meridian.inference_data.sample_stats.diverging.sum().item()
print(f"Number of divergences: {divergences}")

r_hat_max = az.summary(
    meridian.inference_data,
    var_names=[
        "alpha_m",
        "beta_gm",
        "beta_m",
        "ec_m",
        "gamma_c",
        "gamma_gc",
        "sigma",
        "tau_g",
        "xi_c",
        "knot_values",
        "mu_t",
    ],
)['r_hat'].max()

print(f"Maximum r_hat: {r_hat_max}")

Number of divergences: 161
Maximum r_hat: 1.01


In [31]:
model_size = asizeof.asizeof(meridian)
print("Note that pymc-marketing stores the data as data objects and stores contributions as deterministics, meridian does not.")
print(f"Model size: {model_size / (1024 ** 2):.2f} MB")

Note that pymc-marketing stores the data as data objects and stores contributions as deterministics, meridian does not.
Model size: 4.85 MB


### Predictive Performance

In [33]:
model_fit = visualizer.ModelFit(meridian)
model_fit.plot_model_fit(n_top_largest_geos=2,
                         show_geo_level=True,
                         include_baseline=False,
                         include_ci=True)

## Media Effectiveness Recovery

- Contributions over time
- ROAS recovery

In [34]:
# Fetch contributions over time
model_analysis = analyzer.Analyzer(meridian)
incremental_df = model_analysis.incremental_outcome(aggregate_times=False, use_kpi=True)

incremental_df # has dims chain, draw, date, geo

<tf.Tensor: shape=(4, 1000, 179, 2), dtype=float32, numpy=
array([[[[3231.6562  ,    0.      ],
         [2531.474   ,    0.      ],
         [3555.1953  ,    0.      ],
         ...,
         [2657.712   , 1186.687   ],
         [3515.3867  ,  300.438   ],
         [4514.8384  ,   67.98047 ]],

        [[2621.6025  ,    0.      ],
         [2167.271   ,    0.      ],
         [3064.1064  ,    0.      ],
         ...,
         [2254.441   , 1419.6089  ],
         [3028.7495  ,  445.12598 ],
         [4000.6626  ,  129.45068 ]],

        [[3339.837   ,    0.      ],
         [2382.0576  ,    0.      ],
         [3428.064   ,    0.      ],
         ...,
         [2614.295   , 1629.5957  ],
         [3395.122   ,  657.4946  ],
         [4225.8013  ,  249.36377 ]],

        ...,

        [[2548.4365  ,    0.      ],
         [1902.7988  ,    0.      ],
         [2787.0625  ,    0.      ],
         ...,
         [2021.7695  , 1481.3296  ],
         [2752.8276  ,  553.98486 ],
         [3650

In [None]:
# Fetch contributions over time
model_analysis = analyzer.Analyzer(meridian)
incremental_df = model_analysis.incremental_outcome(aggregate_times=False, use_kpi=True)

mean_contributions = tf.reduce_mean(incremental_df, axis=[0,1])

lower_bound = tfp.stats.percentile(incremental_df, 3, axis=[0, 1])  
upper_bound = tfp.stats.percentile(incremental_df, 97, axis=[0, 1]) 

mean_contribution_df = pd.DataFrame(tf.reduce_mean(incremental_df, axis=[0,1]).numpy())
mean_contribution_df.columns = data.media.coords['media_channel'].values
mean_contribution_df['date_week'] = data.time

lower_contribution_df = pd.DataFrame(tfp.stats.percentile(incremental_df, 5, axis=[0, 1]).numpy())
lower_contribution_df.columns = data.media.coords['media_channel'].values
lower_contribution_df['date_week'] = data.time

upper_contribution_df = pd.DataFrame(tfp.stats.percentile(incremental_df, 95, axis=[0, 1]).numpy())
upper_contribution_df.columns = data.media.coords['media_channel'].values
upper_contribution_df['date_week'] = data.time