In [1]:
import numpy as np
import pandas as pd
import tensorflow_probability as tfp

from meridian import constants
from meridian.data import data_frame_input_data_builder as data_builder
from meridian.model import model
from meridian.model import spec
from meridian.model import prior_distribution

from mmm_eval import (
    MeridianConfig, MeridianInputDataBuilderSchema, run_evaluation)

import meridian

## Load your data

The data below comes from the Meridian repository and gives a great example of how the data should be formatted.

In [2]:
# Simulated geo-level media workbook hosted in the Meridian GitHub repository.
GEO_MEDIA_URL = 'https://github.com/google/meridian/raw/main/meridian/data/simulated_data/xlsx/geo_media.xlsx'

df = pd.read_excel(GEO_MEDIA_URL, engine='openpyxl')

In [3]:
# Media channels in the dataset; each one has an impression and a spend column.
channels = ["Channel0", "Channel1", "Channel2", "Channel3", "Channel4", "Channel5"]
impression_cols = [f"{name}_impression" for name in channels]
spend_cols = [f"{name}_spend" for name in channels]

# The KPI is conversions (not revenue), so revenue per conversion is supplied
# separately alongside population and the control variables.
builder = data_builder.DataFrameInputDataBuilder(kpi_type='non_revenue')
builder = builder.with_kpi(df, kpi_col="conversions")
builder = builder.with_revenue_per_kpi(df, revenue_per_kpi_col="revenue_per_conversion")
builder = builder.with_population(df)
builder = builder.with_controls(df, control_cols=["GQV", "Discount", "Competitor_Sales"])
builder = builder.with_media(
    df,
    media_cols=impression_cols,
    media_spend_cols=spend_cols,
    media_channels=channels,
)

# Assemble the input data object consumed by the Meridian model.
data = builder.build()

## Define a Meridian MMM

In [4]:
# LogNormal parameters of the ROI prior applied to each media channel.
roi_mu = 0.2
roi_sigma = 0.9

roi_m_prior = tfp.distributions.LogNormal(roi_mu, roi_sigma, name=constants.ROI_M)
prior = prior_distribution.PriorDistribution(roi_m=roi_m_prior)
model_spec = spec.ModelSpec(prior=prior)

# The model is left unfitted on purpose: sampling from the posterior is not
# required prior to evaluation.
mmm = model.Meridian(input_data=data, model_spec=model_spec)

I0000 00:00:1752041208.026458 4076435 service.cc:148] XLA service 0x600000991a00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1752041208.026482 4076435 service.cc:156]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1752041208.033055 4076435 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


## I wonder if this MMM is any good... let's check!

First, you will need to specify how the model's input data should be built, matching the `DataFrameInputDataBuilder` logic above.

See the [Meridian data loading documentation](https://developers.google.com/meridian/docs/user-guide/supported-data-types-formats?hl=en) if you need more guidance.

### Preprocess data

We preprocess the data in two ways:

- calculate revenue as `revenue_per_conversion` × `conversions`, since BenjaMMMin expects revenue, not revenue per KPI
- restrict dates and geos to reduce runtime

In [5]:
# Build the evaluation frame in one pass without touching `df`:
#   1. add total revenue (revenue per conversion times conversions),
#   2. keep only the geos Geo0 and Geo1,
#   3. keep only rows dated after 2023-01-01.
data_preproc = (
    df.assign(revenue=lambda frame: frame["revenue_per_conversion"] * frame["conversions"])
    .loc[lambda frame: frame["geo"].isin(["Geo0", "Geo1"])]
    .loc[lambda frame: pd.to_datetime(frame["time"]) > pd.Timestamp("2023-01-01")]
)

In [6]:
channels = ["Channel0", "Channel1", "Channel2", "Channel3", "Channel4", "Channel5"]
spend_columns = [f"{channel}_spend" for channel in channels]
impression_columns = [f"{channel}_impression" for channel in channels]

# Column mapping handed to the evaluation framework so it can assemble the
# model inputs from a plain DataFrame.
input_data_builder_config = MeridianInputDataBuilderSchema(
    date_column="time",
    media_channels=channels,
    channel_spend_columns=spend_columns,
    channel_impressions_columns=impression_columns,
    response_column="conversions",
    control_columns=["GQV", "Competitor_Sales", "Discount"],
)

In [7]:
# Specify a larger number of samples if you want quality results.
# Sampler settings forwarded with the config; the values here are deliberately tiny.
sample_posterior_kwargs = {"n_chains": 1, "n_adapt": 10, "n_burnin": 10, "n_keep": 10}

# Derive the evaluation config from the Meridian model object defined earlier.
config = MeridianConfig.from_model_object(
    mmm,
    input_data_builder_config=input_data_builder_config,
    revenue_column="revenue",
    sample_posterior_kwargs=sample_posterior_kwargs,
)

In [9]:
# Run the evaluation, limited to the perturbation test, on the preprocessed data.
result = run_evaluation(
    framework="meridian",
    config=config,
    data=data_preproc,
    test_names=["perturbation"],
)

2025-07-09 16:07:00,181 - mmm_eval.core.validation_test_orchestrator - INFO - Running test: perturbation
2025-07-09 16:07:29.055432: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
W0000 00:00:1752041249.859351 4076435 assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert
W0000 00:00:1752041318.205306 4076435 assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert
2025-07-09 16:09:15,131 - mmm_eval.core.validation_tests - INFO - Saving the test results for perturbation test


## Examine the results

In [10]:
# Render the object returned by run_evaluation using the notebook's rich display.
display(result)

Unnamed: 0,general_metric_name,specific_metric_name,metric_value,metric_pass,test_name,timestamp
0,percentage_change,percentage_change_Channel0,0.829907,False,perturbation,2025-07-09T16:09:15.133036
1,percentage_change,percentage_change_Channel1,3.66729,False,perturbation,2025-07-09T16:09:15.133036
2,percentage_change,percentage_change_Channel2,3.791029,False,perturbation,2025-07-09T16:09:15.133036
3,percentage_change,percentage_change_Channel3,0.939111,False,perturbation,2025-07-09T16:09:15.133036
4,percentage_change,percentage_change_Channel4,0.214822,False,perturbation,2025-07-09T16:09:15.133036
5,percentage_change,percentage_change_Channel5,0.699159,False,perturbation,2025-07-09T16:09:15.133036
