In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import arviz as az

import IPython

from meridian import constants
from meridian.data import load
from meridian.data import test_utils
from meridian.model import model
from meridian.model import spec
from meridian.model import prior_distribution
from meridian.analysis import optimizer
from meridian.analysis import analyzer
from meridian.analysis import visualizer
from meridian.analysis import summarizer
from meridian.analysis import formatter

# Report the hardware this runtime offers: installed RAM and the devices
# TensorFlow can see.
from psutil import virtual_memory

# `.total` is the machine's installed memory, not the amount currently free,
# so the message labels it as total RAM.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of total RAM\n'.format(ram_gb))
# `tf.config.list_physical_devices` is the stable name of the former
# `tf.config.experimental` alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Num CPUs Available: ", len(tf.config.list_physical_devices('CPU')))

Your runtime has 8.6 gigabytes of available RAM

Num GPUs Available:  0
Num CPUs Available:  1


In [2]:
# Read the truncated simulation dataset into a DataFrame for inspection.
df = pd.read_csv(filepath_or_buffer='sim_data_trunc.csv')

In [3]:
# Preview the first rows. A bare last expression uses the notebook's rich
# table display, which does not wrap wide frames the way print() does.
df.head()

         Date  Step  price_A      tv_A  web_A  price_B  tv_B  web_B  \
0  2022-01-02    52      5.0       0.0    0.0      5.0   0.0    0.0   
1  2022-01-09    53      5.0       0.0    0.0      5.0   0.0    0.0   
2  2022-01-16    54      5.0       0.0    0.0      5.0   0.0    0.0   
3  2022-01-23    55      5.0       0.0    0.0      5.0   0.0    0.0   
4  2022-01-30    56      5.0  125000.0    0.0      5.0   0.0    0.0   

   Total_Purchases_A  Total_Purchases_B  Total_Sales_A  Total_Sales_B  
0              618.0              243.0      3090000.0      1215000.0  
1              531.0              187.0      2655000.0       935000.0  
2              653.0              240.0      3265000.0      1200000.0  
3              521.0              207.0      2605000.0      1035000.0  
4              620.0              231.0      3100000.0      1155000.0  


In [4]:
# Tell Meridian which CSV column plays which role.
# The same four columns are supplied both as media exposure and as media spend.
# Roles left unmapped here (add them if the data provides such columns):
#   population='population_column'
#   organic_media=['organic_channel']
#   non_media_treatments=['Promo']
coord_to_columns = load.CoordToColumns(
    time='Date',
    kpi='Total_Sales_A',  # target variable
    controls=['price_A', 'price_B'],
    media=['tv_A', 'web_A', 'tv_B', 'web_B'],
    media_spend=['tv_A', 'web_A', 'tv_B', 'web_B'],
)

# Display name for each media column; keys are the CSV column names.
media_to_channel = dict(
    tv_A='TV Campaign A',
    web_A='Digital Campaign A',
    tv_B='TV Campaign B',
    web_B='Digital Campaign B',
)

# Build a CSV loader from the column roles and channel names defined above.
# The KPI is treated as revenue, and the one column-to-channel mapping is
# reused for spend because media and spend come from the same columns.
loader = load.CsvDataLoader(
    csv_path='sim_data_trunc.csv',
    coord_to_columns=coord_to_columns,
    kpi_type='revenue',
    media_to_channel=media_to_channel,
    media_spend_to_channel=media_to_channel,
)

# Materialize the model input from the CSV.
data = loader.load()

# ROI prior shared by the media channels.
# roi_mu and roi_sigma are the LogNormal's loc and scale, i.e. the mean and
# standard deviation of log(ROI), not of ROI itself: loc=0.3 puts the prior
# median at exp(0.3) ~= 1.35.
roi_mu = 0.3
roi_sigma = 0.2
prior = prior_distribution.PriorDistribution(
    roi_m=tfp.distributions.LogNormal(loc=roi_mu, scale=roi_sigma),
)

# Model specification that uses the custom prior and defaults otherwise.
model_spec = spec.ModelSpec(prior=prior)

# Build the Meridian model and draw prior and posterior samples.
# A fixed seed makes both sampling steps reproducible across kernel restarts.
SAMPLING_SEED = 0

mmm = model.Meridian(input_data=data, model_spec=model_spec)
mmm.sample_prior(500, seed=SAMPLING_SEED)
# NOTE(review): the recorded run of this notebook later reported
# MCMCSamplingError with a maximum R-hat of about 2.6e12, i.e. the chains did
# not converge with these settings. Check the inputs and priors before
# trusting any downstream result.
mmm.sample_posterior(
    n_chains=7,
    n_adapt=500,
    n_burnin=500,
    n_keep=1000,
    seed=SAMPLING_SEED,
)

  self.df[geo_column_name] = self.df[geo_column_name].replace(
  if (constants.GEO) not in self.dataset.dims.keys():
  if constants.MEDIA_TIME not in self.dataset.dims.keys():
I0000 00:00:1739481888.379151  196879 service.cc:145] XLA service 0x3245961b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1739481888.379986  196879 service.cc:153]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1739481888.522968  196879 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
  func=lambda x: np.nanmedian(x, axis=[0, 1]),
W0000 00:00:1739481912.173225  196879 assert_op.cc:38] Ignoring Assert operator mcmc_retry_init/assert_equal_1/Assert/AssertGuard/Assert


In [7]:
# Post-fit reporting for the fitted `mmm`: convergence diagnostics, per-channel
# summary, contribution breakdown and budget optimization.
# NOTE(review): the recorded run of this cell ended with MCMCSamplingError
# (maximum R-hat ~2.6e12). That is a convergence failure of the fit itself and
# must be resolved before these outputs are meaningful.
from IPython.display import display  # only a cell's last expression renders on its own

# 1. Model diagnostics: R-hat per parameter, then prior vs. posterior densities.
#    (plot_trace() is not part of the documented ModelDiagnostics API.)
model_diagnostics = visualizer.ModelDiagnostics(mmm)
display(model_diagnostics.plot_rhat_boxplot())
display(model_diagnostics.plot_prior_and_posterior_distribution())

# 2. Results summary: per-channel metrics as a table.
media_summary = visualizer.MediaSummary(mmm)
summary = media_summary.summary_table()
display(summary)

# 3. Media contributions: waterfall chart of baseline and channel contributions.
display(media_summary.plot_contribution_waterfall_chart())

# 4. Budget optimization. The instance gets its own name so the imported
#    `optimizer` module is not overwritten and the cell can be re-run.
budget_optimizer = optimizer.BudgetOptimizer(mmm)
optimization_result = budget_optimizer.optimize()
print(optimization_result.optimized_data)

MCMCSamplingError: MCMC sampling failed with a maximum R-hat value of 2582931046400.0.