# PyMC Marketing Colab Notebook  
*Hands‑on MMM & CLV in Python*

In [1]:
!pip install pymc-marketing[extra] --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.3/295.3 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25h

## Verify installation

In [2]:
import pymc_marketing as pmm
print('PyMC Marketing:', pmm.__version__)

PyMC Marketing: 0.15.0


In [3]:
# prompt: Generate synthetic weekly dataset improve this code make it more robust : import numpy as np, pandas as pd, arviz as az
# from pymc_marketing.mmm.utils import add_fourier_terms
# rng = np.random.default_rng(42)
# dates = pd.date_range('2018-04-01', '2021-09-01', freq='W-MON')
# df = pd.DataFrame({'date_week': dates})
# df['x1'] = rng.uniform(0, 1, len(df))
# df['x2'] = rng.uniform(0, 1, len(df))
# df['event_1'] = (df['date_week'] == '2019-05-13').astype(int)
# df['event_2'] = (df['date_week'] == '2020-09-14').astype(int)
# df['t'] = np.arange(len(df))
# df = add_fourier_terms(df, date_col='date_week', period=52, order=2)
# df.head()

import numpy as np
import pandas as pd
import arviz as az
import pymc_marketing as pmm

# from pmm.mmm.utils.add_fourier_terms import add_fourier_terms

# Verify installation (keeping this from the previous code)
print('PyMC Marketing:', pmm.__version__)

def generate_synthetic_weekly_data(start_date, end_date, freq='W-MON', num_features=2, fourier_period=52, fourier_order=2, random_state=42, event_dates=None):
    """
    Generates synthetic weekly data with specified characteristics.

    Args:
        start_date (str): Start date of the date range (e.g., '2018-04-01').
        end_date (str): End date of the date range (e.g., '2021-09-01').
        freq (str): Frequency of the date range (e.g., 'W-MON' for weekly Monday).
        num_features (int): Number of 'x' features to generate (e.g., x1, x2, ...).
        fourier_period (int): Period for Fourier terms (e.g., 52 for weekly data).
        fourier_order (int): Order of Fourier terms.
        random_state (int): Seed for the random number generator for reproducibility.
        event_dates (list): A list of strings representing dates for event columns.

    Returns:
        pandas.DataFrame: A DataFrame containing the synthetic data.
    """
    rng = np.random.default_rng(random_state)
    dates = pd.date_range(start_date, end_date, freq=freq)
    df = pd.DataFrame({'date_week': dates})

    # Generate features
    for i in range(1, num_features + 1):
        df[f'x{i}'] = rng.uniform(0, 1, len(df))

    # Generate event columns
    if event_dates:
        for i, event_date_str in enumerate(event_dates):
            try:
                event_date = pd.to_datetime(event_date_str)
                df[f'event_{i + 1}'] = (df['date_week'] == event_date).astype(int)
            except ValueError:
                print(f"Warning: Invalid event date format for '{event_date_str}'. Skipping event column.")


    # Add a time index
    df['t'] = np.arange(len(df))

    # Add Fourier terms
    if fourier_period and fourier_order:
        df = pmm.mmm.utils.add_fourier_terms (df, date_col='date_week', period=fourier_period, order=fourier_order)

    return df

# Example Usage with more robust parameters
start_date = '2018-04-01'
end_date = '2021-09-01'
num_features = 3 # Generate x1, x2, x3
event_dates = ['2019-05-13', '2020-09-14', '2021-01-01'] # Add more event dates
fourier_period = 52
fourier_order = 3 # Increase Fourier order

df = generate_synthetic_weekly_data(
    start_date=start_date,
    end_date=end_date,
    num_features=num_features,
    event_dates=event_dates,
    fourier_period=fourier_period,
    fourier_order=fourier_order,
    random_state=123 # Use a different random state
)

print(df.head())
print("\nDataFrame Info:")
df.info()
print("\nDataFrame Description:")
print(df.describe())


ImportError: cannot import name 'add_fourier_terms' from 'pymc_marketing.mmm.utils' (/usr/local/lib/python3.11/dist-packages/pymc_marketing/mmm/utils.py)

## Generate synthetic weekly dataset

In [None]:
import numpy as np, pandas as pd, arviz as az
from pymc_marketing.mmm.utils import add_fourier_terms

rng = np.random.default_rng(42)
dates = pd.date_range('2018-04-01', '2021-09-01', freq='W-MON')
df = pd.DataFrame({'date_week': dates})
df['x1'] = rng.uniform(0, 1, len(df))
df['x2'] = rng.uniform(0, 1, len(df))
df['event_1'] = (df['date_week'] == '2019-05-13').astype(int)
df['event_2'] = (df['date_week'] == '2020-09-14').astype(int)
df['t'] = np.arange(len(df))
df = add_fourier_terms(df, date_col='date_week', period=52, order=2)
df.head()

## Specify and fit MMM

In [None]:
from pymc_marketing.mmm import MMM, GeometricAdstock, LogisticSaturation
from pymc_marketing.prior import Prior

model_cfg = {
    "intercept": Prior("Normal", mu=0.5, sigma=0.2),
    "saturation_beta": Prior("HalfNormal", sigma=[1.0, 1.0]),
    "gamma_control": Prior("Normal", mu=0, sigma=0.05),
    "gamma_fourier": Prior("Laplace", mu=0, b=0.2),
    "likelihood": Prior("Normal", sigma=Prior("HalfNormal", sigma=6)),
}

mmm = MMM(
    model_config=model_cfg,
    date_column="date_week",
    adstock=GeometricAdstock(l_max=8),
    saturation=LogisticSaturation(),
    channel_columns=["x1", "x2"],
    control_columns=["event_1", "event_2", "t"],
    yearly_seasonality=2,
)

X = df[['x1', 'x2', 'event_1', 'event_2', 't']]
y = rng.normal(loc=10 + 3*df['x1'] + 2*df['x2'], scale=1.0)

mmm.fit(X, y,
        chains=4,
        target_accept=0.9,
        nuts_sampler='numpyro',
        random_seed=rng)

## Diagnostics & posterior predictive

In [None]:
import matplotlib.pyplot as plt
mmm.plot_posterior_predictive(original_scale=True)
plt.show()
mmm.plot_components_contributions(original_scale=True)
plt.show()

## Forecasting scenario

In [None]:
future = df.tail(8).copy()
future['x1'] *= 1.2
future['x2'] *= 0.8
forecast = mmm.predict(future, include_last_observations=True)
forecast.head()