### Importing the Libraries and Choosing the Root

In [2]:
import os, sys
from pathlib import Path
import pandas as pd

# Project root: .../MarketingMixModel_Meridian/
ROOT = Path("..").resolve()
DATA = ROOT / "data"
DATA.mkdir(exist_ok=True, parents=True)

print("ROOT:", ROOT)
print("DATA:", DATA)

ROOT: /Users/meryemcamci/Documents/GitHub/MarketingMixModel_Meridian
DATA: /Users/meryemcamci/Documents/GitHub/MarketingMixModel_Meridian/data


### Uploading the Meridian Synthetic Dataset 

In [4]:
import importlib

df = None
err_msgs = []

# 1) The most common way: meridian.examples.synthetic
try:
    synth = importlib.import_module("meridian.examples.synthetic")
    df = synth.load_example_dataset("synthetic_1")  # varsa "synthetic_2" de deneyebilirsin
    print("Loaded via meridian.examples.synthetic")
except Exception as e:
    err_msgs.append(("meridian.examples.synthetic", repr(e)))

# 2) In some of the versions: meridian.examples.data
if df is None:
    try:
        ex = importlib.import_module("meridian.examples.data")
        df = ex.load_example_dataset("synthetic_1")
        print("Loaded via meridian.examples.data")
    except Exception as e:
        err_msgs.append(("meridian.examples.data", repr(e)))

# 3) Other possible way: meridian.data.synthetic
if df is None:
    try:
        alt = importlib.import_module("meridian.data.synthetic")
        df = alt.load_example_dataset("synthetic_1")
        print("Loaded via meridian.data.synthetic")
    except Exception as e:
        err_msgs.append(("meridian.data.synthetic", repr(e)))

if df is None:
    print("⚠️ We could not find the synthethic dataset module. We are going to generate a large synthethic dataset in the next cell.")
else:
    print(df.shape)
    display(df.head())
    print(sorted(df.columns))


⚠️ We could not find the synthethic dataset module. We are going to generate a large synthethic dataset in the next cell.


As you can see, we get the error message we expected because Google Meridian's "examples/synthetic dataset" module is not in some of the releases or may have been removed. Therefore, we are going to generate our own dataset professionally. 

In [6]:
import numpy as np
import pandas as pd

def generate_synthetic_mmm(n_weeks=156, seed=42):
    rng = np.random.default_rng(seed)
    dates = pd.date_range("2021-01-04", periods=n_weeks, freq="W-MON")

    # Trend + seasonality (weekly)
    t = np.arange(n_weeks)
    seasonal = 1.0 + 0.15*np.sin(2*np.pi*t/52) + 0.05*np.cos(2*np.pi*t/26)

    # Spend channels
    tv = rng.gamma(4.5, 2_000, size=n_weeks)
    search = rng.gamma(3.2, 1_400, size=n_weeks)
    social = rng.gamma(2.8, 900, size=n_weeks)
    display = rng.gamma(2.0, 700, size=n_weeks)

    # Controls
    price_index = 100 + rng.normal(0, 1.0, size=n_weeks).cumsum()/10
    weekno = pd.Series(dates).dt.isocalendar().week.astype(int)
    holiday = ((weekno >= 51) | (weekno <= 1)).astype(int).to_numpy()
    promo = (rng.random(n_weeks) < 0.12).astype(int)

    # Adstock 
    def adstock(x, decay=0.6):
        y, carry = np.zeros_like(x, dtype=float), 0.0
        for i, v in enumerate(x):
            carry = v + decay*carry
            y[i] = carry
        return y

    tv_eff = adstock(tv, 0.6)
    search_eff = adstock(search, 0.5)
    social_eff = adstock(social, 0.55)
    display_eff = adstock(display, 0.45)

    # Saturation 
    def hill(x, alpha=0.6, beta=1.2):
        return (x**alpha) / (x**alpha + beta**alpha)

    tv_resp = 1200 * hill(tv_eff, 0.7, 1.5)
    search_resp = 900  * hill(search_eff, 0.8, 1.2)
    social_resp = 600  * hill(social_eff, 0.75, 1.0)
    display_resp = 400 * hill(display_eff, 0.65, 1.3)

    base = 80_000 * seasonal * (1 + 0.04*promo - 0.015*(price_index - np.mean(price_index)))
    noise = rng.normal(0, 6_000, size=n_weeks)

    revenue = base + tv_resp + search_resp + social_resp + display_resp + noise
    revenue = np.maximum(revenue, 5_000)

    df_gen = pd.DataFrame({
        "date": dates,
        "revenue": revenue.round(2),
        "tv_spend": tv.round(2),
        "search_spend": search.round(2),
        "social_spend": social.round(2),
        "display_spend": display.round(2),
        "price_index": price_index.round(2),
        "promo": promo,
        "holiday": holiday,
    })
    return df_gen

if 'df' not in globals() or df is None:
    df = generate_synthetic_mmm(n_weeks=208)  # 4 years ~ 208 weeks
    print("✅ Backup synthetic dataset generated:", df.shape)
    display(df.head())


✅ Backup synthetic dataset generated: (208, 9)


Unnamed: 0,date,revenue,tv_spend,search_spend,social_spend,display_spend,price_index,promo,holiday
0,2021-01-04,92990.47,9640.26,6064.77,949.22,1229.19,100.01,1,1
1,2021-01-11,85634.73,11787.83,4500.66,491.23,176.91,100.06,0,0
2,2021-01-18,84484.49,8866.21,8054.74,2263.99,248.7,100.04,0,0
3,2021-01-25,88037.52,8264.93,2451.78,4416.53,3144.29,99.87,1,0
4,2021-02-01,91586.01,12463.7,3785.3,2828.0,1461.39,99.97,0,0


### Getting the Dataset Ready for the MMM 

In [8]:
df = df.copy()

# KPI
if "revenue" not in df.columns and "kpi" in df.columns:
    df = df.rename(columns={"kpi": "revenue"})

# Normalizing the possible media channel names 
rename_map = {
    "tv": "tv_spend",
    "search": "search_spend",
    "social": "social_spend",
    "display": "display_spend",
}
df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

# Column check 
needed = ["date","revenue","tv_spend","search_spend","social_spend","display_spend"]
missing = [c for c in needed if c not in df.columns]
print("Missing (just info):", missing)

# Datetype and sorting
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").reset_index(drop=True)

display(df.head())
print("Final columns:", list(df.columns))
print("Shape:", df.shape)


Missing (just info): []


Unnamed: 0,date,revenue,tv_spend,search_spend,social_spend,display_spend,price_index,promo,holiday
0,2021-01-04,92990.47,9640.26,6064.77,949.22,1229.19,100.01,1,1
1,2021-01-11,85634.73,11787.83,4500.66,491.23,176.91,100.06,0,0
2,2021-01-18,84484.49,8866.21,8054.74,2263.99,248.7,100.04,0,0
3,2021-01-25,88037.52,8264.93,2451.78,4416.53,3144.29,99.87,1,0
4,2021-02-01,91586.01,12463.7,3785.3,2828.0,1461.39,99.97,0,0


Final columns: ['date', 'revenue', 'tv_spend', 'search_spend', 'social_spend', 'display_spend', 'price_index', 'promo', 'holiday']
Shape: (208, 9)


### Save the Dataset as CSV

In [10]:
from pathlib import Path

out_path = Path("..")/"data"/"marketing_data.csv"
df.to_csv(out_path, index=False)
print("✅ Saved:", out_path)

✅ Saved: ../data/marketing_data.csv
