In [None]:
# Causal Impact Analysis Tutorial

This notebook demonstrates how to use the Causal Impact Engine to analyze the causal effect of an intervention (like a marketing campaign) on a target metric (like sales).

## What is Causal Impact Analysis?

Causal Impact Analysis is a statistical method for estimating the causal effect of an intervention on a time series. It uses a Bayesian structural time series model to predict what would have happened if the intervention had not occurred (the "counterfactual"), and then compares this prediction to what actually happened.

This approach helps answer questions like:
- Did our marketing campaign cause an increase in sales?
- What would our sales have been if we hadn't run the campaign?
- How much of the observed change can be attributed to the campaign?


In [None]:
# Import necessary libraries
import sys
import os
from pathlib import Path

# Make the package importable without installing it.
# Path() is the current working directory, so this prepends the directory two
# levels above the working directory to sys.path.
# NOTE(review): presumably that is the repository root when Jupyter is started
# from the notebook's own folder — confirm; starting the kernel from another
# directory changes which path is added.
sys.path.insert(0, str(Path().resolve().parent.parent))

# Import third-party libraries, then the causal impact engine modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from causal_impact_engine.data.sample_data import SampleData
from causal_impact_engine.models.model_factory import ModelFactory
from causal_impact_engine.utils.metrics import CausalImpactMetrics
from causal_impact_engine.utils.reporting import CausalImpactReporter
from causal_impact_engine.utils.visualization import CausalImpactVisualizer

# Set plot style
# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*".
# Older releases only know the un-prefixed name, so pick whichever this
# installation provides instead of failing with OSError on the style lookup.
_whitegrid_style = 'seaborn-v0_8-whitegrid'
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = 'seaborn-whitegrid'
plt.style.use(_whitegrid_style)

# Default palette and figure geometry for every plot in this notebook
sns.set_palette("deep")
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['figure.dpi'] = 100


In [None]:
## Generate Sample Marketing Campaign Data

Let's generate some sample data for a marketing campaign. This data will include:
- Daily sales data
- Web traffic data
- Ad spend data
- Campaign indicator (0 before campaign, 1 during campaign)

We'll simulate a campaign that started on April 1, 2022 and ran until the end of June 2022.


In [None]:
# Build the sample marketing campaign dataset (random_seed=42 is passed to the generator)
data = SampleData.get_marketing_campaign_data(random_seed=42)

# Report the dimensions, then preview the first five rows as the cell output
sample_shape = data.shape
print("Sample data shape:", sample_shape)
data.head(5)


In [None]:
# Visualize the target series, passing the campaign start as the intervention date
sales_plot_kwargs = {
    "data": data,
    "date_col": "date",
    "target_col": "sales",
    "intervention_date": "2022-04-01",
    "title": "Marketing Campaign Data: Sales Over Time",
}
fig = CausalImpactVisualizer.plot_time_series(**sales_plot_kwargs)

plt.show()


In [None]:
# Let's also look at the covariates: one panel per covariate, stacked on a shared x-axis
covariate_panels = [
    # (column, legend label, panel title)
    ("web_traffic", "Web Traffic", "Web Traffic Over Time"),
    ("ad_spend", "Ad Spend", "Ad Spend Over Time"),
]
fig, axes = plt.subplots(len(covariate_panels), 1, figsize=(12, 8), sharex=True)

for ax, (column, label, panel_title) in zip(axes, covariate_panels):
    data.plot(x="date", y=column, ax=ax, label=label)
    # Dashed red vertical line at the campaign start date
    ax.axvline(pd.to_datetime("2022-04-01"), color="r", linestyle="--", label="Campaign Start")
    ax.set_title(panel_title)
    ax.legend()

plt.tight_layout()
plt.show()


In [None]:
## Running Causal Impact Analysis

Now let's run the causal impact analysis to determine if the marketing campaign had a causal effect on sales.

We'll define the pre-intervention period (before the campaign) and the post-intervention period (during the campaign), and then run the analysis using the Google CausalImpact model.


In [None]:
# Analysis windows, each given as [start, end] date strings
pre_period = ["2022-01-01", "2022-03-31"]   # before the campaign
post_period = ["2022-04-01", "2022-06-30"]  # during the campaign

# Collect the model settings in one mapping, then build the "causalimpact" model
causalimpact_settings = dict(
    model_type="causalimpact",
    data=data,
    pre_period=pre_period,
    post_period=post_period,
    target_col="sales",
    date_col="date",
    covariates=["web_traffic", "ad_spend"],
)
model = ModelFactory.create_model(**causalimpact_settings)

# Fit the model, then fetch the summary mapping that the prints below read from
model.run_inference()
summary = model.get_summary()

# Headline numbers; the significance flag uses a 0.05 p-value threshold
print(f"Relative Effect: {summary['relative_effect'] * 100:.2f}%")
print(f"Absolute Effect: {summary['average_effect']:.2f}")
print(f"Cumulative Effect: {summary['cumulative_effect']:.2f}")
print(f"p-value: {summary['p_value']:.4f}")
print(f"Statistically Significant: {summary['p_value'] < 0.05}")

# Draw the model's result plot
fig = model.plot_results()
plt.show()


In [None]:
## Generating Reports

Let's generate some reports to summarize the results of our analysis.


In [None]:
# Predictions from the fitted model
predictions = model.predict()

# Compute the metrics from the predictions, naming which column plays which role
# NOTE(review): `metrics` is not displayed or used again in the cells visible here
metric_columns = {
    "target_col": "sales",
    "prediction_col": "prediction",
    "intervention_col": "campaign",
}
metrics = CausalImpactMetrics.calculate_all_metrics(data=predictions, **metric_columns)

# Build the executive summary from the model summary and print it
summary_inputs = {
    "model_results": summary,
    "intervention_name": "marketing campaign",
    "target_name": "sales",
}
executive_summary = CausalImpactReporter.generate_executive_summary(**summary_inputs)

print(executive_summary)


In [None]:
# Build the technical report from the predictions and print it
report_inputs = {
    "data": predictions,
    "target_col": "sales",
    "prediction_col": "prediction",
    "intervention_col": "campaign",
}
technical_report = CausalImpactReporter.generate_technical_report(**report_inputs)

print(technical_report)


In [None]:
## Interactive Visualization

Let's create an interactive visualization of the results.


In [None]:
# Map each plot role to the column of `predictions` that supplies it
plot_columns = dict(
    date_col="date",
    target_col="sales",
    counterfactual_col="prediction",
    lower_col="prediction_lower",
    upper_col="prediction_upper",
    effect_col="effect",
)

# Create the interactive plot with the campaign start as the intervention date
interactive_plot = CausalImpactVisualizer.create_interactive_plot(
    data=predictions,
    intervention_date="2022-04-01",
    title="Marketing Campaign Causal Impact Analysis",
    **plot_columns,
)

# Leaving the object as the last expression makes the notebook render it
interactive_plot


In [None]:
## Trying a Different Model

Let's also try the PyMC model, which uses a custom Bayesian structural time series approach.


In [None]:
# Settings passed to the PyMC model through model_args
pymc_model_args = {
    "num_samples": 1000,  # Reduced for faster execution in the notebook
    "chains": 2,
    "tune": 500,
    "seasonal_period": 7,  # Weekly seasonality
    "include_trend": True,
    "standardize": True,
}

# Same data, periods and covariates as the first model; only the model type and its args differ
pymc_settings = dict(
    model_type="pymc",
    data=data,
    pre_period=pre_period,
    post_period=post_period,
    target_col="sales",
    date_col="date",
    covariates=["web_traffic", "ad_spend"],
    model_args=pymc_model_args,
)
pymc_model = ModelFactory.create_model(**pymc_settings)

# Fit the model, then fetch the summary mapping that the prints below read from
pymc_model.run_inference()
pymc_summary = pymc_model.get_summary()

# Headline numbers; the significance flag uses a 0.05 p-value threshold
print(f"Relative Effect: {pymc_summary['relative_effect'] * 100:.2f}%")
print(f"Absolute Effect: {pymc_summary['average_effect']:.2f}")
print(f"Cumulative Effect: {pymc_summary['cumulative_effect']:.2f}")
print(f"p-value: {pymc_summary['p_value']:.4f}")
print(f"Statistically Significant: {pymc_summary['p_value'] < 0.05}")

# Draw the model's result plot
fig = pymc_model.plot_results()
plt.show()


In [None]:
# Plot posterior distribution
# Calls plot_posterior() on the PyMC model created in the previous cell, so that
# cell must have been run first. The return value is kept in `fig`.
# NOTE(review): presumably this shows the posterior of the estimated effect —
# confirm against the model class.
fig = pymc_model.plot_posterior()
plt.show()


In [None]:
## Conclusion

In this tutorial, we've demonstrated how to use the Causal Impact Engine to analyze the causal effect of a marketing campaign on sales. We used both the Google CausalImpact model and a custom PyMC model to perform the analysis.

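If the analysis ran as expected, the results printed above indicate that the marketing campaign had a statistically significant positive effect on sales (a p-value below 0.05). The estimated relative effect is around 15%, which means that sales were approximately 15% higher than they would have been without the campaign. The two models may report slightly different estimates, so compare the values printed for each.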

This type of analysis is valuable for marketing attribution, as it helps separate the causal effect of marketing activities from other factors that might influence sales.
