# Demo of PyMC models

In [None]:
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt
import pymc as pm

In [None]:
# Render inline figures at retina (high-DPI) resolution and switch matplotlib
# to the "arviz-darkgrid" style for the figures drawn afterwards.
%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so source edits are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Set to True to also save the figures produced below as PNG files under ../img/.
export_images = False

## Synthetic Control

In [None]:
from causalpy.simulate_data import generate_synthetic_control_data

# Simulate a synthetic control dataset. The generator returns the data
# together with a second value, kept here as `weightings_true`.
# NOTE(review): 70 is presumably the row index at which treatment starts — confirm.
treatment_time = 70
data, weightings_true = generate_synthetic_control_data(
    treatment_time=treatment_time
)
data.head()

In [None]:
from causalpy.pymc_experiments import SyntheticControl
from causalpy.pymc_models import WeightedSumFitter

# The leading "0 +" in the formula leaves the intercept out of the model,
# which is what we want here.
thing = SyntheticControl(
    data,
    treatment_time,
    formula="actual ~ 0 + a + b + c + d + e + f + g",
    prediction_model=WeightedSumFitter(),
)

thing.plot()

# Optionally write the current figure to disk.
if export_images:
    plt.savefig(
        "../img/synthetic_control_pymc.png",
        bbox_inches="tight",
        dpi=300,
        facecolor="white",
    )

## Interrupted time series

In [None]:
# # Generate data (uncomment and run this cell to regenerate ../causalpy/data/its.csv)
# from causalpy.simulate_data import generate_time_series_data

# df = generate_time_series_data("2017-01-01")
# df = df.loc[:, ['month', 'year', 't', 'y']]
# df.to_csv("../causalpy/data/its.csv")

In [None]:
# Read the interrupted time series CSV, parse its "date" column as datetimes
# and use that column as the index.
df = pd.read_csv("../causalpy/data/its.csv", parse_dates=["date"]).set_index("date")
# The treatment time is now a timestamp, to match the datetime index.
treatment_time = pd.to_datetime("2017-01-01")
df.head()

In [None]:
from causalpy.pymc_experiments import InterruptedTimeSeries
from causalpy.pymc_models import LinearRegression

# Model y with an intercept, the t column, and month treated as categorical.
thing = InterruptedTimeSeries(
    df,
    treatment_time,
    formula="y ~ 1 + t + C(month)",
    prediction_model=LinearRegression(),
)

fig, ax = thing.plot()

# Optionally write the current figure to disk.
if export_images:
    plt.savefig(
        "../img/interrupted_time_series_pymc.png",
        bbox_inches="tight",
        dpi=300,
        facecolor="white",
    )

## Difference in differences

## Regression discontinuity

In [None]:
# # Generate data (uncomment and run this cell to regenerate ../causalpy/data/regression_discontinuity.csv)
# from causalpy.simulate_data import generate_regression_discontinuity_data
# df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
# df.to_csv('../causalpy/data/regression_discontinuity.csv', index=False)

In [None]:
# Read the regression discontinuity dataset; this rebinds `data`.
data = pd.read_csv("../causalpy/data/regression_discontinuity.csv")

In [None]:
from causalpy.pymc_experiments import RegressionDiscontinuity
from causalpy.pymc_models import LinearRegression

# Linear model in x with a `treated` term and an x:treated interaction,
# using a treatment threshold of 0.5.
thing = RegressionDiscontinuity(
    data,
    formula="y ~ 1 + x + treated + x:treated",
    prediction_model=LinearRegression(),
    treatment_threshold=0.5,
)
thing.plot()

# Optionally write the current figure to disk.
if export_images:
    plt.savefig(
        "../img/regression_discontinuity_pymc.png",
        bbox_inches="tight",
        dpi=300,
        facecolor="white",
    )

TODO: apply a Gaussian process (GP) model, or use non-linear features (e.g. polynomial or sine basis functions) in the linear regression approach.