# Demo of PyMC models

In [None]:
import arviz as az
import matplotlib.pyplot as plt
import pandas as pd
import pathlib

In [None]:
# Render inline matplotlib figures as SVG and apply the "arviz-darkgrid" plotting style.
%config InlineBackend.figure_format = 'svg'
az.style.use("arviz-darkgrid")

In [None]:
# Load IPython's autoreload extension. Mode 2 reloads all modules before each cell is
# executed, so edits to imported source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# When True, each plotting cell below also saves its figure as an SVG file in the
# `img` directory located one level above the current working directory.
export_images = True

## Synthetic Control

In [None]:
# Locate the synthetic control example CSV relative to the parent of the working directory.
sc_data_path = pathlib.Path.cwd().parents[0].joinpath(
    'causalpy', 'data', 'synthetic_control.csv'
)
df = pd.read_csv(sc_data_path)

# Treatment time passed to the SyntheticControl experiment.
treatment_time = 70

In [None]:
from causalpy.pymc_experiments import SyntheticControl
from causalpy.pymc_models import WeightedSumFitter

# The formula starts with "0 +" because no intercept is wanted in this model.
thing = SyntheticControl(
    df,
    treatment_time,
    formula="actual ~ 0 + a + b + c + d + e + f + g",
    prediction_model=WeightedSumFitter(),
)

thing.plot();

# Optionally save the current figure as an SVG in the sibling `img` directory.
if export_images:
    plt.savefig(
        pathlib.Path.cwd().parents[0].joinpath('img', 'synthetic_control_pymc.svg'),
        bbox_inches='tight',
        format="svg",
    )

## Interrupted time series

In [None]:
# Locate the interrupted time series example CSV relative to the parent of the working directory.
its_data_path = pathlib.Path.cwd().parents[0].joinpath('causalpy', 'data', 'its.csv')

# Parse the "date" column as datetimes and use it as the index.
df = pd.read_csv(its_data_path, parse_dates=["date"]).set_index("date")

# Treatment time, expressed on the same datetime scale as the index.
treatment_time = pd.to_datetime("2017-01-01")

In [None]:
from causalpy.pymc_experiments import InterruptedTimeSeries
from causalpy.pymc_models import LinearRegression

# Fit the interrupted time series experiment with a linear regression prediction model.
thing = InterruptedTimeSeries(
    df,
    treatment_time,
    formula='y ~ 1 + t + C(month)',
    prediction_model=LinearRegression(),
)

fig, ax = thing.plot()

# Optionally save the current figure as an SVG in the sibling `img` directory.
if export_images:
    plt.savefig(
        pathlib.Path.cwd().parents[0].joinpath('img', 'interrupted_time_series_pymc.svg'),
        bbox_inches='tight',
        format="svg",
    )

## Difference in differences

In [None]:
# Locate the difference in differences example CSV relative to the parent of the working directory.
did_data_path = pathlib.Path.cwd().parents[0].joinpath('causalpy', 'data', 'did.csv')

df = pd.read_csv(did_data_path)

In [None]:
from causalpy.pymc_experiments import DifferenceInDifferences
from causalpy.pymc_models import LinearRegression

# NOTE: `treated` is a deterministic function of `t` and `group`, so this function
# should be added into the formula.

thing = DifferenceInDifferences(
    df,
    formula='y ~ 1 + group + t + treated:group',
    prediction_model=LinearRegression(),
)
fig, ax = thing.plot()

# Optionally save the current figure as an SVG in the sibling `img` directory.
if export_images:
    plt.savefig(
        pathlib.Path.cwd().parents[0].joinpath('img', 'difference_in_differences_pymc.svg'),
        bbox_inches='tight',
        format="svg",
    )

## Regression discontinuity

In [None]:
# Locate the regression discontinuity example CSV relative to the parent of the working directory.
rd_data_path = pathlib.Path.cwd().parents[0].joinpath(
    'causalpy', 'data', 'regression_discontinuity.csv'
)
df = pd.read_csv(rd_data_path)

# Bare expression so the notebook displays the loaded frame.
df

In [None]:
from causalpy.pymc_experiments import RegressionDiscontinuity
from causalpy.pymc_models import LinearRegression

# Fit the regression discontinuity experiment with the treatment threshold set to 0.5.
thing = RegressionDiscontinuity(
    df,
    formula='y ~ 1 + bs(x, df=6) + treated',
    prediction_model=LinearRegression(),
    treatment_threshold=0.5,
)
thing.plot()

# Optionally save the current figure as an SVG in the sibling `img` directory.
if export_images:
    plt.savefig(
        pathlib.Path.cwd().parents[0].joinpath('img', 'regression_discontinuity_pymc.svg'),
        bbox_inches='tight',
        format="svg",
    )