# CausalImpact
CausalImpact in R: https://github.com/google/CausalImpact  
tfcausalimpact in Python: https://github.com/WillianFuks/tfcausalimpact

In [None]:
import warnings
warnings.simplefilter('ignore', FutureWarning)

import os
import pandas as pd
import seaborn as sns

import sys
sys.path.append("../") # Set parent directory to sys.path
sys.dont_write_bytecode = True
%load_ext autoreload
%autoreload 2
# import src.utils as utils
import src.ci_utils as ci_utils

# Plot styling: build the candidate colour palettes, select one, and hand it to seaborn.
# Okabe-Ito palette.
palette0 = sns.color_palette(
    [
        '#E69F00', '#56B4E9', '#009E73', '#F0E442',
        '#0072B2', '#D55E00', '#CC79A7', '#000000',
    ]
)
# Alternative four-colour palette (defined here but not selected below).
palette2 = sns.color_palette(["#D81B60", "#1E88E5", "#FFC107", "#004D40"])

# The palette that is displayed and passed to seaborn.
palette = palette0
display(palette)

sns.set_palette(palette)
sns.set_theme(
    context='poster',
    style='ticks',
    palette=palette,
    font_scale=1.0,
)

## Configuration

In [None]:
# Select the run whose configuration should be loaded (keep exactly one line active).
# run_id = "run-05"
run_id = "run-10"
# run_id = "run-15"
# run_id = "run-20"

# Load the YAML configuration of the selected run and show it in the cell output.
cfg_path = f"../output/causal-impact/{run_id}.yaml"
ci_cfg = ci_utils.load_from_yaml(cfg_path)
display(ci_cfg)

# Read the CausalImpact settings from the 'ci' section of the configuration.
_ci_section = ci_cfg['ci']
FIT_METHOD = _ci_section['fit_method']
ALPHA_LEVEL = _ci_section['alpha_level']
USE_COVARIATES = _ci_section['use_covariates']

# Echo the settings, one per line.
print(
    f"fit_method: {FIT_METHOD}",
    f"alpha_level: {ALPHA_LEVEL}",
    f"use_covariates: {USE_COVARIATES}",
    sep="\n",
)

## Select data types as list

In [None]:
# Data types to export (keep exactly one assignment active).
# data_type_list = ["smoothed_VeDBA_2s"] # S-VeDBA
# data_type_list = ["speed_distance_km_h"]
# data_type_list = ["abs_diff_distance_m"] # AD-Speed
# data_type_list = ["pixel_count_p"]
data_type_list = ["abs_diff_pixel_count_p"] # AD-MPR
# data_type_list = ["smoothed_VeDBA_2s", "abs_diff_distance_m", "abs_diff_pixel_count_p"]

# For every data type and every session file, prepare the CausalImpact input and
# write it (data.csv + periods.csv) into the R analysis directory tree.
for data_type in data_type_list:
    print(f"run_id: {run_id} | data_type: {data_type}")
    # Per-data-type settings from the loaded configuration.
    _window_cfg = ci_cfg['data'][data_type]
    print(f"before_sec: {_window_cfg['before_sec']}")
    print(f"after_sec: {_window_cfg['after_sec']}")

    path_list, Y_col, X_cols, title_base = ci_utils.setup_for_causal_impact_analysis(
        data_type, USE_COVARIATES
    )
    print(len(path_list))

    for i, path in enumerate(path_list):
        # Session name = file name with ".csv" removed.
        _csv_name = os.path.basename(path)
        session_name = _csv_name.replace(".csv", "")
        # print("-----------------------------------------------------------------------")
        print(f"i: {i:2d} session_name: {session_name} | data_type: {data_type}")

        df, data, pre_period, post_period, intervention_start_index = (
            ci_utils.prep_data_for_causal_impact_analysis(path, Y_col, X_cols, data_type, ci_cfg)
        )

        # One output directory per run / data type / session.
        r_dir = f"../r-analysis/data/ci-data/{run_id}/{data_type}/{session_name}"
        os.makedirs(r_dir, exist_ok=True)

        # Prepared data table.
        data.to_csv(f'{r_dir}/data.csv', index=False)

        # Pre- and post-intervention periods, stored side by side as two columns.
        periods = pd.DataFrame(
            {
                'pre_period': pre_period,
                'post_period': post_period,
            }
        )
        periods.to_csv(f'{r_dir}/periods.csv', index=False)
        # display(periods)

In [None]:
# Quick inspection of `pre_period` and `data` as left by the most recently
# executed iteration of the export loop.
print(len(pre_period))
print(pre_period)

# Show the prepared data table and its Python type.
display(data)
print(type(data))

## Run CausalImpact in R

In [None]:
# Do not run the CausalImpact analysis in Python:
# it places too high a computational load on an ordinary (Windows) laptop PC.
# Running the analysis with the original R package appears to impose a significantly lower computational load.

## Run CausalImpact in Python (NOT used)

In [None]:
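# NOTE(review): kept for reference only. This cell uses np, yaml, plt and CausalImpact,
# none of which are imported in the visible setup cell, and it passes `ci_cfg` to
# setup_for_causal_impact_analysis where the active export cell passes `USE_COVARIATES`.
# Reconcile both points before re-enabling this code.
# for j, data_type in enumerate(data_type_list):
#     (
#         path_list, Y_col, X_cols, title_base
#     ) = ci_utils.setup_for_causal_impact_analysis(data_type, ci_cfg)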

#     # for i, path in enumerate(path_list):
#     for i, path in enumerate(path_list[36:]):
#     # for i, path in enumerate(path_list[0:1]): # test with the first data (index = 0) 
#     # for i, path in enumerate(path_list[0:5]): # test with the first 5 data (index = 0, 1, 2, 3, 4) 
        
#         # for debugging and plotting the figure 4
#         # if "LBP01_S00" in path:
#         #     print(path)
#         # elif "LBP03_S00" in path:
#         #     print(path)
#         # else:
#         #     continue # skip

#         print("-----------------------------------------------------------------------")
#         session_name = os.path.basename(path).replace(".csv", "")
#         print(f"session_name: {session_name} | data_type: {data_type}")
#         (
#             df, data, pre_period, post_period, intervention_start_index
#         ) = ci_utils.prep_data_for_causal_impact_analysis(path, Y_col, X_cols, data_type, ci_cfg)
        
#         # Check intervention_start_index and skip the data if the intervention was cancelled
#         print(f"intervention_start_index: {intervention_start_index}")
#         if intervention_start_index == 0:
#             print("Intervention cancelled -> Skip the data.")
#             continue
        
#         # Check the y values and skip if all values are zero.
#         y_val = data['y'].values
#         if np.all(y_val == 0):
#             print("!!! All y values are zero !!!")
#             print("You cannot run CausalImpact analysis. -> Add very small value (1e-7).")
#             # Exception handler
#             data = ci_utils.input_all_zeros_handler(data, pre_period)
#         else:
#             print("You can run CausalImpact analysis with the data.")

#         # Run CausalImpact model
#         ci = CausalImpact(data, pre_period, post_period, model_args={'fit_method': FIT_METHOD}, alpha=ALPHA_LEVEL) 
#         print(ci.summary())
        
#         # Save the results
#         save_dir = f"../output/causal-impact/{run_id}/{data_type}/{session_name}"
#         os.makedirs(save_dir, exist_ok=True)
#         fig_png_save_dir = f"../output/causal-impact/{run_id}/{data_type}/png"
#         os.makedirs(fig_png_save_dir, exist_ok=True)
#         # config data
#         config_dict = {
#             'alpha': ci.alpha,
#             'model_args': ci.model_args,
#         }
#         with open(f'{save_dir}/config.yaml', 'w') as file:
#             yaml.dump(config_dict, file, default_flow_style=False, allow_unicode=True)
#         # summary report
#         report_path = f"{save_dir}/report.txt"
#         ci_utils.save_ci_summary_report_as_txt(ci, report_path)
#         # summary data
#         df_summary = ci.summary_data.reset_index().rename(columns={'index': 'value_name'})
#         p_value = ci.p_value
#         df_summary.loc[len(df_summary)] = ['p_value', p_value, p_value]
#         df_summary.to_csv(f"{save_dir}/summary_data.csv", index=False)
#         # inference data
#         df_ci = pd.concat([data, ci.inferences], axis=1)
#         df_ci.to_csv(f"{save_dir}/df_ci.csv", index=False)
#         # plot
#         _data_type = data_type.replace("-", "_")
#         file_name = f"fig_ci_{session_name.lower()}_{_data_type}"
#         fig_title = f"{session_name} | {title_base}"
#         fig = ci_utils.vis_causal_impact(df_ci, alpha=0.03, title=fig_title, data_type=data_type, ci_cfg=ci_cfg)
#         fig.savefig(f"{save_dir}/{file_name}.svg", bbox_inches="tight", pad_inches=0.25, transparent=False)
#         fig.savefig(f"{fig_png_save_dir}/{file_name}.png", dpi=350, bbox_inches="tight", pad_inches=0.25, transparent=False)
#         plt.close()

In [None]:
# print(ci.summary_data)
# print(type(ci.summary_data))
# df_summary = ci.summary_data.reset_index().rename(columns={'index': 'value_name'})
# p_value = ci.p_value
# df_summary.loc[len(df_summary)] = ['p_value', p_value, p_value]
# display(df_summary)

In [None]:
# print(ci.summary())

In [None]:
# print(ci.summary('report'))