# MADS SIADS 591 Milestone 1 Project (Preprocessing Steps). 

In [3]:
# Python module. 
import os, re
import pandas as pd 
import altair as alt 

# Change the current directory from (./notebook) to root directory. 
# Only the last path component is compared, so a parent folder whose name
# merely contains "notebook" does not trigger the move, and the check does
# not depend on the path separator of the operating system. 
if os.path.basename(os.getcwd()) == "notebook": 
	os.chdir("..") 

# Custom module. 
from modules.process_tickerdata import * 
from modules.consolidate_eventdates import * 
from modules.compute_aggregations import * 

# Recession. 
from config.config import RECESSIONS 

# For clearing safe warnings. Not important. 
# `clear_output` is imported from the public `IPython.display` module; the
# `IPython.core.display` location is deprecated for display helpers. 
from IPython.display import clear_output
clear_output() 

## Configurations (general). 

In [4]:
# Pandas DF config: display limits for rows, columns and cell width. 
display_options = {
    "display.max_rows": 50,
    "display.max_columns": 50,
    "display.max_colwidth": 200,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Wipe whatever the statements above printed. Not important. 
clear_output()

# Data Preparation. 

## Read Ticker Data. 

In [5]:
# Data-source switch handed to ProcessTickerData. With True, the run log
# below reports that the ticker history is read from a CSV file. 
use_csv = True

# Get and process ticker data for the given start/end dates. 
df_tickers = ProcessTickerData(use_csv=use_csv, start_date="1998-12-01", end_date="2021-12-17") 

# Write the result out to CSV only when `use_csv` is switched off. 
if not use_csv: 
	df_tickers.write_to_csv() 

# Preview the frame exposed on the object as `.df`. 
df_tickers.df 

Read from (sector_price_history_processed_stg_1.csv)


Unnamed: 0,date,open,high,low,close,volume,dividends,stock splits,ticker,price_chg_c2o,price_chg_o2c,price_chg_c2c,volume_rollmed,volume_diff_to_med,volume_pchg_from_med,bo_upper,bo_lower,tscore_bo,tscore_c2o,tscore_o2c,tscore_c2c,vix_open,vix_close,vix_chg_c2c,vix_tscore_c2c
0,1998-12-22,12.09,12.09,11.97,12.02,55887,0.0,0.0,XLF,,-0.005790,,,,,,,,,,,24.05,22.78,-0.045264,
1,1998-12-23,11.97,12.20,11.97,12.20,78784,0.0,0.0,XLF,-0.004160,0.019215,0.014975,,,,,,,,,,21.89,20.21,-0.112818,
2,1998-12-24,12.20,12.28,12.16,12.28,43824,0.0,0.0,XLF,0.000000,0.006557,0.006557,,,,,,,,,,21.00,21.48,0.062840,
3,1998-12-28,12.27,12.27,12.09,12.12,51948,0.0,0.0,XLF,-0.000814,-0.012225,-0.013029,,,,,,,,,,22.92,23.50,0.094041,
4,1998-12-29,12.13,12.25,11.99,12.25,100819,0.0,0.0,XLF,0.000825,0.009893,0.010726,,,,,,,,,,23.68,22.18,-0.056170,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56912,2021-12-10,56.70,56.74,55.50,56.51,19524200,0.0,0.0,XLE,0.010875,-0.003351,0.007488,26789050.0,-7264850.0,-0.271187,61.999383,25.533321,1.397864,0.717619,-0.214470,0.259242,21.27,18.69,-0.133920,-1.558853
56913,2021-12-13,56.00,56.17,54.67,54.94,28604000,0.0,0.0,XLE,-0.009025,-0.018929,-0.027783,27344050.0,1259950.0,0.046078,62.064044,25.594697,1.218627,-0.814246,-1.138809,-1.347036,19.29,20.31,0.086677,0.961243
56914,2021-12-14,54.60,55.55,54.52,54.71,28798800,0.0,0.0,XLE,-0.006189,0.002015,-0.004186,27952200.0,846600.0,0.030287,62.125984,25.655071,1.186641,-0.592534,0.112473,-0.267460,19.67,21.89,0.077794,0.866882
56915,2021-12-15,54.63,54.77,53.23,54.45,33858100,0.0,0.0,XLE,-0.001462,-0.003295,-0.004752,28378450.0,5479650.0,0.193092,62.182052,25.713892,1.151912,-0.229931,-0.199923,-0.289205,21.60,19.29,-0.118776,-1.390069


# Data Aggregation. 

## Compute Aggregations. 

In [6]:
# Data-source switch handed to AggregateMeasures. 
use_csv = True

# Compute aggregations. 
# The event-date object is created first and passed into the aggregation
# step; the run log below lists the event, economic-report and headline
# files that get loaded along the way. 
df_tickers_with_events = ConsolidateDates() 
df_aggregated_data = AggregateMeasures(use_csv=use_csv, ticker_event_dates=df_tickers_with_events) 

# Write both the event-date object and the aggregates to CSV only when
# `use_csv` is switched off. 
if not use_csv: 
	df_aggregated_data.ticker_event_dates.write_to_csv() 
	df_aggregated_data.write_to_csv() 

# Preview the aggregated frame exposed on the object as `.df`. 
df_aggregated_data.df 

Read from (sector_price_history_processed_stg_1.csv)
Loading Event Dates
Read from (observance_dates_ext.csv)
Read from (firsttrdrday_ofmonth.csv)
Read from (santa_rally.csv)
Read from (triple_witching_week.csv)
Loading Economic Reporting Dates From CSV
Read from (economic_reported_date.csv)
Loading News Headlines From CSV
Read from (raw_partner_headlines.csv)
Adding Event Flag columns to ticker history
Read from (sector_price_history_processed_stg_3.csv)


Unnamed: 0,ticker,factor,price_chg_c2c_dir_0,price_chg_c2c_dir_1,price_chg_c2o_dir_0,price_chg_c2o_dir_1,volume_pchg_from_med_dir_0,volume_pchg_from_med_dir_1,price_chg_o2c_dir_0,price_chg_o2c_dir_1,price_chg_c2c_mag_0,price_chg_c2c_mag_1,price_chg_c2c_mag_diff,price_chg_o2c_mag_0,price_chg_o2c_mag_1,price_chg_o2c_mag_diff,tscore_c2o_mag_0,tscore_c2o_mag_1,tscore_c2o_mag_diff,price_chg_c2o_mag_0,price_chg_c2o_mag_1,price_chg_c2o_mag_diff,tscore_o2c_mag_0,tscore_o2c_mag_1,tscore_o2c_mag_diff,tscore_bo_mag_0,tscore_bo_mag_1,tscore_bo_mag_diff,vix_tscore_c2c_mag_0,vix_tscore_c2c_mag_1,vix_tscore_c2c_mag_diff,tscore_c2c_mag_0,tscore_c2c_mag_1,tscore_c2c_mag_diff,vix_chg_c2c_mag_0,vix_chg_c2c_mag_1,vix_chg_c2c_mag_diff
0,XHB,black_friday,0.508243,0.500000,0.536064,0.526786,0.501318,0.392857,0.488540,0.553571,0.013898,0.016297,0.002399,0.012309,0.013863,0.001555,0.711412,0.809036,0.097624,0.006554,0.007506,0.000952,0.748515,0.793564,0.045049,1.273023,1.417851,0.144828,0.702894,0.757572,0.054679,0.747045,0.825685,0.078640,0.055074,0.059339,0.004265
1,XLB,black_friday,0.518762,0.583851,0.531389,0.515528,0.505510,0.472050,0.494666,0.546584,0.010722,0.011354,0.000632,0.008967,0.008262,-0.000705,0.712800,0.751296,0.038496,0.005547,0.006103,0.000556,0.738788,0.699864,-0.038923,1.287871,1.449059,0.161187,0.717950,0.708400,-0.009550,0.745100,0.781193,0.036093,0.050670,0.051788,0.001118
2,XLE,black_friday,0.512716,0.515528,0.534412,0.453416,0.518699,0.546584,0.496622,0.540373,0.012559,0.015385,0.002826,0.010443,0.010723,0.000280,0.750188,0.940421,0.190233,0.006508,0.008352,0.001844,0.775497,0.755058,-0.020438,1.310782,1.217934,-0.092848,0.717950,0.708400,-0.009550,0.763900,0.908417,0.144517,0.050670,0.051788,0.001118
3,XLF,black_friday,0.505780,0.527950,0.513783,0.515528,0.514905,0.416149,0.490932,0.472050,0.011646,0.013643,0.001997,0.009676,0.009416,-0.000260,0.688771,0.735464,0.046693,0.006599,0.007742,0.001144,0.736076,0.678641,-0.057434,1.287748,1.488943,0.201195,0.717950,0.708400,-0.009550,0.732619,0.811002,0.078382,0.050670,0.051788,0.001118
4,XLI,black_friday,0.532278,0.534161,0.528721,0.521739,0.534598,0.422360,0.505156,0.540373,0.009330,0.009542,0.000212,0.007803,0.007556,-0.000247,0.726451,0.756203,0.029752,0.005524,0.006020,0.000496,0.739514,0.726380,-0.013133,1.344456,1.445484,0.101028,0.717950,0.708400,-0.009550,0.738511,0.752706,0.014195,0.050670,0.051788,0.001118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622,XLP,news_interest_rate,0.516809,0.512524,0.508262,0.514451,0.521344,0.539499,0.501519,0.533719,0.006790,0.006509,-0.000282,0.006114,0.005399,-0.000715,0.681316,0.769954,0.088637,0.003875,0.003182,-0.000693,0.720503,0.837183,0.116679,1.388237,1.391238,0.003002,0.713981,0.752758,0.038777,0.719393,0.844616,0.125223,0.049369,0.064215,0.014846
623,XLU,news_interest_rate,0.525546,0.547206,0.536372,0.512524,0.523856,0.539499,0.486517,0.539499,0.008344,0.008668,0.000324,0.007642,0.007500,-0.000141,0.672638,0.820142,0.147504,0.004070,0.003034,-0.001036,0.728043,0.878306,0.150262,1.382423,1.267904,-0.114518,0.713981,0.752758,0.038777,0.724675,0.904174,0.179499,0.049369,0.064215,0.014846
624,XLY,news_interest_rate,0.527445,0.531792,0.526496,0.576108,0.515550,0.529865,0.520129,0.520231,0.009819,0.008242,-0.001577,0.008322,0.006312,-0.002010,0.688160,0.782380,0.094220,0.005547,0.004577,-0.000970,0.737201,0.794174,0.056973,1.378097,1.514122,0.136025,0.713981,0.752758,0.038777,0.732904,0.844232,0.111328,0.049369,0.064215,0.014846
625,XRT,news_interest_rate,0.522189,0.514451,0.538166,0.531792,0.498785,0.516378,0.497782,0.502890,0.012075,0.010192,-0.001883,0.010395,0.008415,-0.001979,0.709988,0.787585,0.077598,0.006168,0.005059,-0.001109,0.774654,0.816439,0.041785,1.398852,1.188501,-0.210351,0.691402,0.752758,0.061355,0.762063,0.849804,0.087741,0.054009,0.064215,0.010206


In [7]:
# Inspect the event-date object. This displays the object's default repr
# (see the output below) rather than a table; the underlying DataFrame is
# exposed as `.df`, which the plotting cells further down read from. 
df_tickers_with_events

<modules.consolidate_eventdates.ConsolidateDates at 0x7fe066f21f10>

## Identify Convergence Across Different Metrics. 

In [8]:
# Derive the convergence table from the aggregates. The output below shows
# one row per ticker/factor pair with 0.0/1.0 values in the metric columns. 
# NOTE(review): the method is spelled `identify_covergence` in the module; the
# call has to match that spelling. 
df_identify_convergence = df_aggregated_data.identify_covergence() 
df_identify_convergence

Unnamed: 0,ticker,factor,price_chg_c2c_dir_0,price_chg_c2c_dir_1,price_chg_c2o_dir_0,price_chg_c2o_dir_1,volume_pchg_from_med_dir_0,volume_pchg_from_med_dir_1,price_chg_o2c_dir_0,price_chg_o2c_dir_1,price_chg_c2c_mag_0,price_chg_c2c_mag_1,price_chg_o2c_mag_0,price_chg_o2c_mag_1,tscore_c2o_mag_0,tscore_c2o_mag_1,price_chg_c2o_mag_0,price_chg_c2o_mag_1,tscore_o2c_mag_0,tscore_o2c_mag_1,tscore_bo_mag_0,tscore_bo_mag_1,vix_tscore_c2c_mag_0,vix_tscore_c2c_mag_1,tscore_c2c_mag_0,tscore_c2c_mag_1,vix_chg_c2c_mag_0,vix_chg_c2c_mag_1
0,XHB,black_friday,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,XLB,black_friday,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,XLE,black_friday,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,XLF,black_friday,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,XLI,black_friday,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622,XLP,news_interest_rate,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
623,XLU,news_interest_rate,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
624,XLY,news_interest_rate,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
625,XRT,news_interest_rate,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Columns shown for the rows that satisfy the convergence rule. 
cols = [
    "ticker",
    "factor",
    "tscore_c2o_mag_1",
    "tscore_c2c_mag_1",
    "tscore_bo_mag_1",
    "vix_tscore_c2c_mag_1",
    "price_chg_c2o_dir_1",
    "price_chg_c2c_dir_1",
    "volume_pchg_from_med_dir_1",
]

# Convergence rule: both flags below must be equal to 1.0. 
tscore_flag_set = df_identify_convergence["tscore_c2c_mag_1"].eq(1.0)
volume_flag_set = df_identify_convergence["volume_pchg_from_med_dir_1"].eq(1.0)
boo_conditions = tscore_flag_set & volume_flag_set

# Preview the matching rows. 
df_identify_convergence.loc[boo_conditions, cols]

Unnamed: 0,ticker,factor,tscore_c2o_mag_1,tscore_c2c_mag_1,tscore_bo_mag_1,vix_tscore_c2c_mag_1,price_chg_c2o_dir_1,price_chg_c2c_dir_1,volume_pchg_from_med_dir_1
2,XLE,black_friday,1.0,1.0,1.0,0.0,1.0,1.0,1.0
23,XLB,columbus,0.0,1.0,1.0,0.0,1.0,1.0,1.0
24,XLE,columbus,0.0,1.0,1.0,0.0,1.0,1.0,1.0
35,XLE,cyber_monday,1.0,1.0,1.0,0.0,1.0,1.0,1.0
276,XLB,ism_pmi_manufacturer,0.0,1.0,1.0,0.0,1.0,1.0,1.0
279,XLI,ism_pmi_manufacturer,0.0,1.0,1.0,0.0,1.0,1.0,1.0
501,XLP,gdp_advance_us,0.0,1.0,1.0,0.0,1.0,1.0,1.0
528,XHB,fomc_presscf,0.0,1.0,1.0,0.0,1.0,1.0,1.0
529,XLB,fomc_presscf,1.0,1.0,1.0,0.0,1.0,1.0,1.0
530,XLE,fomc_presscf,1.0,1.0,1.0,0.0,1.0,1.0,1.0


# Visualise Aggregates. 

## Analyse The Magnitude Of T-Scores & Probabilities. 

In [43]:
# Configuration for text formatting (cell annotations and tooltips). 
format_text = ".1f"

# Configuration for the colour scale range limit. 
scale_range_lim = [0.7, 0.9]

# Define the columns you want to visualise; the last entry is the measure
# that is drawn in every cell. 
cols_to_vis = ["ticker", "factor", "tscore_c2o_mag_1"]
measure = cols_to_vis[-1]

# Base encoding: one cell per (factor, ticker) pair. 
base = (
    alt.Chart(df_aggregated_data.df[cols_to_vis])
    .encode(
        x=alt.X(
            "factor:N",
            axis=alt.Axis(title="factor", titleFontSize=14, labelFontSize=10),
        ),
        y=alt.Y(
            "ticker:N",
            # The y channel holds tickers, so the axis is titled "ticker"
            # (it was previously mislabelled as "factor"). 
            axis=alt.Axis(title="ticker", titleFontSize=14, labelFontSize=10),
        ),
        tooltip=[
            alt.Tooltip(f"{measure}:Q", title=measure, format=format_text),
        ],
    )
    .properties(height=350, width=1300)
)

# Visualisation approach: coloured rectangles on the configured scale. 
heatmap = base.mark_rect(opacity=1).encode(
    color=alt.Color(
        f"{measure}:Q",
        scale=alt.Scale(domain=scale_range_lim, scheme="goldred", reverse=False),
        legend=alt.Legend(direction="vertical"),
    )
)

# Annotation: print the measure value inside each cell. 
text = base.mark_text(baseline="middle").encode(
    text=alt.Text(f"{measure}:Q", format=format_text)
)

# Visualise the plot. 
(heatmap + text).interactive()

In [11]:
# Configuration for text formatting (cell annotations and tooltips). 
format_text = ".1f"

# Configuration for the colour scale range limit. 
scale_range_lim = [0.3, 0.7]

# Define the columns you want to visualise; the last entry is the measure
# that is drawn in every cell. 
cols_to_vis = ["ticker", "factor", "volume_pchg_from_med_dir_1"]
measure = cols_to_vis[-1]

# Base encoding: one cell per (factor, ticker) pair. 
base = (
    alt.Chart(df_aggregated_data.df[cols_to_vis])
    .encode(
        x=alt.X(
            "factor:N",
            axis=alt.Axis(title="factor", titleFontSize=14, labelFontSize=10),
        ),
        y=alt.Y(
            "ticker:N",
            # The y channel holds tickers, so the axis is titled "ticker"
            # (it was previously mislabelled as "factor"). 
            axis=alt.Axis(title="ticker", titleFontSize=14, labelFontSize=10),
        ),
        tooltip=[
            alt.Tooltip(f"{measure}:Q", title=measure, format=format_text),
        ],
    )
    .properties(height=350, width=1300)
)

# Visualisation approach: coloured rectangles on the configured scale. 
heatmap = base.mark_rect(opacity=1).encode(
    color=alt.Color(
        f"{measure}:Q",
        scale=alt.Scale(domain=scale_range_lim, scheme="goldred", reverse=False),
        legend=alt.Legend(direction="vertical"),
    )
)

# Annotation: print the measure value inside each cell. 
text = base.mark_text(baseline="middle").encode(
    text=alt.Text(f"{measure}:Q", format=format_text)
)

# Visualise the plot. 
(heatmap + text).interactive()

In [12]:
# Configuration for text formatting (cell annotations and tooltips). 
format_text = ".1f"

# Configuration for the colour scale range limit. 
scale_range_lim = [0.7, 1.0]

# Define the columns you want to visualise; the last entry is the measure
# that is drawn in every cell. 
cols_to_vis = ["ticker", "factor", "vix_tscore_c2c_mag_1"]
measure = cols_to_vis[-1]

# Base encoding: one cell per (factor, ticker) pair. 
base = (
    alt.Chart(df_aggregated_data.df[cols_to_vis])
    .encode(
        x=alt.X(
            "factor:N",
            axis=alt.Axis(title="factor", titleFontSize=14, labelFontSize=10),
        ),
        y=alt.Y(
            "ticker:N",
            # The y channel holds tickers, so the axis is titled "ticker"
            # (it was previously mislabelled as "factor"). 
            axis=alt.Axis(title="ticker", titleFontSize=14, labelFontSize=10),
        ),
        tooltip=[
            alt.Tooltip(f"{measure}:Q", title=measure, format=format_text),
        ],
    )
    .properties(height=350, width=1300)
)

# Visualisation approach: coloured rectangles on the configured scale. 
heatmap = base.mark_rect(opacity=1).encode(
    color=alt.Color(
        f"{measure}:Q",
        scale=alt.Scale(domain=scale_range_lim, scheme="goldred", reverse=False),
        legend=alt.Legend(direction="vertical"),
    )
)

# Annotation: print the measure value inside each cell. 
text = base.mark_text(baseline="middle").encode(
    text=alt.Text(f"{measure}:Q", format=format_text)
)

# Visualise the plot. 
(heatmap + text).interactive()

## Analyse The Effect Of FOMC Over Time. 

In [17]:
# Number format used by the tooltips. 
format_text = ".1f"

# Recession periods from the project config, as a frame Altair can plot. 
df_recessions = pd.DataFrame(RECESSIONS)

# One-row frame holding the levels of the two horizontal guide lines. 
df_hline_hilo = pd.DataFrame(data={"upper": [0.9], "lower": [-0.9]})

# Event flag column and measure column to plot. 
specify_factor = "fomc_presscf"
specify_measure = "tscore_c2c"

# Columns handed to each per-ticker chart; the measure comes last. 
cols_to_vis = ["date", "ticker", specify_factor, specify_measure]

# Layers that look the same on every panel: two guide lines ... 
hline_hi = alt.Chart(df_hline_hilo).encode(y="upper").mark_rule(color="black")
hline_lo = alt.Chart(df_hline_hilo).encode(y="lower").mark_rule(color="black")

# ... and one shaded band per named recession. 
vline_recess_dotcom = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DotCom 2001")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_debt = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DebtCrisis 2008")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_covid = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "Covid 2019")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)

# Empty vertical stack; one panel per ticker is appended below. 
combined_plot = alt.vconcat()

events = df_tickers_with_events.df
is_event_row = events[specify_factor] == 1

for ticker in events["ticker"].unique():

    # Rows of this ticker on which the event flag equals 1. 
    is_ticker_row = events["ticker"] == ticker
    df_tickers_fil = events.loc[is_event_row & is_ticker_row, cols_to_vis]

    # Orange points: date on x, measure on y. 
    scatter = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                "date:T",
                axis=alt.Axis(title="", titleFontSize=14, labelFontSize=10),
            ),
            y=alt.Y(
                f"{specify_measure}:Q",
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
            ),
            tooltip=[
                alt.Tooltip(f"{cols_to_vis[-1]}:Q", title=cols_to_vis[-1], format=format_text),
            ],
        )
        .properties(title=ticker, height=75, width=1150)
        .mark_point(opacity=1, color="orange", filled=True, size=50)
    )

    # Layer everything and append the panel as a new row. 
    panel = scatter + hline_hi + hline_lo + vline_recess_dotcom + vline_recess_debt + vline_recess_covid
    combined_plot &= panel.interactive()

# Preview visual. 
combined_plot

In [18]:
# Number format used by the tooltips. 
format_text = ".1f"

# Recession periods from the project config, as a frame Altair can plot. 
df_recessions = pd.DataFrame(RECESSIONS)

# One-row frame holding the levels of the two horizontal guide lines. 
df_hline_hilo = pd.DataFrame(data={"upper": [0.9], "lower": [-0.9]})

# Event flag column and measure column to plot. 
specify_factor = "fomc_presscf"
specify_measure = "vix_tscore_c2c"

# Columns handed to each per-ticker chart; the measure comes last. 
cols_to_vis = ["date", "ticker", specify_factor, specify_measure]

# Layers that look the same on every panel: two guide lines ... 
hline_hi = alt.Chart(df_hline_hilo).encode(y="upper").mark_rule(color="black")
hline_lo = alt.Chart(df_hline_hilo).encode(y="lower").mark_rule(color="black")

# ... and one shaded band per named recession. 
vline_recess_dotcom = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DotCom 2001")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_debt = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DebtCrisis 2008")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_covid = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "Covid 2019")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)

# Empty vertical stack; one panel per ticker is appended below. 
combined_plot = alt.vconcat()

events = df_tickers_with_events.df
is_event_row = events[specify_factor] == 1

for ticker in events["ticker"].unique():

    # Rows of this ticker on which the event flag equals 1. 
    is_ticker_row = events["ticker"] == ticker
    df_tickers_fil = events.loc[is_event_row & is_ticker_row, cols_to_vis]

    # Orange points: date on x, measure on y. 
    scatter = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                "date:T",
                axis=alt.Axis(title="", titleFontSize=14, labelFontSize=10),
            ),
            y=alt.Y(
                f"{specify_measure}:Q",
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
            ),
            tooltip=[
                alt.Tooltip(f"{cols_to_vis[-1]}:Q", title=cols_to_vis[-1], format=format_text),
            ],
        )
        .properties(title=ticker, height=75, width=1150)
        .mark_point(opacity=1, color="orange", filled=True, size=50)
    )

    # Layer everything and append the panel as a new row. 
    panel = scatter + hline_hi + hline_lo + vline_recess_dotcom + vline_recess_debt + vline_recess_covid
    combined_plot &= panel.interactive()

# Preview visual. 
combined_plot

## Analyse The Effect Of ISM PMI Manufacturer Over Time. 

In [19]:
# Number format used by the tooltips. 
format_text = ".1f"

# Recession periods from the project config, as a frame Altair can plot. 
df_recessions = pd.DataFrame(RECESSIONS)

# One-row frame holding the levels of the two horizontal guide lines. 
df_hline_hilo = pd.DataFrame(data={"upper": [0.9], "lower": [-0.9]})

# Event flag column and measure column to plot. 
specify_factor = "ism_pmi_manufacturer"
specify_measure = "tscore_c2c"

# Columns handed to each per-ticker chart; the measure comes last. 
cols_to_vis = ["date", "ticker", specify_factor, specify_measure]

# Layers that look the same on every panel: two guide lines ... 
hline_hi = alt.Chart(df_hline_hilo).encode(y="upper").mark_rule(color="black")
hline_lo = alt.Chart(df_hline_hilo).encode(y="lower").mark_rule(color="black")

# ... and one shaded band per named recession. 
vline_recess_dotcom = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DotCom 2001")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_debt = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DebtCrisis 2008")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_covid = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "Covid 2019")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)

# Empty vertical stack; one panel per ticker is appended below. 
combined_plot = alt.vconcat()

events = df_tickers_with_events.df
is_event_row = events[specify_factor] == 1

for ticker in ["XLB", "XLI"]:

    # Rows of this ticker on which the event flag equals 1. 
    is_ticker_row = events["ticker"] == ticker
    df_tickers_fil = events.loc[is_event_row & is_ticker_row, cols_to_vis]

    # Orange points: date on x, measure on y. 
    scatter = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                "date:T",
                axis=alt.Axis(title="", titleFontSize=14, labelFontSize=10),
            ),
            y=alt.Y(
                f"{specify_measure}:Q",
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
            ),
            tooltip=[
                alt.Tooltip(f"{cols_to_vis[-1]}:Q", title=cols_to_vis[-1], format=format_text),
            ],
        )
        .properties(title=ticker, height=75, width=1150)
        .mark_point(opacity=1, color="orange", filled=True, size=50)
    )

    # Layer everything and append the panel as a new row. 
    panel = scatter + hline_hi + hline_lo + vline_recess_dotcom + vline_recess_debt + vline_recess_covid
    combined_plot &= panel.interactive()

# Preview visual. 
combined_plot

## Analyse The Effect Of OPEC Over Time. 

In [21]:
# Number format used by the tooltips. 
format_text = ".1f"

# Recession periods from the project config, as a frame Altair can plot. 
df_recessions = pd.DataFrame(RECESSIONS)

# One-row frame holding the levels of the two horizontal guide lines. 
df_hline_hilo = pd.DataFrame(data={"upper": [0.9], "lower": [-0.9]})

# Event flag column and measure column to plot. 
specify_factor = "opec"
specify_measure = "tscore_c2c"

# Columns handed to each per-ticker chart; the measure comes last. 
cols_to_vis = ["date", "ticker", specify_factor, specify_measure]

# Layers that look the same on every panel: two guide lines ... 
hline_hi = alt.Chart(df_hline_hilo).encode(y="upper").mark_rule(color="black")
hline_lo = alt.Chart(df_hline_hilo).encode(y="lower").mark_rule(color="black")

# ... and one shaded band per named recession. 
vline_recess_dotcom = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DotCom 2001")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_debt = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "DebtCrisis 2008")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)
vline_recess_covid = (
    alt.Chart(df_recessions)
    .transform_filter(alt.datum.recession == "Covid 2019")
    .encode(x="date_start:T", x2="date_end:T")
    .mark_rect(color="black", opacity=.2)
)

# Empty vertical stack; one panel per ticker is appended below. 
combined_plot = alt.vconcat()

events = df_tickers_with_events.df
is_event_row = events[specify_factor] == 1

for ticker in ["XLB", "XLI", "XLE", "XLF", "XLK", "XLP", "XLU"]:

    # Rows of this ticker on which the event flag equals 1. 
    is_ticker_row = events["ticker"] == ticker
    df_tickers_fil = events.loc[is_event_row & is_ticker_row, cols_to_vis]

    # Orange points: date on x, measure on y. 
    scatter = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                "date:T",
                axis=alt.Axis(title="", titleFontSize=14, labelFontSize=10),
            ),
            y=alt.Y(
                f"{specify_measure}:Q",
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
            ),
            tooltip=[
                alt.Tooltip(f"{cols_to_vis[-1]}:Q", title=cols_to_vis[-1], format=format_text),
            ],
        )
        .properties(title=ticker, height=75, width=1150)
        .mark_point(opacity=1, color="orange", filled=True, size=50)
    )

    # Layer everything and append the panel as a new row. 
    panel = scatter + hline_hi + hline_lo + vline_recess_dotcom + vline_recess_debt + vline_recess_covid
    combined_plot &= panel.interactive()

# Preview visual. 
combined_plot

## Analyse The Distribution Of FOMC. 

In [42]:
# Define the factor and measure to generate the visuals. 
specify_factor = "fomc_presscf"
specify_measure = "tscore_c2c"

# Define the columns you want to visualise. 
cols_to_vis = ["ticker", specify_factor, specify_measure]

# Sampling configuration. The row cap keeps the chart data at 5000 rows
# (NOTE(review): this matches Altair's default row limit, which is presumably
# why the data is sampled at all). The fixed seed makes the sample, and hence
# the figure, identical on every run. 
sample_max_rows = 5000
sample_seed = 42

# Visualise the plot. 
combined_plot = alt.vconcat()

for ticker in ["XLB"]:

    boo_fil = (df_tickers_with_events.df["ticker"] == ticker)
    df_ticker_all = df_tickers_with_events.df.loc[boo_fil, cols_to_vis]

    # Never ask for more rows than exist: `sample` raises otherwise. 
    df_tickers_fil = df_ticker_all.sample(
        n=min(sample_max_rows, len(df_ticker_all)),
        random_state=sample_seed,
    )

    # Base encoding: event flag on x, measure on y. 
    base = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                f"{specify_factor}:N",
                axis=alt.Axis(title=specify_factor, titleFontSize=14, labelFontSize=10),
            ),
            y=alt.Y(
                f"{specify_measure}:Q",
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
            ),
        )
        .properties(title=ticker, height=400, width=400)
    )

    # Visualisation approach. 
    box = base.mark_boxplot()

    # Concat visuals row by row. 
    combined_plot &= (box).interactive()

# Preview visual. 
combined_plot

## Analyse The Distribution Of ISM PMI Manufacturer. 

In [36]:
# Define the factor and measure to generate the visuals. 
specify_factor = "ism_pmi_manufacturer"
specify_measure = "tscore_c2c"

# Define the columns you want to visualise. 
cols_to_vis = ["ticker", specify_factor, specify_measure]

# Sampling configuration. The row cap keeps the chart data at 5000 rows
# (NOTE(review): this matches Altair's default row limit, which is presumably
# why the data is sampled at all). The fixed seed makes the sample, and hence
# the figure, identical on every run. 
sample_max_rows = 5000
sample_seed = 42

# Visualise the plot. 
combined_plot = alt.vconcat()

for ticker in ["XLB"]:

    boo_fil = (df_tickers_with_events.df["ticker"] == ticker)
    df_ticker_all = df_tickers_with_events.df.loc[boo_fil, cols_to_vis]

    # Never ask for more rows than exist: `sample` raises otherwise. 
    df_tickers_fil = df_ticker_all.sample(
        n=min(sample_max_rows, len(df_ticker_all)),
        random_state=sample_seed,
    )

    # Base encoding: event flag on x, measure on y. 
    base = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                f"{specify_factor}:N",
                axis=alt.Axis(title=specify_factor, titleFontSize=14, labelFontSize=10),
            ),
            y=alt.Y(
                f"{specify_measure}:Q",
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
            ),
        )
        .properties(title=ticker, height=400, width=400)
    )

    # Visualisation approach. 
    box = base.mark_boxplot()

    # Concat visuals row by row. 
    combined_plot &= (box).interactive()

# Preview visual. 
combined_plot

In [40]:
# Define the factor and measure to generate the visuals. 
specify_factor = "ism_pmi_manufacturer"
specify_measure = "tscore_c2c"

# Define the columns you want to visualise. 
cols_to_vis = ["ticker", specify_factor, specify_measure]

# Sampling configuration. The row cap keeps the chart data at 5000 rows
# (NOTE(review): this matches Altair's default row limit, which is presumably
# why the data is sampled at all). The fixed seed makes the sample, and hence
# the figure, identical on every run. 
sample_max_rows = 5000
sample_seed = 42

# Visualise the plot. 
combined_plot = alt.vconcat()

for ticker in ["XLB"]:

    boo_fil = (df_tickers_with_events.df["ticker"] == ticker)
    df_ticker_all = df_tickers_with_events.df.loc[boo_fil, cols_to_vis]

    # Never ask for more rows than exist: `sample` raises otherwise. 
    df_tickers_fil = df_ticker_all.sample(
        n=min(sample_max_rows, len(df_ticker_all)),
        random_state=sample_seed,
    )

    # Base encoding: binned measure on x, number of rows per bin on y. 
    base = (
        alt.Chart(df_tickers_fil)
        .encode(
            x=alt.X(
                f"{specify_measure}:Q",
                # The x channel shows the measure, so it carries the measure's
                # name (it was previously titled with the factor name). 
                axis=alt.Axis(title=specify_measure, titleFontSize=14, labelFontSize=10),
                bin=alt.Bin(maxbins=100),
            ),
            y=alt.Y(
                "count()",
                # The y channel is a row count, not the measure itself. 
                axis=alt.Axis(title="count", titleFontSize=14, labelFontSize=10),
            ),
            tooltip=[
                alt.Tooltip(f"{specify_factor}:N"),
            ],
        )
        .properties(title=ticker, height=400, width=400)
    )

    # Visualisation approach: semi-transparent bars coloured by the event flag. 
    # NOTE(review): with a colour channel the bars are stacked by default; set
    # `stack=None` on the y channel if overlapping histograms were intended. 
    hist = base.mark_bar(opacity=.3, binSpacing=0).encode(
        color=alt.Color(
            f"{specify_factor}:N",
            legend=alt.Legend(direction="vertical"),
        )
    )

    # Concat visuals row by row. 
    combined_plot &= (hist).interactive()

# Preview visual. 
combined_plot

# Rule's Rules Computational Narratives. 

- __Rule 2: Document the process, not just the results:__ The processes for the initial investigation, data processing, and analysis are documented in addition to presenting the results. Comments are also included to explain the specific steps or breakdown.  

- __Rule 3: Use cell divisions to make steps clear:__ The processes for the initial investigation, data processing, and analysis are broken into separate cells to ensure that each process is digestible, understandable, and organised. All the configurations and functions related to each process are organised into a single cell to make it easier to locate them. Comments are given for each configuration and function to articulate the purpose and steps. 

- __Rule 4: Modularize code:__ Functions are used to adhere to the DRY (Don't Repeat Yourself) principle. Each function serves a single, specific purpose so that its functionality is clear when readers read or apply the function name. The functions can be used together with other functions or reconfigured with different parameters since they are generalised. 

- __Rule 5: Record dependencies:__ The dependencies are recorded inside the `Pipfile` and `Pipfile.lock`. Readers or researchers can easily install all the libraries in their own virtual environment using `pipenv` or `venv`. A virtual environment and a package manager are important to avoid polluting your own base libraries or causing dependency conflicts. I have also included a `Dockerfile` as an alternative option for readers or researchers to kickstart the project easily. However, they need a publicly accessible IP, using a tool such as `ngrok`, to access JupyterLab in a browser outside of the container. Docker is safer in case some libraries are affected by differences between operating systems. 

- __Rule 8: Share and explain your data:__ The data (unprocessed version) is shared with the lecturers or graders so that they can reproduce the results. A brief explanation of the data is provided to help readers familiarise themselves with the nature of the data, its limitations, assumptions, definitions, units of measurement, and so on. It also helps them to evaluate the reasons behind the data processing and analysis more accurately. 

- __Rule 9: Design your notebooks to be read, run, and explored:__ Readers or researchers can explore the notebook with less difficulty since the processes are organised into separate cells and modularised into separate functions. They can reconfigure the settings, tweak the functions, and print the results in between the cells to evaluate the processes, learn about the analysis, or explore a different direction. Brief installation steps and dependency requirements are provided to help them start and replicate the project easily. 