# Baseline Features

The baseline features include: `Timestamp`, `Open`, `High`, `Low`, `Close`, `Volume`.
- Working with the full index dataset (7.0M entries) can lead to excessive training times.
- We'll restrict our dataset to the most recent `100,000` data points, which are likely more relevant for future predictions.
- Note that a `training period` of only two months may not capture all relevant trends, despite containing substantial data.

## Target Variable

- We define our prediction variable `signal` using the `Close` price relative to the Bull Market Support Band (the lower of 20-week SMA and 21-week EMA).
- If the closing price is at or below the Bull Market Support Band, it signals a bear market; otherwise, it signals a bull market.
- The trading strategy assigns signal value = 1 (buy) in bull markets and signal value = 0 (sell) in bear markets.
- The window lengths of both moving averages are tunable, somewhat arbitrary choices that can affect the results; ideally, an optimisation study should be carried out to find the best values.


In [None]:
import os
import pandas as pd

# Locations of the pre-processed CSV splits.
PROCESSED_PATH = "data/processed/"
TEST_DATA_NAME = "test_data.csv"
VALIDATION_DATA_NAME = "validation_data.csv"

TEST_DATA_FILE = os.path.join(PROCESSED_PATH, TEST_DATA_NAME)
VALIDATION_DATA__FILE = os.path.join(PROCESSED_PATH, VALIDATION_DATA_NAME)

# Load each split and index it by its "Timestamp" column.
# NOTE(review): the training frame is read from the file named
# "test_data.csv" -- confirm this is the intended split.
train_data = pd.read_csv(TEST_DATA_FILE).set_index("Timestamp")
valid_data = pd.read_csv(VALIDATION_DATA__FILE).set_index("Timestamp")

# Print column dtypes / non-null counts as a quick sanity check.
train_data.info()

In [None]:
import numpy as np


def generate_bull_bear_signals(
    price_data,
    verbose=True,
    *,
    sma_window=20 * 7 * 24 * 60,
    ema_span=21 * 7 * 24 * 60,
):
    """Add Bull Market Support Band columns and a binary ``signal`` column.

    The frame is modified in place and also returned. Four columns are
    written: ``SMA_20W`` (rolling mean of ``Close``), ``EMA_21W``
    (exponentially weighted mean of ``Close``), ``Bull_Support_Band`` (the
    row-wise minimum of the two) and ``signal`` (1.0 where ``Close`` is
    strictly above the band, 0.0 otherwise).

    Args:
        price_data: DataFrame with a ``Close`` column. Mutated in place.
        verbose: When true, show the ``signal`` value counts via the
            notebook's ``display`` helper.
        sma_window: Rolling-mean window, in rows. The default is the number
            of minutes in 20 weeks (201,600), i.e. it assumes one row per
            minute.
        ema_span: ``span`` of the exponentially weighted mean, in rows. The
            default is the number of minutes in 21 weeks (211,680).

    Returns:
        The same ``price_data`` object, with the four columns added.

    Note:
        Both averages use ``min_periods=1``, so rows that have fewer than a
        full window of history (including every row of a frame shorter than
        the window) get an average over whatever history is available
        rather than NaN. The column names keep their ``20W`` / ``21W``
        suffixes even when custom windows are passed.
    """
    close = price_data["Close"]

    # Simple moving average over the trailing `sma_window` rows.
    price_data["SMA_20W"] = close.rolling(
        window=sma_window, min_periods=1, center=False
    ).mean()

    # Recursive (adjust=False) exponential moving average.
    price_data["EMA_21W"] = close.ewm(
        span=ema_span, min_periods=1, adjust=False
    ).mean()

    # The support band is the lower of the two indicators on each row.
    price_data["Bull_Support_Band"] = price_data[["SMA_20W", "EMA_21W"]].min(axis=1)

    # 1.0 (buy) when Close is strictly above the band, 0.0 (sell) otherwise.
    price_data["signal"] = np.where(close > price_data["Bull_Support_Band"], 1.0, 0.0)

    if verbose:
        display(price_data["signal"].value_counts())

    return price_data


# Work on copies: generate_bull_bear_signals writes its columns into the
# frame it is given, and the raw splits should stay untouched.
train_data_with_signals, valid_data_with_signals = train_data.copy(), valid_data.copy()

# Signal counts are shown for the training split only.
generate_bull_bear_signals(train_data_with_signals, verbose=True)
generate_bull_bear_signals(valid_data_with_signals, verbose=False)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


def plot_target_correlation(df, target="demand", figsize=(12, 1), return_corr=False):
    """Show how each column of ``df`` correlates with the ``target`` column.

    Args:
        df: DataFrame containing ``target`` and the feature columns.
        target: Name of the column to correlate every other column against.
        figsize: Matplotlib figure size for the one-row heatmap.
        return_corr: When true, skip plotting and return the correlation
            Series instead.

    Returns:
        The correlation Series (target's own entry removed) when
        ``return_corr`` is true, otherwise the current matplotlib figure
        holding the heatmap.
    """
    # Take the target's column of the correlation matrix, minus its own entry.
    target_corr = df.corr()[target]
    target_corr = target_corr.drop(target, errors="ignore")

    if return_corr:
        return target_corr

    # Colour scale is centred on 0 and limited to the [-0.3, 0.3] range.
    heatmap_options = {
        "annot": True,
        "cmap": "coolwarm",
        "center": 0,
        "vmin": -0.3,
        "vmax": 0.3,
        "cbar": False,
        "linewidths": 1,
    }

    plt.figure(figsize=figsize)
    # Transpose so the features run along the x-axis of a single-row heatmap.
    single_row = target_corr.to_frame().T
    sns.heatmap(single_row, **heatmap_options)
    plt.title(f"Feature Correlation with {target}")
    plt.subplots_adjust(bottom=0.3, top=0.8)

    return plt.gcf()

In [None]:
# Correlation of every training column with the generated "signal" target.
plot_target_correlation(train_data_with_signals, target="signal", figsize=(7, 0.5))