In [1]:
import numpy as np
import pandas as pd

In [None]:
'''
WILL EXPLORE THE FOLLOWING FEATURES:

Day: The day of the month, extracted from the Date column.
Daily Variation: The difference between the High and Low columns, divided by the Open column. This feature represents the volatility of the index on that day.
TimeStamp: The number of seconds elapsed since January 1st, 1970 00:00:00 UTC, calculated from the Date column. This feature represents the temporal order of the observations.
Index Hash: A numerical representation of the Index column, obtained by applying a hash function. This feature encodes the identity of each index as a compact integer; distinct indices are very likely, though not guaranteed, to map to distinct values.
Daily Return: The percentage change in the Close column from the previous day’s Close column. This feature represents the performance of the index on that day.
7-Day SMA: The 7-day simple moving average of the Close column. This feature represents the short-term trend of the index.
7-Day STD: The 7-day standard deviation of the Close column. This feature represents the short-term variability of the index.
SMA + 2 STD: The 7-day SMA plus two times the 7-day STD. This feature represents the upper bound of a confidence interval for the index.
SMA - 2 STD: The 7-day SMA minus two times the 7-day STD. This feature represents the lower bound of a confidence interval for the index.
High - Close: The difference between the High and Close columns, divided by the Open column. This feature represents the downward pressure on the index on that day.
Low - Open: The difference between the Low and Open columns, divided by the Open column. This feature represents the upward pressure on the index on that day.
Cumulative Return: The cumulative percentage change in the Close column from the first observation in the training set. This feature represents the long-term performance of the index.
14-Day EMA: The 14-day exponential moving average of the Close column. This feature represents a smoother and more responsive version of the SMA.
Close % Change: The percentage change in the Close column from the previous day’s Close column. This feature is similar to Daily Return, but without scaling by 100.
Close Change: The difference between the Close and previous day’s Close columns. This feature is similar to Daily Return, but without dividing by previous day’s Close column.
RSI: The relative strength index, calculated from a 14-day window of Close % Change. This feature is a popular technical indicator that measures the momentum and overbought/oversold conditions of an asset.
MACD: The moving average convergence divergence, calculated from a 12-day EMA and a 26-day EMA of Close % Change. This feature is another popular technical indicator that measures the trend and momentum of an asset.
Stochastic Oscillator: A technical indicator that compares the Close column with the High and Low columns over a 14-day window. This feature measures the position of the index relative to its recent range.
ATR: The average true range, calculated from a 14-day window of Daily Variation. This feature measures the volatility of the index over time.
ADX: The average directional index, calculated from a 14-day window of High, Low, and Close columns. This feature measures the strength and direction of the trend of the index.
DMI: The directional movement index, calculated from a 14-day window of High, Low, and Close columns. This feature measures the positive and negative movements of the index.
'''

In [3]:
def extract_year(date):
    """Return the year component of each entry in *date*.

    *date* must expose the pandas ``.dt`` accessor (e.g. a datetime Series).
    """
    datetime_parts = date.dt
    return datetime_parts.year

def extract_month(date):
    """Return the month component of each entry in *date*.

    *date* must expose the pandas ``.dt`` accessor (e.g. a datetime Series).
    """
    datetime_parts = date.dt
    return datetime_parts.month

def extract_day(date):
    """Return the day-of-month component of each entry in *date*.

    *date* must expose the pandas ``.dt`` accessor (e.g. a datetime Series).
    """
    datetime_parts = date.dt
    return datetime_parts.day

def calculate_daily_variation(open_price, high, low):
    """Return the high-low range expressed as a fraction of the open price."""
    intraday_range = high - low
    return intraday_range / open_price

def calculate_timestamp(date):
    """Calculates the timestamp."""
    return date.astype(int) // 10**9  # Convert to Unix timestamp

def calculate_index_hash(index):
    """Map each index label to a reproducible integer code.

    Python's built-in ``hash`` is salted per interpreter process for strings,
    so it yields different codes on every run and cannot be used as a stable
    feature. CRC-32 of the label's UTF-8 text is used instead: it is
    deterministic across runs and machines and fits in 32 bits.

    Parameters:
    - index: Pandas Series of index labels (each value is converted with
      ``str`` before hashing).

    Returns:
    Pandas Series of non-negative integers, aligned with *index*. Equal labels
    always receive equal codes; distinct labels can collide, though this is
    unlikely for a small set of labels.
    """
    import zlib  # local import so the function stays self-contained

    def _stable_hash(value):
        return zlib.crc32(str(value).encode("utf-8"))

    return index.apply(_stable_hash)

def calculate_daily_return(close, lag_close):
    """Return the close-to-close change as a percentage of *lag_close*."""
    price_change = close - lag_close
    fractional_change = price_change / lag_close
    return fractional_change * 100

def calculate_7_day_sma(close):
    """Return the rolling mean of *close* over a 7-observation window.

    The first six positions are NaN because the window is not yet full.
    """
    weekly_window = close.rolling(window=7)
    return weekly_window.mean()

def calculate_7_day_std(close):
    """Return the rolling population standard deviation of *close*.

    Uses a 7-observation window with ``ddof=0``; the first six positions are
    NaN because the window is not yet full.
    """
    weekly_window = close.rolling(window=7)
    return weekly_window.std(ddof=0)

def calculate_sma_plus_2_std(sma, std):
    """Return the upper band: *sma* raised by twice *std*."""
    band_width = 2 * std
    return sma + band_width

def calculate_sma_minus_2_std(sma, std):
    """Return the lower band: *sma* lowered by twice *std*."""
    band_width = 2 * std
    return sma - band_width

def calculate_high_close(open_price, high, close):
    """Return the gap between high and close as a fraction of the open price."""
    pullback_from_high = high - close
    return pullback_from_high / open_price

def calculate_low_open(low, open_price):
    """Return the gap between low and open as a fraction of the open price.

    The result is zero or negative whenever the low does not exceed the open.
    """
    drop_from_open = low - open_price
    return drop_from_open / open_price

def calculate_cumulative_return(close):
    """Return the percentage change of each close relative to the first close.

    The baseline is the positionally first element (``close.iloc[0]``).
    """
    baseline = close.iloc[0]
    change_from_baseline = close - baseline
    return (change_from_baseline / baseline) * 100

def calculate_14_day_ema(close):
    """Return the exponential moving average of *close* with a span of 14.

    Computed recursively (``adjust=False``), so the first output equals the
    first input.
    """
    exponential_window = close.ewm(span=14, adjust=False)
    return exponential_window.mean()

def calculate_close_percent_change(close, lag_close):
    """Return the close-to-close change as a fraction of *lag_close*.

    Unlike the daily return, the ratio is not multiplied by 100.
    """
    price_change = close - lag_close
    return price_change / lag_close

def calculate_close_change(close, lag_close):
    """Return the absolute difference between *close* and *lag_close*."""
    price_change = close - lag_close
    return price_change

def calculate_rsi(close_percent_change, window=14):
    """
    Computes a relative strength index (RSI) from percentage changes.

    Positive changes are treated as gains and the magnitude of negative changes
    as losses; anything else (including NaN) counts as zero. Both are averaged
    with a simple rolling mean that starts producing values from the first
    observation (``min_periods=1``), and RSI = 100 - 100 / (1 + gain / loss).

    Parameters:
    - close_percent_change: Pandas Series containing the percentage change in closing prices.
    - window: Number of observations in the rolling averages (default: 14).

    Returns:
    Pandas Series containing the RSI for each day. The value is 100 when the
    average loss is zero but the average gain is not, and NaN when both are zero.
    """
    is_rise = close_percent_change > 0
    is_fall = close_percent_change < 0
    upward = close_percent_change.where(is_rise, 0)
    downward = np.abs(close_percent_change.where(is_fall, 0))

    rolling_options = {"window": window, "min_periods": 1}
    mean_upward = upward.rolling(**rolling_options).mean()
    mean_downward = downward.rolling(**rolling_options).mean()

    relative_strength = mean_upward / mean_downward
    return 100 - (100 / (1 + relative_strength))

def calculate_macd(close_percent_change, short_window=12, long_window=26, signal_window=9):
    """
    Calculates the moving average convergence divergence (MACD).

    The MACD line is the short-span EMA minus the long-span EMA of the input;
    the signal line is an EMA of the MACD line. All EMAs are computed
    recursively (``adjust=False``). No histogram is returned; callers can
    derive it as ``macd_line - signal_line``.

    Parameters:
    - close_percent_change: Pandas Series containing the percentage change in closing prices.
    - short_window: Span of the short-term EMA (default: 12).
    - long_window: Span of the long-term EMA (default: 26).
    - signal_window: Span of the EMA applied to the MACD line to form the
      signal line (default: 9, the value that was previously hard-coded).

    Returns:
    Tuple ``(macd_line, signal_line)`` of Pandas Series aligned with the input.
    """
    short_ema = close_percent_change.ewm(span=short_window, adjust=False).mean()
    long_ema = close_percent_change.ewm(span=long_window, adjust=False).mean()
    macd_line = short_ema - long_ema
    signal_line = macd_line.ewm(span=signal_window, adjust=False).mean()
    return macd_line, signal_line

def calculate_stochastic_oscillator(close, high, low, window=14):
    """
    Computes the stochastic oscillator (%K).

    For each day the close is located within the range spanned by the lowest
    low and the highest high of the trailing window, and expressed as a
    percentage of that range.

    Parameters:
    - close: Pandas Series containing closing prices.
    - high: Pandas Series containing highest prices.
    - low: Pandas Series containing lowest prices.
    - window: Number of observations in the look-back range (default: 14).

    Returns:
    Pandas Series containing the oscillator value for each day. Positions
    before the window is full are NaN.
    """
    range_floor = low.rolling(window=window).min()
    range_ceiling = high.rolling(window=window).max()
    distance_above_floor = close - range_floor
    range_width = range_ceiling - range_floor
    return (distance_above_floor / range_width) * 100

def calculate_average_true_range(daily_variation, window=14):
    """
    Computes an average-true-range style volatility measure.

    This is the simple rolling mean of the supplied daily variation series;
    no true-range calculation is performed here.

    Parameters:
    - daily_variation: Pandas Series containing the daily variation.
    - window: Number of observations in the rolling mean (default: 14).

    Returns:
    Pandas Series containing the averaged value for each day. Positions before
    the window is full are NaN.
    """
    trailing_window = daily_variation.rolling(window=window)
    return trailing_window.mean()

def calculate_adx(high, low, close, window=14):
    """
    Calculates the average directional index (ADX).

    Steps:
    1. True range = max(high - low, |high - prev close|, |low - prev close|).
    2. Directional movement: +DM is the rise in the high when it is positive
       and larger than the fall in the low; -DM is the fall in the low when it
       is positive and larger than the rise in the high; otherwise 0.
    3. +DM, -DM and true range are smoothed with a simple rolling mean.
    4. +DI / -DI = 100 * smoothed DM / smoothed true range.
    5. DX = 100 * |+DI - -DI| / (+DI + -DI); ADX is the rolling mean of DX.

    Parameters:
    - high: Pandas Series containing highest prices.
    - low: Pandas Series containing lowest prices.
    - close: Pandas Series containing closing prices.
    - window: Window size for every rolling mean (default: 14).

    Returns:
    Pandas Series containing the ADX for each day, aligned with *high*.
    Positions before both rolling windows are full are NaN, as are windows
    containing a day where +DI and -DI are both zero (DX is 0/0 there).
    """
    prev_close = close.shift(1)
    tr = pd.DataFrame(index=high.index)
    tr['h_l'] = high - low
    tr['h_pc'] = np.abs(high - prev_close)
    tr['l_pc'] = np.abs(low - prev_close)
    # Row-wise max skips the NaN produced by the missing previous close on row 0.
    true_range = tr.max(axis=1)

    up_move = high - high.shift(1)
    down_move = low.shift(1) - low
    # Keep these as Series (not np.where arrays) so .rolling() is available.
    # A move only counts when it is positive and the dominant one; comparisons
    # against the leading NaN are False, so the first row becomes 0.
    pdm = up_move.where((up_move > down_move) & (up_move > 0), 0.0)
    ndm = down_move.where((down_move > up_move) & (down_move > 0), 0.0)

    smooth_pdm = pdm.rolling(window=window).mean()
    smooth_ndm = ndm.rolling(window=window).mean()
    atr = true_range.rolling(window=window).mean()

    pdi = (smooth_pdm / atr) * 100
    ndi = (smooth_ndm / atr) * 100
    dx = (np.abs(pdi - ndi) / (pdi + ndi)) * 100
    adx = dx.rolling(window=window).mean()
    return adx