In [180]:
# Necessary libraries

import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path

In [181]:
# Adjust directories (sources and outputs)

load_dotenv()

raw_data_path = os.getenv("RAW_DATA_PATH")
processed_data_path = os.getenv("PROCESSED_DATA_PATH")

raw_data_path = Path(raw_data_path)
processed_data_path = Path(processed_data_path)

# SPY Load

In [182]:
# Main feature - S&P 500 ETF (SPY)

spy = pd.read_csv(raw_data_path / 'SPY_raw_data.csv', header = 0)

spy = spy.iloc[2:].reset_index(drop = True)
spy = spy.rename(columns = {spy.columns[0]: 'Date'})
spy['Date'] = pd.to_datetime(spy['Date'])
spy = spy.set_index('Date')
spy = spy.apply(pd.to_numeric, errors = 'coerce')

print(spy.info())
print("--" * 30)
print(spy.isnull().sum())
print("--" * 30)

spy.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
 1   High    5256 non-null   float64
 2   Low     5256 non-null   float64
 3   Open    5256 non-null   float64
 4   Volume  5256 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 246.4 KB
None
------------------------------------------------------------
Close     0
High      0
Low       0
Open      0
Volume    0
dtype: int64
------------------------------------------------------------


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03,81.847115,82.840437,81.57497,82.704362,55748000
2005-01-04,80.847,82.010413,80.581661,81.955983,69167600
2005-01-05,80.289101,81.132744,80.282296,80.785759,65667300
2005-01-06,80.697327,81.064721,80.459202,80.581667,47814700
2005-01-07,80.58168,81.119164,80.370766,80.94227,55847700


# **Technical Indicators**

# 1. Scaled Simple Moving Average (Scaled SMA) 

In [183]:
# Scaled Simple Moving Average (Close - SMA)

def scaled_SMA(df, period):

    sma = df['Close'].rolling(period).mean()
    scaled_sma = df['Close'] - sma

    return scaled_sma

# 2. Scaled Exponential Moving Average (Scaled EMA)

In [184]:
# Scaled Exponential Moving Average (Close - EMA)

def scaled_EMA(df, period):

    ema = df['Close'].ewm(span = period, adjust = False).mean()
    scaled_ema = df['Close'] - ema

    return scaled_ema

# 3. Scaled Hull Moving Average (Scaled HMA)

In [185]:
# Scaled Hull Moving Average (Close - HMA)

# First must be calculated the WMA, but inside the HMA function

def scaled_HMA(df, period):

    # WMA

    def WMA_component(series, length):

        weights = np.arange(1, length + 1)
        result = series.rolling(window = length)
        result = result.apply(lambda x: np.dot(x, weights) / weights.sum(), raw = True)

        return result
    
    
    half = period // 2
    sqrt_period = int(np.sqrt(period))

    wma1 = WMA_component(df['Close'], half)
    wma2 = WMA_component(df['Close'], period)

    hma = WMA_component(2 * wma1 - wma2, sqrt_period)
    scaled_hma = df['Close'] - hma

    return scaled_hma

# 4. Momentum

In [186]:
# Momentum Indicator

def momentum(df, period):

    momtm = df['Close'] - df['Close'].shift(period)

    return momtm

# 5. Relative Strength Index (RSI)

In [187]:
# RSI (Relative Strength Index)

def rsi(df, period):

    delta = df['Close'].diff()
    
    gain = delta.clip(lower = 0)
    loss = - delta.clip(upper = 0)

    avg_gain = gain.rolling(period).mean()
    avg_loss = loss.rolling(period).mean()

    rs = avg_gain / avg_loss

    rsi_value = 100 - (100 / (1 + rs))

    return rsi_value

# 6. Stochastic Oscillator (%K and %D)

In [188]:
# Stochastic Oscillator (%K and %D)

def stochastic(df, period, smooth_k = 1, smooth_d = 3):

    low_min = df['Low'].rolling(period).min()
    high_max = df['High'].rolling(period).max()

    k = 100 * (df['Close'] - low_min) / (high_max - low_min)

    k_smooth = k.rolling(smooth_k).mean()
    d_smooth = k_smooth.rolling(smooth_d).mean()

    return k_smooth, d_smooth

# 7. Williams %R

In [189]:
# Williams %R

def williams_r(df, period):

    low_min = df['Low'].rolling(period).min()
    high_max = df['High'].rolling(period).max()

    wr = - 100 * (high_max - df['Close']) / (high_max - low_min)

    return wr

# 8. Normalized Average True Range (NATR)

In [190]:
# Normalized ATR (ATR / Close)

def normalized_atr(df, period):

    high_low = df['High'] - df['Low']
    high_close = np.abs(df['High'] - df['Close'].shift())
    low_close = np.abs(df['Low'] - df['Close'].shift())

    tr = pd.concat([high_low, high_close, low_close], axis = 1).max(axis = 1)
    atr = tr.rolling(period).mean()

    norm_atr = atr / df['Close']

    return norm_atr

# 9. Scaled Bollinger Bands

In [191]:
# Scaled Bollinger Bands (with 2 standard deviations)

def scaled_bb(df, period, num_std = 2):

    sma = df['Close'].rolling(period).mean()
    std = df['Close'].rolling(period).std()

    upper = sma + (num_std * std)
    lower = sma - (num_std * std)

    scaled_upper = df['Close'] - upper
    scaled_lower = df['Close'] - lower
    
    return scaled_upper, scaled_lower

# 10. Scaled Keltner Channels

In [192]:
# Scaled Keltner Channels

def scaled_keltner(df, period, atr_mult = 2):

    ema = df['Close'].ewm(span = period, adjust = False).mean()

    # ATR

    high_low = df['High'] - df['Low']
    high_close = np.abs(df['High'] - df['Close'].shift())
    low_close = np.abs(df['Low'] - df['Close'].shift())

    tr = pd.concat([high_low, high_close, low_close], axis = 1).max(axis = 1)
    atr = tr.rolling(period).mean()

    upper = ema + (atr_mult * atr)
    lower = ema - (atr_mult * atr)

    scaled_upper = df['Close'] - upper
    scaled_lower = df['Close'] - lower

    return scaled_upper, scaled_lower

# 11. On-Balance Volume (OBV)

In [193]:
# On-Balance Volume

def obv(df):

    direction = np.sign(df['Close'].diff()).fillna(0)

    dir_vol = (direction * df['Volume']).cumsum()

    return dir_vol

# 12. Anchored Volume Weighted Average Price (Anchored VWAP)

In [194]:
# Anchored VWAP 

def anchored_vwap(df, anchor_index = 0):

    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    cum_tp_vol = (typical_price * df['Volume']).cumsum() - (typical_price * df['Volume']).cumsum().iloc[anchor_index]
    cum_vol = df['Volume'].cumsum() - df['Volume'].cumsum().iloc[anchor_index]

    vwap = cum_tp_vol / cum_vol
    
    return vwap

# 13. Intraday Logarithmic Volatility

In [195]:
# Intraday Logarithmic Volatility

def ilv(df):

    dlog = np.log(df['High'] / df['Low'])

    return dlog

# **Applying the Indicators**

In [196]:
# Add the indicators to SPY dataframe

def add_indicators(spy, 
                   period_sma = 50,
                   period_sma2 = 200,
                   period_ema = 50,
                   period_ema2 = 200,
                   period_hma = 50,
                   period_hma2 = 200,
                   period_momentum = 20,
                   period_momentum2 = 100,
                   period_rsi = 14,
                   period_stochastic = 14,
                   period_williamsR = 21,
                   period_atr = 14,
                   period_bb = 21,
                   period_keltner = 21,
                   ):
    

    # SSMA50 and SSMA200

    spy['Scaled_SMA50'] = scaled_SMA(spy, period = period_sma)
    spy['Scaled_SMA200'] = scaled_SMA(spy, period = period_sma2)

    # SEMA50 and SEMA200

    spy['Scaled_EMA50'] = scaled_EMA(spy, period = period_ema)
    spy['Scaled_EMA200'] = scaled_EMA(spy, period = period_ema2)

    # SHMA50 and SHMA200

    spy['Scaled_HMA50'] = scaled_HMA(spy, period = period_hma)
    spy['Scaled_HMA200'] = scaled_HMA(spy, period = period_hma2)

    # Momentum

    spy['Momentum_20p'] = momentum(spy, period = period_momentum)
    spy['Momentum_100p'] = momentum(spy, period = period_momentum2)

    # RSI

    spy['RSI'] = rsi(spy, period = period_rsi)

    # Stochastic (%K and %D)

    k, d = stochastic(spy, period = period_stochastic)
    spy['Stoch_K'] = k
    spy['Stoch_D'] = d

    # Williams %R

    spy['WilliamsR'] = williams_r(spy, period = period_williamsR)

    # NATR

    spy['Norm_ATR'] = normalized_atr(spy, period = period_atr)

    # Scaled Bollinger BAnds

    s_upper, s_lower = scaled_bb(spy, period_bb)
    spy['Scaled_Upper_Bollinger'] = s_upper
    spy['Scaled_Lower_Bollinger'] = s_lower

    # Scaled Keltner Channels

    s_upper, s_lower = scaled_keltner(spy, period_keltner)
    spy['Scaled_Upper_Keltner'] = s_upper
    spy['Scaled_Lower_Keltner'] = s_lower

    # OBV

    spy['OBV'] = obv(spy)

    # Anchored VWAP

    spy['Anchored_VWAP'] = anchored_vwap(spy)

    # Intraddy Logarithmic Volatility

    spy['ILV'] = ilv(spy)


    return spy

In [197]:
# Add indicators

spy = add_indicators(spy)

Drop the Close Price column because, the target (Close) has been already separately processed

In [198]:
# Drop Close

spy = spy.drop(columns = 'Close', axis = 1)
spy.tail(10)

Unnamed: 0_level_0,High,Low,Open,Volume,Scaled_SMA50,Scaled_SMA200,Scaled_EMA50,Scaled_EMA200,Scaled_HMA50,Scaled_HMA200,...,Stoch_D,WilliamsR,Norm_ATR,Scaled_Upper_Bollinger,Scaled_Lower_Bollinger,Scaled_Upper_Keltner,Scaled_Lower_Keltner,OBV,Anchored_VWAP,ILV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-11-07,671.080017,661.210022,667.909973,100592400,6.32405,62.23748,7.106269,51.298674,-9.336721,-12.93727,...,40.802493,-50.814023,0.010217,-20.612585,17.132529,-19.922404,14.399513,15887730000.0,174.943539,0.014817
2025-11-10,682.179993,675.030029,677.23999,75842900,16.030614,72.313757,16.887034,61.154092,1.00283,-3.181423,...,43.939279,-22.611561,0.010919,-9.54032,24.293663,-9.210299,23.225911,15963580000.0,175.002646,0.010536
2025-11-11,683.570007,678.72998,679.950012,58953400,16.700608,73.429573,17.723618,62.090069,2.288698,-2.325456,...,60.582573,-18.341108,0.01049,-8.568573,24.540957,-7.736815,23.31464,16022530000.0,175.048782,0.007106
2025-11-12,684.960022,680.950012,684.789978,62312500,16.252217,73.389023,17.393677,61.848481,2.2478,-2.636197,...,75.102332,-19.392458,0.010391,-8.487506,23.205611,-7.275628,22.128204,16084840000.0,175.097667,0.005872
2025-11-13,680.859985,670.52002,680.5,103457800,4.417897,61.671709,5.816252,50.005883,-9.334499,-14.605515,...,64.104289,-54.188462,0.01122,-19.483256,10.867022,-18.408431,11.292543,15981390000.0,175.177431,0.015303
2025-11-14,675.659973,663.27002,665.380005,96846700,3.778322,61.200996,5.482492,49.399421,-9.526877,-15.28522,...,51.152363,-56.305516,0.011657,-18.50659,8.595149,-18.260199,11.591236,15884540000.0,175.251449,0.018508
2025-11-17,673.710022,662.169983,669.700012,90456100,-2.854365,54.595599,-0.747027,42.710163,-15.53302,-22.035176,...,30.431609,-84.345516,0.012567,-24.601113,2.047751,-24.021319,6.57679,15794080000.0,175.32013,0.017277
2025-11-18,665.119995,655.859985,662.099976,114467500,-8.675292,48.668121,-6.088483,36.750842,-20.397421,-28.020518,...,22.436995,-87.529467,0.012996,-31.394279,-1.270446,-28.499696,2.477459,15679610000.0,175.405808,0.01402
2025-11-19,667.340027,658.75,660.780029,94703000,-6.369768,50.887723,-3.399731,38.909777,-16.780084,-25.790705,...,17.43193,-79.994048,0.013057,-29.460056,2.720066,-25.530127,6.509889,15774320000.0,175.477043,0.012956
2025-11-20,675.559998,651.890015,672.909973,165293500,-16.404108,40.519955,-12.970306,28.523136,-25.270964,-36.102013,...,12.8429,-98.307287,0.015211,-41.54148,-3.94418,-35.273078,-0.381639,15609020000.0,175.600583,0.035666


In [199]:
spy.isnull().sum()

High                        0
Low                         0
Open                        0
Volume                      0
Scaled_SMA50               49
Scaled_SMA200             199
Scaled_EMA50                0
Scaled_EMA200               0
Scaled_HMA50               55
Scaled_HMA200             212
Momentum_20p               20
Momentum_100p             100
RSI                        14
Stoch_K                    13
Stoch_D                    15
WilliamsR                  20
Norm_ATR                   13
Scaled_Upper_Bollinger     20
Scaled_Lower_Bollinger     20
Scaled_Upper_Keltner       20
Scaled_Lower_Keltner       20
OBV                         0
Anchored_VWAP               1
ILV                         0
dtype: int64