In [2]:
# Necessary libraries

import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path

In [3]:
# Adjust directories (sources and outputs)

load_dotenv()


def _require_env_path(var_name):
    """Return the directory stored in environment variable `var_name` as a Path.

    Parameters
    ----------
    var_name : str
        Name of the environment variable to read.

    Returns
    -------
    pathlib.Path
        The variable's value wrapped in a Path.

    Raises
    ------
    RuntimeError
        If the variable is not set. Without this check the failure would
        surface as an opaque TypeError from `Path(None)` that does not say
        which variable is missing.
    """
    value = os.getenv(var_name)
    if value is None:
        raise RuntimeError(
            f"Environment variable {var_name!r} is not set; "
            "define it in the environment or in the .env file."
        )
    return Path(value)


raw_data_path = _require_env_path("RAW_DATA_PATH")
processed_data_path = _require_env_path("PROCESSED_DATA_PATH")
prepared_data_path = _require_env_path("PREPARED_DATA_PATH")

# SPY Load

In [4]:
# Main feature - S&P 500 ETF (SPY)

# Read the raw CSV and discard its first two data rows.
# NOTE(review): presumably these are extra header rows left by the download
# export (they are not price observations) - confirm against the raw file.
spy = (
    pd.read_csv(raw_data_path / 'SPY_raw_data.csv', header = 0)
    .iloc[2:]
    .reset_index(drop = True)
)

# The first column holds the dates: name it, parse it and use it as the index.
spy = spy.rename(columns = {spy.columns[0]: 'Date'})
spy['Date'] = pd.to_datetime(spy['Date'])

# Every remaining column is converted to numeric; unparseable cells become NaN.
spy = spy.set_index('Date').apply(pd.to_numeric, errors = 'coerce')

# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
spy.info()
print("--" * 30)

spy.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
 1   High    5256 non-null   float64
 2   Low     5256 non-null   float64
 3   Open    5256 non-null   float64
 4   Volume  5256 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 246.4 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03,81.847115,82.840437,81.57497,82.704362,55748000
2005-01-04,80.847,82.010413,80.581661,81.955983,69167600
2005-01-05,80.289101,81.132744,80.282296,80.785759,65667300
2005-01-06,80.697327,81.064721,80.459202,80.581667,47814700
2005-01-07,80.58168,81.119164,80.370766,80.94227,55847700


# **Technical Indicators**

# 1. Scaled Simple Moving Average (Scaled SMA) 

In [5]:
# Scaled Simple Moving Average (Close - SMA)

def scaled_SMA(df, period):
    """Return the close price minus its simple moving average.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Close' column.
    period : int
        Rolling window length passed to `Series.rolling`.

    Returns
    -------
    pandas.Series
        `Close - SMA(Close, period)`, aligned on the index of `df`. With an
        integer `period`, rows whose window is still incomplete are NaN.
    """
    close = df['Close']
    return close - close.rolling(period).mean()

# 2. Scaled Exponential Moving Average (Scaled EMA)

In [6]:
# Scaled Exponential Moving Average (Close - EMA)

def scaled_EMA(df, period):
    """Return the close price minus its exponential moving average.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Close' column.
    period : int or float
        Span of the exponential window, passed to `Series.ewm(span=...)`.

    Returns
    -------
    pandas.Series
        `Close - EMA(Close, period)`, aligned on the index of `df`.
    """
    close = df['Close']
    # adjust = False selects the recursive form of the exponential average
    smoothed = close.ewm(span = period, adjust = False).mean()
    return close - smoothed

# 3. Scaled Hull Moving Average (Scaled HMA)

In [None]:
# Scaled Hull Moving Average (Close - HMA)

# The WMA has to be computed first; it is defined as a helper nested inside the HMA function

def scaled_HMA(df, period):
    """Return the close price minus its Hull moving average.

    The Hull moving average is built from three weighted moving averages:
    `WMA(2 * WMA(Close, period // 2) - WMA(Close, period), int(sqrt(period)))`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Close' column.
    period : int
        Length of the long weighted window; must be at least 2.

    Returns
    -------
    pandas.Series
        `Close - HMA(Close, period)`, aligned on the index of `df`. Leading
        rows for which any of the rolling windows is incomplete are NaN.

    Raises
    ------
    ValueError
        If `period` is smaller than 2. The half window `period // 2` would
        then be empty and no meaningful average could be computed.
    """

    if period < 2:
        raise ValueError(
            f"period must be at least 2 to build a Hull moving average, got {period}"
        )

    # WMA

    def WMA_component(series, length):
        """Linearly weighted rolling mean of `series` over `length` rows.

        The weights are 1..length in window order, so the last row of each
        window carries the largest weight.
        """
        weights = np.arange(1, length + 1)
        # The normaliser is the same for every window, so compute it once
        weight_total = weights.sum()

        return series.rolling(window = length).apply(
            lambda window: np.dot(window, weights) / weight_total, raw = True
        )

    close = df['Close']
    half = period // 2
    sqrt_period = int(np.sqrt(period))

    wma_half = WMA_component(close, half)
    wma_full = WMA_component(close, period)

    # Smooth the de-lagged series (2 * short WMA - long WMA) with a sqrt-length WMA
    hma = WMA_component(2 * wma_half - wma_full, sqrt_period)

    return close - hma

# 4. Momentum

# 5. Relative Strength Index (RSI)

# 6. Stochastic Oscillator (%K and %D)

# 7. Williams %R

# 8. Normalized Average True Range (NATR)

# 9. Scaled Bollinger Bands

# 10. Scaled Keltner Channels

# 11. On-Balance Volume (OBV)

# 12. Anchored Volume Weighted Average Price (Anchored VWAP)

# 13. Money Flow Index (MFI)

# 14. Intraday Logarithmic Volatility

# **Applying the Indicators**