In [1]:
import pandas as pd
import os
from ta import add_all_ta_features
from ta.utils import dropna
import yfinance as yf

In [2]:
project_dir = "/home/jupyter-tfg2425paula/prediction_project_v3"
os.chdir(project_dir)

raw_data_dir = os.path.join(project_dir, "00_data")
output_data_dir = os.path.join(raw_data_dir, "raw/technical")

securities = "raw/single_name"
stocks_folder = os.path.join(raw_data_dir, securities)

stock = 'SPX'
filename = f'{stock}_Close.csv'

In [3]:
df = pd.read_csv(os.path.join(stocks_folder, filename), sep=";", decimal=",")
df["Date"] = pd.to_datetime(df["Date"], format="%d/%m/%y")
df

Unnamed: 0,Date,Close
0,2009-06-18,918.370
1,2009-06-19,921.227
2,2009-06-22,893.042
3,2009-06-23,895.098
4,2009-06-24,900.940
...,...,...
4023,2024-11-19,5916.980
4024,2024-11-20,5917.109
4025,2024-11-21,5948.711
4026,2024-11-22,5969.340


There are many possible indicators.

**Momentum indicators**

- RSI (Relative Strength Index): Measures the speed and change of price movements.
- Stochastic Oscillator: Compares the closing price to a price range over a period.
- Williams %R: Indicates overbought/oversold levels.
- Awesome Oscillator: Measures momentum using two SMAs (simple moving averages).
- KAMA (Kaufman’s Adaptive Moving Average): Adaptive moving average based on volatility.
- PPO (Percentage Price Oscillator): Measures the difference between two EMAs as a percentage of the larger EMA.
- PVO (Percentage Volume Oscillator): Similar to PPO but based on volume.
- ROC (Rate of Change): Measures the percentage change in price.

**Trend indicators**

- MACD (Moving Average Convergence Divergence): Identifies trend direction and strength.
- SMA (Simple Moving Average): Calculates the average price over a period.
- EMA (Exponential Moving Average): Weighted moving average that gives more weight to recent prices.
- WMA (Weighted Moving Average): Similar to SMA but with a weighting factor.
- DEMA (Double Exponential Moving Average): Reduces lag by applying EMA twice.
- TEMA (Triple Exponential Moving Average): Further reduces lag compared to DEMA.
- TRIX: A triple exponential moving average to identify trends.
- ADX (Average Directional Movement Index): Measures trend strength.
- Aroon Indicator: Measures the time since the highest/lowest point over a period.
- PSAR (Parabolic Stop and Reverse): Provides potential reversal points in a trend.
- Ichimoku Cloud: Identifies support, resistance, and trend strength.

**Volatility indicators**

- Bollinger Bands: Measures price volatility and potential breakouts.
- Average True Range (ATR): Measures market volatility.
- Donchian Channels: Identifies breakout levels over a period.
- Keltner Channels: Combines ATR and EMA to define price range.

**Volume indicators**

- OBV (On-Balance Volume): Combines volume and price movements to identify trends.
- CMF (Chaikin Money Flow): Measures money flow volume over a period.
- VWAP (Volume Weighted Average Price): Average price weighted by volume.
- ADI (Accumulation/Distribution Index): Tracks supply and demand using volume and price.
- Ease of Movement (EOM): Relates price movement to volume.
- MFI (Money Flow Index): Combines price and volume to identify overbought/oversold levels.

In [4]:
ticker = stock
yf_data = yf.download(ticker, start=df["Date"].min().strftime('%Y-%m-%d'), 
                      end=df["Date"].max().strftime('%Y-%m-%d'))

yf_data.columns = yf_data.columns.droplevel(1)
yf_data = yf_data.reset_index()

df_with_indicators = add_all_ta_features(
    yf_data,
    open="Open",    # Use Close for 'open'
    high="High",    # Use Close for 'high'
    low="Low",     # Use Close for 'low'
    close="Close",   # Use Close for 'close'
    volume="Volume",     # No volume data available
    fillna=False      # Fill missing values to avoid issues
)

df_with_indicators = df_with_indicators.interpolate(method="linear")

# They are only either at the beginning or the end
df_with_indicators = df_with_indicators.dropna().reset_index().drop(columns="index")
df_with_indicators

[*********************100%***********************]  1 of 1 completed


Price,Date,Adj Close,Close,High,Low,Open,Volume,volume_adi,volume_obv,volume_cmf,...,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
0,2009-09-29,0.05675,0.05675,0.05675,0.04746,0.04746,3415184,5.992574e+06,3449825,0.595583,...,2.952495,3.380493,-0.427998,58.662854,24.546775,34.116078,0.057812,19.574375,17.876838,22.227010
1,2009-09-30,0.05675,0.05675,0.05675,0.05107,0.05675,3244243,9.236817e+06,6694068,0.668651,...,2.839929,3.272380,-0.432451,59.634066,31.564233,28.069833,0.057801,0.000000,0.000000,22.227010
2,2009-10-01,0.05417,0.05417,0.05417,0.05159,0.05159,2721937,1.195875e+07,3972131,0.712268,...,2.350666,3.088038,-0.737372,56.675465,36.586480,20.088985,0.057764,-4.546252,-4.652837,16.670261
3,2009-10-02,0.05417,0.05417,0.05417,0.05159,0.05417,754560,1.271331e+07,4726691,0.735118,...,1.940339,2.858498,-0.918159,46.382810,38.545746,7.837065,0.057727,0.000000,0.000000,16.670261
4,2009-10-05,0.05417,0.05417,0.05417,0.05417,0.05417,0,1.271331e+07,4726691,0.740886,...,1.596578,2.606114,-1.009536,33.771368,37.590870,-3.819502,0.057712,0.000000,0.000000,16.670261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2076,2018-01-24,0.05500,0.05500,0.05500,0.05500,0.05500,7000,-3.145110e+07,190770359,0.277038,...,9.208777,9.081971,0.126806,0.741691,7.910342,-7.168651,0.050643,0.000000,0.000000,18.457895
2077,2018-01-25,0.05500,0.05500,0.05500,0.05500,0.05500,1160,-3.145110e+07,190771519,0.276780,...,8.674703,9.000517,-0.325815,-7.771069,4.774060,-12.545129,0.050950,0.000000,0.000000,18.457895
2078,2018-01-26,0.05000,0.05000,0.05000,0.05000,0.05000,1800,-3.145110e+07,190769719,0.315181,...,7.410653,8.682544,-1.271892,-15.419817,0.735284,-16.155101,0.050823,-9.090907,-9.531016,7.688998
2079,2018-01-29,0.05000,0.05000,0.06000,0.05000,0.06000,60817,-3.151192e+07,190830536,0.249026,...,6.336502,8.213336,-1.876834,-12.175545,-1.846882,-10.328664,0.050765,0.000000,0.000000,7.688998


In [5]:
df_with_indicators.to_csv(os.path.join(output_data_dir, f"{stock}_technical.csv"), index=False)