### Take the raw data from the 'data/' folder, populate technical indicators, and write the result to the 'data_with_indicators' folder

In [1]:
import pandas as pd
import os
import pandas_ta as ta

In [21]:
# Source directory (raw input files) and destination directory (files with indicators added)
data_folder, output_folder = 'data', 'data_with_indicators'
# Make sure the destination exists before anything is written; a pre-existing folder is fine
os.makedirs(name=output_folder, exist_ok=True)

In [23]:
# List every entry in the raw-data folder, then order the names so runs are reproducible
filenames = os.listdir(data_folder)
filenames.sort()

In [31]:
# filenames = ['AAL.csv']

In [25]:
# !! TODO:   make the process multi-threaded

# Indicator parameters. They do not depend on the file being processed, so they
# are defined once here instead of being rebuilt on every loop iteration.
sma_windows = [10, 20, 50, 100, 200]
# Every Bollinger window must also appear in sma_windows: the bands are built
# around the matching SMA_<window> column.
bb_windows = [20, 50, 200]
bb_deviation = 2.0
rsi_windows = [7, 14, 30]
avg_vol_windows = [20, 40, 80]

# For each CSV in the data folder: load it, add the indicator columns,
# downcast floats and write the result under the same name to the output folder.
for filename in filenames:
    if not filename.endswith('.csv'):
        continue  # ignore anything that is not a CSV file

    # splitext removes only the final extension, so a dotted ticker such as
    # 'BRK.B.csv' yields 'BRK.B' rather than being cut at the first dot.
    symbol = os.path.splitext(filename)[0]
    file_path = os.path.join(data_folder, filename)

    # One bad file must not abort the whole batch: report the failure and move on.
    try:
        # Load OHLCV data, indexed by the parsed 'Date' column
        data = pd.read_csv(file_path, index_col='Date', parse_dates=True)
        close = data['Close']

        # SMA: simple moving average of Close over each window
        for sma_window in sma_windows:
            data[f'SMA_{sma_window}'] = close.rolling(window=sma_window).mean()

        # Bollinger Bands: SMA +/- bb_deviation rolling standard deviations.
        # The rolling std is computed once per window and reused for both bands.
        # NOTE(review): pandas' rolling std defaults to ddof=1 (sample std);
        # confirm this is the intended convention for the bands.
        for bb_window in bb_windows:
            band_offset = close.rolling(window=bb_window).std() * bb_deviation
            data[f'BB_{bb_window}_upper'] = data[f'SMA_{bb_window}'] + band_offset
            data[f'BB_{bb_window}_lower'] = data[f'SMA_{bb_window}'] - band_offset

        # RSI of Close, via pandas_ta
        for rsi_window in rsi_windows:
            data[f'RSI_{rsi_window}'] = ta.rsi(close, length=rsi_window)

        # Average volume over each window
        for vol_window in avg_vol_windows:
            data[f'Avg_Volume_{vol_window}'] = data['Volume'].rolling(window=vol_window).mean()

        # Optimize data types: downcast every float64 column to float32 in one call
        float64_columns = data.select_dtypes(include='float64').columns
        data = data.astype({column: 'float32' for column in float64_columns})

        # Save the data with indicators to the output folder, keeping the file name
        output_path = os.path.join(output_folder, filename)
        data.to_csv(output_path)

        print(f"Processed and saved data for {symbol}")

    except Exception as e:
        print(f"Failed to process {symbol}: {e}")

Processed and saved data for AAL
Processed and saved data for AAME
Processed and saved data for AAOI
Processed and saved data for AAON
Processed and saved data for AAPL
Processed and saved data for ABEO
Processed and saved data for ABUS
Processed and saved data for ABVC
Processed and saved data for ACAD
Processed and saved data for ACGL
Processed and saved data for ACHC
Processed and saved data for ACIC
Processed and saved data for ACIW
Processed and saved data for ACLS
Processed and saved data for ACNB
Processed and saved data for ACNT
Processed and saved data for ACRS
Processed and saved data for ACST
Processed and saved data for ACTG
Processed and saved data for ADBE
Processed and saved data for ADEA
Processed and saved data for ADI
Processed and saved data for ADMA
Processed and saved data for ADP
Processed and saved data for ADSK
Processed and saved data for ADTN
Processed and saved data for ADUS
Processed and saved data for ADVM
Processed and saved data for AEHR
Processed and sav