### Take raw OHLCV data from the 'data/' folder, populate technical indicators, and write the results to the 'data_with_indicators/' folder

In [None]:
# TODO: parallelize the per-file processing (each file is handled independently of the others)

In [1]:
import pandas as pd
import os
import pandas_ta as ta

In [3]:
# Input folder with the raw per-symbol CSV files and output folder for the
# same files enriched with indicator columns.
ohlcv_folder = 'data'
indicators_folder = 'data_with_indicators'
# Create the output folder on first run; exist_ok avoids an error on re-runs.
os.makedirs(indicators_folder, exist_ok=True)

In [5]:
# Names of all entries in the raw-data folder, sorted for a deterministic order
ohlcv_files = sorted(os.listdir(ohlcv_folder))

In [7]:
# Number of entries found in the raw-data folder
len(ohlcv_files)

4971

In [9]:
# Names of all entries already present in the output folder (sorted)
indicators_files = sorted(os.listdir(indicators_folder))

In [11]:
# Number of entries already present in the output folder
len(indicators_files)

1989

In [13]:
# Keep only the raw files that do not have an output file of the same name yet.
# The existing names are put into a set once so each membership check is O(1)
# instead of a linear scan of the list for every raw file.
existing_indicator_files = set(indicators_files)
files_to_populate = [ohlcv_file for ohlcv_file in ohlcv_files if ohlcv_file not in existing_indicator_files]

In [15]:
# Number of raw files that still have no output file
len(files_to_populate)

2982

In [5]:
# NOTE: manual override - replaces any previously computed list so that only
# this single file is (re)processed. Skip this cell to process every pending file.
files_to_populate = ['NRO.csv']

In [7]:
# For every pending CSV file: load the OHLCV data, add the indicator columns
# (SMA, Bollinger Bands, RSI, average volume) and write the result to the
# output folder under the same file name. A failure on one file is reported
# and does not stop the remaining files from being processed.
for filename in files_to_populate:
    # Skip anything that is not a CSV file
    if not filename.endswith('.csv'):
        continue

    # Strip only the final extension so that dotted symbols
    # (e.g. 'BRK.B.csv') keep their full name in the log messages.
    symbol = os.path.splitext(filename)[0]
    file_path = os.path.join(ohlcv_folder, filename)

    try:
        # Load OHLCV data, indexed by the parsed 'Date' column
        data = pd.read_csv(file_path, index_col='Date', parse_dates=True)
        close = data['Close']

        # SMA: n-day simple moving average of the close
        sma_windows = [10, 20, 50, 100, 200]
        for sma_window in sma_windows:
            data[f'SMA_{sma_window}'] = close.rolling(window=sma_window).mean()

        # Bollinger Bands: SMA +/- bb_deviation rolling standard deviations.
        # Every window used here must also be present in sma_windows, because
        # the bands are centred on the matching SMA column.
        bb_windows = [20, 50, 200]
        bb_deviation = 2.0
        for bb_window in bb_windows:
            # Compute the rolling std once and reuse it for both bands
            band_offset = close.rolling(window=bb_window).std() * bb_deviation
            data[f'BB_{bb_window}_upper'] = data[f'SMA_{bb_window}'] + band_offset  # Upper Band
            data[f'BB_{bb_window}_lower'] = data[f'SMA_{bb_window}'] - band_offset  # Lower Band

        # RSI
        rsi_windows = [7, 14, 30]
        for rsi_window in rsi_windows:
            data[f'RSI_{rsi_window}'] = ta.rsi(close, length=rsi_window)

        # Avg_Volume: rolling mean of the traded volume
        avg_vol_windows = [20, 40, 80]
        for vol_window in avg_vol_windows:
            data[f'Avg_Volume_{vol_window}'] = data['Volume'].rolling(window=vol_window).mean()

        # Optimize data types: downcast every float64 column to float32.
        # NOTE(review): float32 keeps only ~7 significant digits, and that is
        # what gets written to the CSV - confirm this is acceptable.
        float64_cols = data.select_dtypes(include='float64').columns
        data = data.astype({col: 'float32' for col in float64_cols})

        # Save the data with indicators to the output folder.
        # Write to a temporary name first and move it into place afterwards, so
        # an interrupted write never leaves a truncated '<symbol>.csv' that a
        # later "already populated" check would mistake for a finished file.
        output_path = os.path.join(indicators_folder, filename)
        tmp_path = output_path + '.tmp'
        data.to_csv(tmp_path)
        os.replace(tmp_path, output_path)

        print(f"Indicators populated for {symbol}")

    except Exception as e:
        # Batch boundary: report the failure and carry on with the next file
        print(f"Failed to process {symbol}: {e}")

Indicators populated for NRO
