In [None]:
from tqdm import tqdm
import pandas as pd
import pandas_ta as ta

# Pipeline: load the combined price data, compute a fixed set of technical
# indicators per ticker with pandas_ta, and save the enriched table as CSV.

# Input/output locations. The output name is defined once so the file that is
# written and the completion message printed at the end cannot disagree.
INPUT_CSV = '/Users/cyruskurd/Documents/grad_programming/AML/Project work/combined_data_with_y.csv'
OUTPUT_CSV = "combined_data_with_y_ta.csv"

# Columns that must be non-null for a row to be usable by the indicators.
PRICE_COLUMNS = ['open', 'high', 'low', 'close', 'vol']

# Longest look-back used below (the MACD `slow` period); tickers with fewer
# rows than this are skipped entirely.
MIN_ROWS = 26

df = pd.read_csv(INPUT_CSV)

# Distinct tickers in order of first appearance. Not needed by the loop below
# (groupby does the splitting) but kept as a module-level name in case other
# cells rely on it.
tickers = df['ticker'].unique()

# One processed DataFrame per ticker that survives the checks below.
all_data = []

# groupby splits the frame in a single pass instead of re-filtering the whole
# frame once per ticker. sort=False keeps first-appearance order, i.e. the
# same order as df['ticker'].unique(). Rows whose ticker is NaN are dropped by
# groupby; they never made it into the output before either.
for ticker, group in tqdm(df.groupby('ticker', sort=False), desc="Processing Tickers"):
    # Work on a private copy so the column assignments below never touch `df`.
    ta_df = group.copy()

    # Index by timestamp in chronological order; the rolling indicators
    # assume rows are ordered in time.
    ta_df['timestamp'] = pd.to_datetime(ta_df['timestamp'])
    ta_df = ta_df.set_index('timestamp').sort_index()

    # Drop rows with gaps in any of the price/volume inputs.
    ta_df = ta_df.dropna(subset=PRICE_COLUMNS)

    # Skip tickers with insufficient data for indicators.
    if len(ta_df) < MIN_ROWS:
        print(f"Skipping {ticker}: Insufficient data for indicator calculations.")
        continue

    close = ta_df['close']

    # NOTE: the functional ta.* calls return a Series that is assigned
    # explicitly, so no `append=True` is passed to them; that flag is only
    # meaningful for the `ta_df.ta.*` accessor calls, which add their own
    # columns to the frame.

    # 1. Simple Moving Averages
    ta_df['SMA_10'] = ta.sma(close, length=10)
    ta_df['SMA_20'] = ta.sma(close, length=20)

    # 2. Exponential Moving Averages
    ta_df['EMA_10'] = ta.ema(close, length=10)
    ta_df['EMA_20'] = ta.ema(close, length=20)

    # 3. MACD (accessor call: appends its columns to ta_df in place)
    try:
        ta_df.ta.macd(close='close', fast=12, slow=26, signal=9, append=True)
    except Exception as e:
        # Deliberately broad: any MACD failure drops this ticker from the
        # output rather than aborting the whole run. The message states that
        # the entire ticker is skipped, not just the MACD columns.
        print(f"Skipping {ticker}: MACD calculation failed: {e}")
        continue

    # 4. Relative Strength Index (RSI)
    ta_df['RSI_14'] = ta.rsi(close, length=14)

    # 5. Bollinger Bands (accessor call: appends its columns in place)
    ta_df.ta.bbands(close='close', length=20, std=2, append=True)

    # 6. Average True Range (ATR)
    ta_df['ATR_14'] = ta.atr(high=ta_df['high'], low=ta_df['low'], close=close, length=14)

    # 7. On-Balance Volume (OBV)
    ta_df['OBV'] = ta.obv(close=close, volume=ta_df['vol'])

    # The 'ticker' column is carried through from the source rows, so there is
    # no need to re-add it before collecting the frame.
    all_data.append(ta_df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that says what actually went wrong instead.
if not all_data:
    raise ValueError(
        f"No ticker had at least {MIN_ROWS} usable rows; nothing to write to '{OUTPUT_CSV}'."
    )

# Combine all processed data and turn the timestamp index back into a column.
combined_data = pd.concat(all_data).reset_index()

# Save the combined data to a CSV file.
combined_data.to_csv(OUTPUT_CSV, index=False)

print(f"Processing complete. Data saved to '{OUTPUT_CSV}'.")
