In [33]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

In [None]:
def _build_features(df, ticker):
    """Return a copy of ``df`` with the ticker label and engineered columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw price rows. Must contain ``Date``, ``Open`` and ``Close`` columns.
    ticker : str
        Symbol written into the new ``Ticker`` column (inserted at position 1).

    Returns
    -------
    pandas.DataFrame
        New frame; the input frame is not modified.
    """
    featured = df.copy()
    featured.insert(1, 'Ticker', ticker)

    # Intraday move, absolute and as a percentage of the open.
    featured['Price_Movement'] = featured['Close'] - featured['Open']
    featured['Price_Movement_Pct'] = (featured['Price_Movement'] / featured['Open']) * 100

    # Calendar parts derived from the parsed date.
    featured['Date'] = pd.to_datetime(featured['Date'])
    dates = featured['Date'].dt
    featured['Year'] = dates.year
    featured['Month'] = dates.month
    featured['Day'] = dates.day
    featured['Day_of_Year'] = dates.dayofyear
    featured['Day_of_Week'] = dates.dayofweek
    featured['quarter'] = dates.quarter

    # Lagged closes. shift() counts ROWS, not calendar days, so "lag365" is
    # the close 365 rows earlier in the file.
    for lag in (1, 3, 5, 7, 14, 21, 28, 365):
        featured[f'Close_lag{lag}'] = featured['Close'].shift(lag)

    # Cyclical encodings: month on a 12-step circle (rounded to 6 decimals),
    # day of week on a 7-step circle (left unrounded).
    month_angle = 2 * np.pi * featured['Month'] / 12
    featured['Month_Sin'] = np.sin(month_angle).round(6)
    featured['Month_Cos'] = np.cos(month_angle).round(6)

    weekday_angle = 2 * np.pi * featured['Day_of_Week'] / 7
    featured['Dayofweek_Sin'] = np.sin(weekday_angle)
    featured['Dayofweek_Cos'] = np.cos(weekday_angle)

    # Every missing value becomes 0 -- this includes the leading rows of each
    # lag column, which have no earlier close to copy.
    featured = featured.fillna(0)

    # Direction of the move: > 0 is "high", < 0 is "low", anything else "same".
    movement = featured['Price_Movement']
    labels = np.where(movement > 0, 'high', np.where(movement < 0, 'low', 'same'))

    # Ordered categorical, so the integer codes are high=0, same=1, low=2.
    movement_type = CategoricalDtype(categories=['high', 'same', 'low'], ordered=True)
    featured['Price_Movement_Category'] = pd.Categorical(labels, dtype=movement_type)
    featured['Price_Movement_Code'] = featured['Price_Movement_Category'].cat.codes

    return featured


def add_features(tickers, interval, input_dir=None, output_dir=None):
    """Read raw price CSVs, add engineered features, and write one CSV per ticker.

    For each ticker the file ``{ticker}_{interval}_data.csv`` is read from the
    input directory, feature columns are added, a short preview (shape, first
    two rows, last two rows) is printed, and the result is written to
    ``{ticker}_{interval}_features.csv`` in the output directory.

    Parameters
    ----------
    tickers : str or iterable of str
        One symbol or several. A bare string is treated as a single ticker.
    interval : str
        Interval tag used in the file names (for example ``"1d"``).
    input_dir : path-like, optional
        Directory holding the raw CSVs. Defaults to
        ``../Resources/Raw_Files/YFINANCE`` relative to the working directory.
    output_dir : path-like, optional
        Directory for the feature CSVs; created if it does not exist. Defaults
        to ``../Resources/Featured_Files/YFINANCE``.

    Returns
    -------
    None
        Results are written to disk and previewed on stdout.
    """
    if isinstance(tickers, str):  # wrap a single ticker so the loop below works
        tickers = [tickers]

    # Build paths with pathlib instead of backslash string literals: the
    # backslash forms are invalid escape sequences and only resolve on Windows.
    resources = Path("..") / "Resources"
    raw_dir = Path(input_dir) if input_dir is not None else resources / "Raw_Files" / "YFINANCE"
    out_dir = Path(output_dir) if output_dir is not None else resources / "Featured_Files" / "YFINANCE"

    for ticker in tickers:
        raw = pd.read_csv(raw_dir / f"{ticker}_{interval}_data.csv")
        df = _build_features(raw, ticker)

        print(f"Ticker: {ticker} | Shape: {df.shape}\n")
        print(df.head(2))
        print(df.tail(2),"\n")

        # to_csv does not create missing directories itself.
        out_dir.mkdir(parents=True, exist_ok=True)
        df.to_csv(out_dir / f"{ticker}_{interval}_features.csv", index=False)


In [35]:
# Short list: Nasdaq funds plus the core broad-market symbols.
ticker_list1 = [
    "QQQ", "QQQE",
    "SKY", "VOO", "IVV", "VTI", "ITOT",
]

# Full universe, grouped by theme.
# NOTE(review): "SKY" sits in the S&P / total-market group -- possibly a typo
# for "SPY"; confirm before changing, since the raw files are keyed by symbol.
ticker_list2 = [
    # Core S&P / total market
    "SKY", "VOO", "IVV", "VTI", "ITOT",
    # Nasdaq / growth
    "QQQ", "QQQE",
    # Small cap / mid cap / growth / value
    "IWM", "IWF", "IWD", "MDY",
    # Sectors: tech / finance / energy / health care
    "XLK", "XLF", "XLE", "XLV",
    # Bonds
    "TLT", "BND", "HYG",
    # International (US-listed)
    "VEU", "EFA",
]

# Interval tag used in the raw and feature file names.
interval = "1d"

In [36]:
# Build and write the feature CSV for every ticker in ticker_list2 at the chosen interval.
# add_features prints each ticker's shape plus its first and last two rows as it runs.
add_features(ticker_list2, interval)

Ticker: SKY | Shape: (1507, 31)

        Date Ticker       Open       High    Low      Close  Volume  \
0 2020-01-02    SKY  31.870001  32.119999  31.43  32.110001  452300   
1 2020-01-03    SKY  31.639999  32.110001  30.85  32.060001  440100   

   Dividends  Stock Splits  Price_Movement  ...  Close_lag14  Close_lag21  \
0        0.0           0.0        0.240000  ...          0.0          0.0   
1        0.0           0.0        0.420002  ...          0.0          0.0   

   Close_lag28  Close_lag365  Month_Sin  Month_Cos  Dayofweek_Sin  \
0          0.0           0.0        0.5   0.866025       0.433884   
1          0.0           0.0        0.5   0.866025      -0.433884   

   Dayofweek_Cos  Price_Movement_Category  Price_Movement_Code  
0      -0.900969                     high                    0  
1      -0.900969                     high                    0  

[2 rows x 31 columns]
           Date Ticker       Open       High        Low      Close  Volume  \
1505 2025-12-29  