In [1]:
# X-Variables #

import numpy as np
import pandas as pd
import yfinance as yf
import warnings

# Silence every warning for the rest of the run (this also hides library
# deprecation notices, so disable it temporarily when upgrading pandas/yfinance).
warnings.filterwarnings("ignore")

## Reading Symbols from Dataset ##
benchmark = 'SPY'
symbols = pd.read_csv("stock_symbols_new.csv")
# Keep the benchmark out of the stock universe. Filtering (rather than
# list.remove) does not raise when the CSV happens not to list the benchmark.
tickers = [sym for sym in symbols['symbol'].tolist() if sym != benchmark]
tickers = tickers[:50]  # limit to 50 for testing
# The benchmark is downloaded alongside the stocks so relative-strength
# features can be computed against it later.
all_tickers = tickers + [benchmark]

## Download Price Data ##
# auto_adjust=False keeps the separate 'Adj Close' field, which is the one
# selected here; the result has one column per ticker.
prices = yf.download(
    all_tickers,
    start='2010-01-01',
    auto_adjust=False,
    progress=False
)['Adj Close']

# Drop dates on which no ticker has a price; tickers with partial history
# keep their NaN entries.
prices = prices.dropna(how='all')

print(prices.head(10))

## Compute Features ##
# All monthly series use the 'ME' (month-end) alias so they share one index.
# The legacy "M" alias is deprecated since pandas 2.2, so mixing the two
# spellings only made the script depend on both behaviours at once.
monthly_px = prices.resample('ME').last()  # last available price in each calendar month

# Trailing simple returns over k month-end observations.
mom_1m  = monthly_px / monthly_px.shift(1)  - 1  # 1-month momentum
mom_3m  = monthly_px / monthly_px.shift(3)  - 1  # 3-month momentum
mom_6m  = monthly_px / monthly_px.shift(6)  - 1  # 6-month momentum
mom_12m = monthly_px / monthly_px.shift(12) - 1  # 12-month momentum
# Return from 12 months ago up to 1 month ago, i.e. the most recent month is skipped.
mom_12m_ex_1m = (monthly_px.shift(1) / monthly_px.shift(12)) - 1

# Relative strength: each ticker's momentum minus the benchmark's momentum on
# the same date (row-wise subtraction of the benchmark column).
rel_3m_spy  = mom_3m.sub(mom_3m[benchmark], axis=0)   # vs. benchmark, 3-month
rel_6m_spy  = mom_6m.sub(mom_6m[benchmark], axis=0)   # vs. benchmark, 6-month
rel_12m_spy = mom_12m.sub(mom_12m[benchmark], axis=0) # vs. benchmark, 12-month

daily_ret = prices.pct_change() # daily returns

# Rolling standard deviation of daily returns, scaled by sqrt(252) to an
# annualized figure, then sampled at the last observation of each month.
vol_3m = (daily_ret.rolling(63).std() * np.sqrt(252)).resample('ME').last() # 63 trading days
vol_6m = (daily_ret.rolling(126).std() * np.sqrt(252)).resample('ME').last() # 126 trading days


# Rolling highs of month-end prices; the window includes the current month,
# so the drawdowns below are always <= 0.
roll_max_6m  = monthly_px.rolling(6).max()  # 6-month rolling max
roll_max_12m = monthly_px.rolling(12).max() # 12-month rolling max

drawdown_6m  = monthly_px / roll_max_6m  - 1  # 6-month drawdown
drawdown_12m = monthly_px / roll_max_12m - 1  # 12-month drawdown

# 200-day simple moving average of daily prices, sampled at month end.
dma_200 = prices.rolling(200).mean().resample('ME').last()
pct_above_200dma = monthly_px / dma_200 - 1  # pct above 200-day moving average


## Combine Features ##
# Map each top-level column label to its full feature frame. Insertion order
# here fixes the column order of X.
feature_frames = {
    "mom_1m": mom_1m,
    "mom_3m": mom_3m,
    "mom_6m": mom_6m,
    "mom_12m": mom_12m,
    "mom_12m_ex_1m": mom_12m_ex_1m,
    "rel_3m_spy": rel_3m_spy,
    "rel_6m_spy": rel_6m_spy,
    "rel_12m_spy": rel_12m_spy,
    "vol_3m": vol_3m,
    "vol_6m": vol_6m,
    # "downside_vol_6m": downside_vol_6m,  (currently disabled)
    "drawdown_6m": drawdown_6m,
    "drawdown_12m": drawdown_12m,
    "pct_above_200dma": pct_above_200dma,
}

# Restrict every frame to the stock universe (benchmark column excluded) and
# join side by side; the dict keys become the outer column level.
X = pd.concat(
    {label: frame[tickers] for label, frame in feature_frames.items()},
    axis=1
)


## Standardize Data Function - z score ##
def zscore_cs(row: pd.Series) -> pd.Series:
    """Return the cross-sectional z-score of *row*.

    *row* holds the values of a single feature across tickers at a single
    date. The mean and the population standard deviation (``ddof=0``) are
    computed over the non-NaN entries, and NaN entries stay NaN in the result.

    When the row has no dispersion, every non-NaN entry is mapped to 0.0
    instead of dividing by zero. "No dispersion" covers a standard deviation
    that is NaN (no valid values), exactly zero, or negligible relative to
    the mean. The last case matters because a constant row such as
    ``[0.1, 0.1, 0.1]`` yields a std around 1e-17 from float round-off, which
    an exact ``== 0`` check would miss and turn into spurious +/-1 scores.
    """
    mu = row.mean()
    sd = row.std(ddof=0)
    # Relative tolerance: round-off in the mean scales with |mu|, so a std
    # below 1e-12 * |mu| carries no real cross-sectional information.
    if np.isnan(sd) or sd <= 1e-12 * abs(mu):
        return row * 0.0
    return (row - mu) / sd


## Normalize per feature across tickers at each date ##
# Work on a copy so the raw feature table X is left untouched.
X_z = X.copy()
print("X_z before normalization:")
print(X_z.head(20))

# The outer column level holds the feature names. Each feature block is
# standardized row by row, i.e. across tickers for one date.
feature_labels = X.columns.get_level_values(0).unique()
for feat in feature_labels:
    raw_block = X[feat]
    X_z[feat] = raw_block.apply(zscore_cs, axis=1)

print("X_z after normalization:")
print(X_z.head(20))

## Flatten X_z table so each ticker is a row ##
# Move the ticker level of the columns into the index, giving (Date, Ticker)
# rows with one column per feature.
stacked = X_z.stack(level=1)
stacked = stacked.rename_axis(index=["Date", "Ticker"])
X_panel = stacked.reset_index()


print(X_panel.head())
print("-----  -----  -----")
print(X_panel.tail())
print("-----  -----  -----")

n_rows, n_cols = X_panel.shape
print("Size of dataset:", "Rows:", n_rows, "Columns:", n_cols)

# NOTE(review): the file name says "dependent" although this table holds the
# X (feature) variables. Confirm with downstream readers before renaming.
X_panel.to_csv("dependent_variables_technicals.csv", index=False)





Ticker          AAPL  ABBV   AMD    AMZN       ASML      AVGO        AXP  \
Date                                                                       
2010-01-04  6.418385   NaN  9.70  6.6950  32.425716  1.328563  32.483315   
2010-01-05  6.429480   NaN  9.71  6.7345  32.678314  1.338425  32.411865   
2010-01-06  6.327211   NaN  9.57  6.6125  32.977692  1.348991  32.935806   
2010-01-07  6.315515   NaN  9.47  6.5000  32.060856  1.340538  33.469967   
2010-01-08  6.357501   NaN  9.43  6.6760  31.293734  1.350400  33.446056   
2010-01-11  6.301419   NaN  9.14  6.5155  30.629501  1.358854  33.063351   
2010-01-12  6.229739   NaN  8.65  6.3675  30.676275  1.338425  33.501873   
2010-01-13  6.317613   NaN  9.15  6.4555  31.527601  1.297567  33.605515   
2010-01-14  6.281024   NaN  9.00  6.3675  31.312437  1.298272  34.028072   
2010-01-15  6.176055   NaN  8.84  6.3570  30.620136  1.284888  33.796871   

Ticker            AZN  BABA        BAC  ...        RTX        SAP        SPY  \
Date   