# Import Libraries

In [222]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.utils import resample, shuffle
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Download Stocks/Indices 

In [225]:
tickers = ["MSFT", "AMZN", "^GSPC", "^DJI"]  # MSFT, AMZN, S&P 500, Dow Jones
data = yf.download(tickers, start="2015-01-01", end="2025-01-01", auto_adjust=False)

ohlcv = {t: data.xs(t, axis=1, level=1) for t in tickers} #open, high, low, close, volume

[*********************100%***********************]  4 of 4 completed


# Create Features 

In [228]:
def compute_indicators(df):
    indicators = pd.DataFrame(index=df.index)

    close = df["Adj Close"]
    high = df["High"]
    low = df["Low"]
    vol = df["Volume"]

    # Simple Moving Average 10 day
    indicators["SMA10"] = close.rolling(window=10).mean()

    # Weighted Moving Average 10 day
    weights = np.arange(1, 11)
    indicators["WMA10"] = close.rolling(10).apply(lambda x: np.dot(x, weights)/weights.sum(), raw=True)

    # Momentum 10 day
    indicators["MOM10"] = close - close.shift(10)

    # Stochastic Oscillator %K 14 day
    indicators["STOCHK"] = 100 * (close - low.rolling(14).min()) / (high.rolling(14).min() - low.rolling(14).min())

    # Stochastic Oscillator %D 3 day SMA of %K
    indicators["STOCHD"] = indicators["STOCHK"].rolling(3).mean()

    # Relative Strength Index 14 day 
    delta = close.diff()
    up = delta.clip(lower=0)
    down = -1 * delta.clip(upper=0)
    rs = up.rolling(14).mean() / down.rolling(14).mean()
    indicators["RSI14"] = 100 - (100 / (1 + rs))

    # MACD 12-day EMA - 26 day EMA
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    indicators["MACD"] = ema12 - ema26

    # Williams %R 14 day
    highest14 = high.rolling(14).max()
    lowest14 = low.rolling(14).min()
    indicators["WILLR"] = -100 * (highest14 - close) / (highest14 - lowest14)

    # 9. Accumulation/Distribution Oscillator 
    clv = ((close - low) - (high - close)) / (high - low)
    adl = (clv * vol).cumsum()
    indicators["ADOSC"] = adl

    # 10. Commodity Channel Index 20 day
    tp = (high + low + close) / 3
    sma = tp.rolling(20).mean()
    mad = (tp - sma).abs().rolling(20).mean()
    indicators["CCI20"] = (tp - sma) / (0.015 * mad)

    return indicators


In [230]:
results = {}

for ticker in tickers:
    print(f" \nTraining models for {ticker} ")
    
    X_ticker = features[ticker].copy()
    y_ticker = (ohlcv[ticker]["Adj Close"].shift(-1) > ohlcv[ticker]["Adj Close"]).astype(int)
    
    X_ticker = X_ticker.dropna()
    y_ticker = y_ticker.loc[X_ticker.index]

    # balance dataset 
    df_bal = X_ticker.copy()
    df_bal["target"] = y_ticker

    ups = df_bal[df_bal["target"] == 1]
    downs = df_bal[df_bal["target"] == 0]

    minority = min(len(ups), len(downs))
    ups_res = resample(ups, n_samples=minority, random_state=42)
    downs_res = resample(downs, n_samples=minority, random_state=42)

    balanced = pd.concat([ups_res, downs_res])

    balanced = shuffle(balanced, random_state=42)
    
    X_bal = balanced.drop("target", axis=1)
    y_bal = balanced["target"]
    
    X_train, X_test, y_train, y_test = train_test_split(X_bal, y_bal, test_size=0.2, shuffle=False)
    
    # GaussianNB Continuous representation 
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    y_pred_g = gnb.predict(X_test)
    
    print("GaussianNB Results:")
    print(classification_report(y_test, y_pred_g, zero_division=0))
    
    # BernoulliNB Discrete representation 
    X_bin = (X_ticker > X_ticker.shift(1)).astype(int).dropna()
    y_bin = y_ticker.loc[X_bin.index]
    
    X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(X_bin, y_bin, test_size=0.2, shuffle=False)
    
    bnb = BernoulliNB()
    bnb.fit(X_train_b, y_train_b)
    y_pred_b = bnb.predict(X_test_b)
    
    print("BernoulliNB Results:")
    print(classification_report(y_test_b, y_pred_b, zero_division=0))

    results[ticker] = {"GaussianNB": (y_test, y_pred_g), "BernoulliNB": (y_test_b, y_pred_b)}


 
Training models for MSFT 
GaussianNB Results:
              precision    recall  f1-score   support

           0       0.56      0.24      0.33       229
           1       0.52      0.81      0.63       232

    accuracy                           0.53       461
   macro avg       0.54      0.53      0.48       461
weighted avg       0.54      0.53      0.48       461

BernoulliNB Results:
              precision    recall  f1-score   support

           0       0.43      0.41      0.42       227
           1       0.52      0.55      0.54       269

    accuracy                           0.48       496
   macro avg       0.48      0.48      0.48       496
weighted avg       0.48      0.48      0.48       496

 
Training models for AMZN 
GaussianNB Results:
              precision    recall  f1-score   support

           0       0.49      0.64      0.56       230
           1       0.49      0.34      0.41       232

    accuracy                           0.49       462
   macro av