# Import Libraries 

In [438]:
import pandas as pd
import numpy as np
import yfinance as yf 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# Create features 

In [441]:
def continuous_data_features(df):
    indicators = pd.DataFrame(index=df.index)

    close = df["Close"]
    high = df["High"]
    low = df["Low"]
    vol = df["Volume"]

    # Simple Moving Average 10 day
    indicators["SMA10"] = close.rolling(window=10).mean()

    # Weighted Moving Average 10 day
    weights = np.arange(1, 11)
    indicators["WMA10"] = close.rolling(10).apply(lambda x: np.dot(x, weights)/weights.sum(), raw=True)

    # Momentum 10 day
    indicators["MOM10"] = close - close.shift(10)

    # Stochastic Oscillator %K 14 day
    indicators["STOCHK"] = 100 * (close - low.rolling(14).min()) / (high.rolling(14).max() - low.rolling(14).min())

    # Stochastic Oscillator %D 3 day SMA of %K
    indicators["STOCHD"] = indicators["STOCHK"].rolling(3).mean()

    # Relative Strength Index 14 day 
    delta = close.diff()
    up = delta.clip(lower=0)
    down = -1 * delta.clip(upper=0)
    rs = up.rolling(14).mean() / down.rolling(14).mean()
    indicators["RSI14"] = 100 - (100 / (1 + rs))

    # MACD 12-day EMA - 26 day EMA
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    indicators["MACD"] = ema12 - ema26

    # Williams %R 14 day
    highest14 = high.rolling(14).max()
    lowest14 = low.rolling(14).min()
    indicators["WILLR"] = -100 * (highest14 - close) / (highest14 - lowest14)

    # 9. Accumulation/Distribution Oscillator (ADOSC)
    clv = np.where((high - low) == 0, 0, ((close - low) - (high - close)) / (high - low))
    adl = (clv * vol).cumsum()

    ema3_adl = adl.ewm(span=3, adjust=False).mean()
    ema10_adl = adl.ewm(span=10, adjust=False).mean()

    adosc = ema3_adl - ema10_adl

    # Scale ADOSC to 0–100 like in papers
    min_val = adosc.min()
    max_val = adosc.max()
    indicators["ADOSC"] = 100 * (adosc - min_val) / (max_val - min_val)


    # 10. Commodity Channel Index 20 day
    tp = (high + low + close) / 3
    sma = tp.rolling(20).mean()
    mad = (tp - sma).abs().rolling(20).mean()
    indicators["CCI20"] = (tp - sma) / (0.015 * mad)

    indicators["Close"] = close

    return indicators


In [443]:
def trend_deterministic_data(indicators, df):
    trend = pd.DataFrame(index=indicators.index)

    close = indicators["Close"]
    high = df["High"]
    low = df["Low"]
    vol = df["Volume"]

    #SMA
    trend["SMA10_T"] = np.where(close > indicators["SMA10"], 1, -1)

    #WMA
    trend["WMA10_T"] = np.where(close > indicators["WMA10"], 1, -1)

    #Momentum
    trend["MOM10_T"] = np.where(indicators["MOM10"] > 0, 1, -1)

    # Sophisticated Oscillator trends
    trend["STOCHK_T"] = np.where(indicators["STOCHK"] > indicators["STOCHK"].shift(1), 1, -1)
    trend["STOCHD_T"] = np.where(indicators["STOCHD"] > indicators["STOCHD"].shift(1), 1, -1)

    #RSI trend
    rsi = indicators["RSI14"]
    trend["RSI14_T"] = np.select([rsi > 70, rsi < 30, rsi > rsi.shift(1)],
                                 [-1, 1, 1], default=-1)
    #MACD trend
    trend["MACD_T"] = np.where(indicators["MACD"] > indicators["MACD"].shift(1), 1, -1)

    #Williams %R trend
    trend["WILLR_T"] = np.where(indicators["WILLR"] > indicators["WILLR"].shift(1), 1, -1)

    #ADOSC trend
    trend["ADOSC_T"] = np.where(indicators["ADOSC"] > indicators["ADOSC"].shift(1), 1, -1)

    #CCI trend
    cci = indicators["CCI20"]
    trend["CCI20_T"] = np.select([cci > 200, cci < -200, cci > cci.shift(1)],
                                 [-1, 1, 1], default=-1)
    return trend
    

## Loop Over Tickers 

In [446]:
tickers = ["MSFT", "AMZN", "^GSPC", "^DJI"]
results = []

## Create Continuous Random Forest Model 

In [449]:
def continuous_rf(X, y, n_trees_list):
    results = []

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=42, shuffle=True
    )

    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    for n in n_trees_list:
        rf = RandomForestClassifier(n_estimators=n, random_state=42)
        rf.fit(X_train_scaled, y_train)
        y_pred = rf.predict(X_test_scaled)

        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        results.append((n, acc, f1))

    df_results = pd.DataFrame(results, columns=["Trees", "Accuracy", "F1"])
    df_results["Rank"] = df_results["Accuracy"].rank(ascending=False)
    return df_results.sort_values(by="Trees").reset_index(drop=True)

In [463]:
def trend_rf(X, y, n_trees_list):
    results = []

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=42, shuffle=True
    )

    for n in n_trees_list:
        rf = RandomForestClassifier(n_estimators=n, random_state=42)
        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        results.append((n, acc, f1))

    df_results = pd.DataFrame(results, columns=["Trees", "Accuracy", "F1"])
    df_results["Rank"] = df_results["Accuracy"].rank(ascending=False)
    return df_results.sort_values(by="Trees").reset_index(drop=True)

In [453]:
n_trees_list = [20, 40, 60, 80, 100, 120, 140, 160, 180, 200]

In [475]:
for ticker in tickers:
    print(f"\n=== Processing {ticker} ===")
    df = yf.download(ticker, start="2015-01-01", end="2024-12-31", auto_adjust=True)
    df.dropna(inplace=True)

    df["Target"] = np.where(df["Close"] > df["Close"].shift(1), 1, -1)
    df.dropna(inplace=True)

    y = df["Target"]
    X_cont_full = continuous_data_features(df)
    X_trend = trend_deterministic_data(X_cont_full, df)
    X_cont = X_cont_full.drop(columns=["Close"])

    # Continuous
    cont_results = continuous_rf(X_cont, y, n_trees_list)
    print("\n--- Random Forest (Continuous) ---")
    print(cont_results[["Trees", "Accuracy", "F1"]].to_string(index=False))

    top3_cont = cont_results.nlargest(3, "Accuracy")
    print("\nTop 3 Continuous Tree Counts:")
    print(top3_cont[["Trees", "Accuracy", "F1"]].to_string(index=False))

    # Trend
    trend_results = trend_rf(X_trend, y, n_trees_list)
    print("\n--- Random Forest (Trend) ---")
    print(trend_results[["Trees", "Accuracy", "F1"]].to_string(index=False))

    top3_trend = trend_results.nlargest(3, "Accuracy")
    print("\nTop 3 Trend Tree Counts:")
    print(top3_trend[["Trees", "Accuracy", "F1"]].to_string(index=False))

[*********************100%***********************]  1 of 1 completed


=== Processing MSFT ===






--- Random Forest (Continuous) ---
 Trees  Accuracy       F1
    20  0.789348 0.800301
    40  0.789348 0.802386
    60  0.794118 0.807721
    80  0.794913 0.807750
   100  0.794118 0.807435
   120  0.791733 0.805926
   140  0.786169 0.801183
   160  0.789348 0.803267
   180  0.788553 0.802963
   200  0.786963 0.800298

Top 3 Continuous Tree Counts:
 Trees  Accuracy       F1
    80  0.794913 0.807750
    60  0.794118 0.807721
   100  0.794118 0.807435


[*********************100%***********************]  1 of 1 completed


--- Random Forest (Trend) ---
 Trees  Accuracy       F1
    20  0.923688 0.928465
    40  0.922893 0.927666
    60  0.922099 0.926866
    80  0.924483 0.928945
   100  0.923688 0.927711
   120  0.923688 0.927711
   140  0.924483 0.928625
   160  0.924483 0.928625
   180  0.924483 0.928625
   200  0.924483 0.928625

Top 3 Trend Tree Counts:
 Trees  Accuracy       F1
    80  0.924483 0.928945
   140  0.924483 0.928625
   160  0.924483 0.928625

=== Processing AMZN ===






--- Random Forest (Continuous) ---
 Trees  Accuracy       F1
    20  0.794913 0.802450
    40  0.796502 0.806061
    60  0.806041 0.815152
    80  0.804452 0.814480
   100  0.810016 0.820436
   120  0.810811 0.821321
   140  0.810016 0.819894
   160  0.805246 0.815373
   180  0.807631 0.818045
   200  0.809221 0.820090

Top 3 Continuous Tree Counts:
 Trees  Accuracy       F1
   120  0.810811 0.821321
   100  0.810016 0.820436
   140  0.810016 0.819894


[*********************100%***********************]  1 of 1 completed


--- Random Forest (Trend) ---
 Trees  Accuracy       F1
    20  0.900636 0.907339
    40  0.902226 0.909359
    60  0.904610 0.911243
    80  0.904610 0.911243
   100  0.904610 0.911243
   120  0.904610 0.911243
   140  0.904610 0.911243
   160  0.904610 0.911243
   180  0.904610 0.911243
   200  0.904610 0.911243

Top 3 Trend Tree Counts:
 Trees  Accuracy       F1
    60   0.90461 0.911243
    80   0.90461 0.911243
   100   0.90461 0.911243

=== Processing ^GSPC ===






--- Random Forest (Continuous) ---
 Trees  Accuracy       F1
    20  0.800477 0.817984
    40  0.814785 0.832735
    60  0.811606 0.828881
    80  0.814785 0.832495
   100  0.813990 0.831897
   120  0.811606 0.829619
   140  0.815580 0.833333
   160  0.813990 0.832857
   180  0.819555 0.837509
   200  0.819555 0.837741

Top 3 Continuous Tree Counts:
 Trees  Accuracy       F1
   180  0.819555 0.837509
   200  0.819555 0.837741
   140  0.815580 0.833333


[*********************100%***********************]  1 of 1 completed


--- Random Forest (Trend) ---
 Trees  Accuracy       F1
    20  0.924483 0.929682
    40  0.924483 0.929889
    60  0.924483 0.929786
    80  0.924483 0.929786
   100  0.923688 0.929308
   120  0.923688 0.929308
   140  0.924483 0.930096
   160  0.924483 0.930096
   180  0.924483 0.930096
   200  0.923688 0.929412

Top 3 Trend Tree Counts:
 Trees  Accuracy       F1
    20  0.924483 0.929682
    40  0.924483 0.929889
    60  0.924483 0.929786

=== Processing ^DJI ===






--- Random Forest (Continuous) ---
 Trees  Accuracy       F1
    20  0.779809 0.797958
    40  0.796502 0.816881
    60  0.787758 0.808052
    80  0.791733 0.811782
   100  0.788553 0.809456
   120  0.786169 0.806614
   140  0.790938 0.810655
   160  0.794913 0.814121
   180  0.796502 0.815296
   200  0.795707 0.814440

Top 3 Continuous Tree Counts:
 Trees  Accuracy       F1
    40  0.796502 0.816881
   180  0.796502 0.815296
   200  0.795707 0.814440

--- Random Forest (Trend) ---
 Trees  Accuracy       F1
    20  0.923688 0.929099
    40  0.922893 0.928519
    60  0.923688 0.929099
    80  0.922893 0.928519
   100  0.922893 0.928519
   120  0.922893 0.928519
   140  0.922893 0.928519
   160  0.922893 0.928519
   180  0.922893 0.928519
   200  0.922893 0.928519

Top 3 Trend Tree Counts:
 Trees  Accuracy       F1
    20  0.923688 0.929099
    60  0.923688 0.929099
    40  0.922893 0.928519
