## Import Libraries 

In [187]:
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import numpy as np
import yfinance as yf

## Create Features 

In [190]:
def continuous_data_features(df):
    indicators = pd.DataFrame(index=df.index)

    close = df["Close"]
    high = df["High"]
    low = df["Low"]
    vol = df["Volume"]

    # Simple Moving Average 10 day
    indicators["SMA10"] = close.rolling(window=10).mean()

    # Weighted Moving Average 10 day
    weights = np.arange(1, 11)
    indicators["WMA10"] = close.rolling(10).apply(lambda x: np.dot(x, weights)/weights.sum(), raw=True)

    # Momentum 10 day
    indicators["MOM10"] = close - close.shift(10)

    # Stochastic Oscillator %K 14 day
    indicators["STOCHK"] = 100 * (close - low.rolling(14).min()) / (high.rolling(14).max() - low.rolling(14).min())

    # Stochastic Oscillator %D 3 day SMA of %K
    indicators["STOCHD"] = indicators["STOCHK"].rolling(3).mean()

    # Relative Strength Index 14 day 
    delta = close.diff()
    up = delta.clip(lower=0)
    down = -1 * delta.clip(upper=0)
    rs = up.rolling(14).mean() / down.rolling(14).mean()
    indicators["RSI14"] = 100 - (100 / (1 + rs))

    # MACD 12-day EMA - 26 day EMA
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    indicators["MACD"] = ema12 - ema26

    # Williams %R 14 day
    highest14 = high.rolling(14).max()
    lowest14 = low.rolling(14).min()
    indicators["WILLR"] = -100 * (highest14 - close) / (highest14 - lowest14)

    # 9. Accumulation/Distribution Oscillator (ADOSC)
    clv = np.where((high - low) == 0, 0, ((close - low) - (high - close)) / (high - low))
    adl = (clv * vol).cumsum()

    ema3_adl = adl.ewm(span=3, adjust=False).mean()
    ema10_adl = adl.ewm(span=10, adjust=False).mean()

    adosc = ema3_adl - ema10_adl

    # Scale ADOSC to 0–100 like in papers
    min_val = adosc.min()
    max_val = adosc.max()
    indicators["ADOSC"] = 100 * (adosc - min_val) / (max_val - min_val)


    # 10. Commodity Channel Index 20 day
    tp = (high + low + close) / 3
    sma = tp.rolling(20).mean()
    mad = (tp - sma).abs().rolling(20).mean()
    indicators["CCI20"] = (tp - sma) / (0.015 * mad)

    indicators["Close"] = close

    return indicators

In [192]:
def trend_deterministic_data(indicators, df):
    trend = pd.DataFrame(index=indicators.index)

    close = indicators["Close"]
    high = df["High"]
    low = df["Low"]
    vol = df["Volume"]

    #SMA
    trend["SMA10_T"] = np.where(close > indicators["SMA10"], 1, -1)

    #WMA
    trend["WMA10_T"] = np.where(close > indicators["WMA10"], 1, -1)

    #Momentum
    trend["MOM10_T"] = np.where(indicators["MOM10"] > 0, 1, -1)

    # Sophisticated Oscillator trends
    trend["STOCHK_T"] = np.where(indicators["STOCHK"] > indicators["STOCHK"].shift(1), 1, -1)
    trend["STOCHD_T"] = np.where(indicators["STOCHD"] > indicators["STOCHD"].shift(1), 1, -1)

    #RSI trend
    rsi = indicators["RSI14"]
    trend["RSI14_T"] = np.select([rsi > 70, rsi < 30, rsi > rsi.shift(1)],
                                 [-1, 1, 1], default=-1)
    #MACD trend
    trend["MACD_T"] = np.where(indicators["MACD"] > indicators["MACD"].shift(1), 1, -1)

    #Williams %R trend
    trend["WILLR_T"] = np.where(indicators["WILLR"] > indicators["WILLR"].shift(1), 1, -1)

    #ADOSC trend
    trend["ADOSC_T"] = np.where(indicators["ADOSC"] > indicators["ADOSC"].shift(1), 1, -1)

    #CCI trend
    cci = indicators["CCI20"]
    trend["CCI20_T"] = np.select([cci > 200, cci < -200, cci > cci.shift(1)],
                                 [-1, 1, 1], default=-1)
    return trend
    

## Loop through tickers 

In [195]:
tickers = ["MSFT", "AMZN", "^GSPC", "^DJI"]
results = []

## Create SVM continuous model using Polynomial Kernel and RBF Kernel

In [228]:
def continuous_svm(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
    
    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    C_values = [0.1, 0.5, 1, 5, 10, 100]
    gammas = [0.1, 0.5, 1, 2, 3, 5]

    results_poly = []
    results_rbf = []

    for C in C_values:
            svm_poly = SVC(kernel="poly", C=C, degree=1, gamma="scale", random_state=42)
            svm_poly.fit(X_train_scaled, y_train)
            y_pred = svm_poly.predict(X_test_scaled)

            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            results_poly.append({
                "Kernel": "Polynomial",
                "C": C,
                "degree": 1,
                "Accuracy": acc,
                "F1": f1
            })

    for C in C_values:
        for gamma in gammas:
            svm_rbf = SVC(kernel="rbf", C=C, gamma=gamma, random_state=42)
            svm_rbf.fit(X_train_scaled, y_train)
            y_pred = svm_rbf.predict(X_test_scaled)

            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            results_rbf.append({
                "Kernel": "RBF",
                "C": C,
                "gamma": gamma,
                "Accuracy": acc,
                "F1": f1
            })

    df_poly = pd.DataFrame(results_poly)
    df_rbf = pd.DataFrame(results_rbf)

    # Find the best result per kernel
    best_poly = df_poly.loc[df_poly["Accuracy"].idxmax()]
    best_rbf = df_rbf.loc[df_rbf["Accuracy"].idxmax()]

    # Format summary table
    summary = pd.DataFrame({
        "Kernel": ["Polynomial", "RBF"],
        "C": [best_poly["C"], best_rbf["C"]],
        "Degree/Gamma": [best_poly["degree"], best_rbf["gamma"]],
        "Accuracy": [best_poly["Accuracy"], best_rbf["Accuracy"]],
        "F-measure": [best_poly["F1"], best_rbf["F1"]],
    })

    return summary, df_poly, df_rbf

In [246]:
def trend_svm(X, y):
    # Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, shuffle=True)

    # Scale features
    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Define search ranges
    C_values = [0.1, 0.5, 1, 5, 10, 100]
    gammas = [0.1, 0.5, 1, 2, 3, 5]

    results_poly = []
    results_rbf = []

    # Polynomial kernel search
    for C in C_values:
            model = SVC(kernel="poly", C=C, degree=1, gamma="scale", random_state=42)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            results_poly.append({"Kernel": "Polynomial", "C": C, "degree": 1, "Accuracy": acc, "F1": f1})

    # RBF kernel search
    for C in C_values:
        for gamma in gammas:
            model = SVC(kernel="rbf", C=C, gamma=gamma, random_state=42)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            results_rbf.append({"Kernel": "RBF", "C": C, "gamma": gamma, "Accuracy": acc, "F1": f1})

    # Convert to DataFrames
    df_poly = pd.DataFrame(results_poly)
    df_rbf = pd.DataFrame(results_rbf)

    # Get best from each kernel by Accuracy
    best_poly = df_poly.loc[df_poly["Accuracy"].idxmax()]
    best_rbf = df_rbf.loc[df_rbf["Accuracy"].idxmax()]

    # Build summary table
    summary = pd.DataFrame({
        "Kernel": ["Polynomial", "RBF"],
        "C": [best_poly["C"], best_rbf["C"]],
        "Degree/Gamma": [best_poly["degree"], best_rbf["gamma"]],
        "Accuracy": [best_poly["Accuracy"], best_rbf["Accuracy"]],
        "F-measure": [best_poly["F1"], best_rbf["F1"]],
    })

    return summary, df_poly, df_rbf

In [238]:
for ticker in tickers:
    print(f"Processing {ticker}")
    
    df = yf.download(ticker, start="2015-01-01", end="2024-12-31", auto_adjust=True)
    df.dropna(inplace=True)
    df["Target"] = np.where(df["Close"] > df["Close"].shift(1), 1, -1)
    df.dropna(inplace=True)

    y = df["Target"]
    X_cont_full = continuous_data_features(df)
    X_cont = X_cont_full.drop(columns=["Close"])

    cont_aligned = pd.concat([X_cont, y], axis=1)
    cont_aligned.dropna(inplace=True)
    X_cont_clean = cont_aligned.drop(columns=['Target'])
    y_cont_clean = cont_aligned['Target']

    summary_results, _, _ = continuous_svm(X_cont_clean, y_cont_clean)
    print("\n--- Continuous SVM Results ---")
    print(summary_results.to_string(index=False))

[*********************100%***********************]  1 of 1 completed

Processing MSFT



[*********************100%***********************]  1 of 1 completed


--- Continuous SVM Results ---
    Kernel     C  Degree/Gamma  Accuracy  F-measure
Polynomial  10.0           1.0  0.832123   0.847507
       RBF 100.0           0.1  0.829701   0.842184
Processing AMZN



[*********************100%***********************]  1 of 1 completed


--- Continuous SVM Results ---
    Kernel     C  Degree/Gamma  Accuracy  F-measure
Polynomial  10.0           1.0  0.824052   0.837313
       RBF 100.0           0.1  0.822437   0.833837
Processing ^GSPC



[*********************100%***********************]  1 of 1 completed


--- Continuous SVM Results ---
    Kernel     C  Degree/Gamma  Accuracy  F-measure
Polynomial 100.0           1.0  0.846651   0.860088
       RBF 100.0           0.1  0.833737   0.846726
Processing ^DJI






--- Continuous SVM Results ---
    Kernel    C  Degree/Gamma  Accuracy  F-measure
Polynomial 10.0           1.0  0.840194   0.856105
       RBF 10.0           0.1  0.840194   0.854839


In [255]:
for ticker in tickers:
    print(f"Processing {ticker}")
    
    df = yf.download(ticker, start="2015-01-01", end="2024-12-31", auto_adjust=True)
    df.dropna(inplace=True)
    df["Target"] = np.where(df["Close"] > df["Close"].shift(1), 1, -1)
    df.dropna(inplace=True)

    y = df["Target"]
    
    X_cont_full = continuous_data_features(df)
    X_trend_full = trend_deterministic_data(X_cont_full, df)

    trend_aligned = pd.concat([X_trend_full, y], axis=1)
    trend_aligned.dropna(inplace=True)
    X_trend_clean = trend_aligned.drop(columns=['Target'])
    y_trend_clean = trend_aligned['Target']

    summary_results, _, _ = trend_svm(X_trend_clean, y_trend_clean)
    
    print("\n--- Trend SVM Results ---")
    print(summary_results.to_string(index=False))

[*********************100%***********************]  1 of 1 completed

Processing MSFT



[*********************100%***********************]  1 of 1 completed


--- Trend SVM Results ---
    Kernel   C  Degree/Gamma  Accuracy  F-measure
Polynomial 5.0           1.0  0.925278   0.928571
       RBF 1.0           0.1  0.931638   0.934551
Processing AMZN



[*********************100%***********************]  1 of 1 completed


--- Trend SVM Results ---
    Kernel   C  Degree/Gamma  Accuracy  F-measure
Polynomial 1.0           1.0  0.914944   0.920799
       RBF 0.1           0.1  0.910970   0.916542
Processing ^GSPC



[*********************100%***********************]  1 of 1 completed


--- Trend SVM Results ---
    Kernel   C  Degree/Gamma  Accuracy  F-measure
Polynomial 0.5           1.0  0.918919   0.924332
       RBF 1.0           0.1  0.924483   0.930198
Processing ^DJI






--- Trend SVM Results ---
    Kernel   C  Degree/Gamma  Accuracy  F-measure
Polynomial 0.5           1.0  0.910970   0.916418
       RBF 0.5           0.5  0.922893   0.928519
