In [15]:
# ============================================
# Simple SP500 Validation (Restricted Features)
# X = ['Close','High','Low','Volume']; y = Direction (next-day up=1 else 0)
# Models: XGBoost + CNN/LSTM/Bi-LSTM/Stacked LSTM + Sequential PPO
# Metrics: Accuracy, Precision, Recall, F1
# SMOTE on training only; PPO steps=50k; Reward (+1 / -5)
# Saves CNN & PPO confusion matrices at 300 DPI
# ============================================

import warnings, time
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from imblearn.over_sampling import SMOTE

from xgboost import XGBRegressor

import tensorflow as tf
tf.random.set_seed(42)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Flatten, Input
from tensorflow.keras.optimizers import Adam

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

import matplotlib.pyplot as plt
import seaborn as sns

# -----------------------
# Config
# -----------------------
FILE_PATH = "/Users/dhadel/Downloads/Validation_SP500_2017_2022.xlsx"   # change if local
TEST_SIZE = 0.20
EPOCHS = 50
RANDOM_STATE = 42
PPO_STEPS = 50_000
POS_REWARD, NEG_REWARD = 1, -5

# -----------------------
# Load & minimal prep
# -----------------------
df = pd.read_excel(FILE_PATH, engine="openpyxl")

# Keep only needed columns (must exist in file)
df = df[['Date','Open','High','Low','Close','Volume']].copy()

# Ensure numeric for features
for c in ['Open','High','Low','Close','Volume']:
    df[c] = pd.to_numeric(df[c], errors='coerce')

# Target: next-day direction (1 if next Close > today Close)
df['Direction'] = (df['Close'].shift(-1) > df['Close']).astype(int)
df = df.dropna(subset=['Close','High','Low','Volume','Direction']).copy()

# Features & target
X = df[['Close','High','Low','Volume']].copy()
y = df['Direction'].astype(int)

# Clean inf/nan
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(X.median(numeric_only=True))

# Split (mirror baseline style)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

# SMOTE on training only
smote = SMOTE(random_state=RANDOM_STATE)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Scale for DL
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_res)
X_test_scaled  = scaler.transform(X_test)

# -----------------------
# Helpers
# -----------------------
def metrics_from_continuous(y_true, y_pred_cont):
    yb = np.round(np.nan_to_num(y_pred_cont)).astype(int).clip(0,1)
    return {
        'Accuracy':  accuracy_score(y_true, yb),
        'Precision': precision_score(y_true, yb, average='weighted', zero_division=1),
        'Recall':    recall_score(y_true, yb, average='weighted', zero_division=1),
        'F1':        f1_score(y_true, yb, average='weighted', zero_division=1)
    }

results = []

# -----------------------
# XGBoost (regressor head like baseline)
# -----------------------
xgb = XGBRegressor(
    n_estimators=120, max_depth=3, learning_rate=0.1, subsample=0.8,
    n_jobs=-1, random_state=RANDOM_STATE
)
t0 = time.time()
xgb.fit(X_train, y_train)
xgb_time = round(time.time() - t0, 2)
xgb_pred = xgb.predict(X_test)
res = metrics_from_continuous(y_test, xgb_pred); res.update({'Model':'XGBoost','Time (s)':xgb_time}); results.append(res)

# -----------------------
# DL models (tabular -> 1D)
# -----------------------
X_train_dl = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
X_test_dl  = X_test_scaled.reshape(-1,  X_test_scaled.shape[1], 1)

def make_dl(kind, input_len):
    m = Sequential([Input(shape=(input_len,1))])
    if kind == 'CNN':
        m.add(Conv1D(64, 2, activation='relu'))
        m.add(Flatten())
    elif kind == 'LSTM':
        m.add(LSTM(50, activation='relu'))
    elif kind == 'Bi-LSTM':
        m.add(Bidirectional(LSTM(50, activation='relu')))
    elif kind == 'Stacked LSTM':
        m.add(LSTM(50, activation='relu', return_sequences=True))
        m.add(LSTM(50, activation='relu'))
    m.add(Dense(1, activation='linear'))
    m.compile(optimizer=Adam(1e-3, clipnorm=1.0), loss='mse')
    return m

dl_preds = {}
for name in ['CNN','LSTM','Bi-LSTM','Stacked LSTM']:
    mdl = make_dl(name, X_train_dl.shape[1])
    t0 = time.time()
    mdl.fit(X_train_dl, y_train_res, epochs=EPOCHS, verbose=0)
    t_train = round(time.time() - t0, 2)
    pred = mdl.predict(X_test_dl, verbose=0).ravel()
    dl_preds[name] = pred
    res = metrics_from_continuous(y_test, pred); res.update({'Model':name,'Time (s)':t_train}); results.append(res)

# -----------------------
# PPO Sequential Hybrid (meta over model outputs)
# -----------------------
stack = np.column_stack([
    xgb_pred,
    dl_preds['CNN'],
    dl_preds['LSTM'],
    dl_preds['Bi-LSTM'],
    dl_preds['Stacked LSTM']
]).astype(np.float32)

stack = MinMaxScaler().fit_transform(stack).astype(np.float32)
ppo_labels = y_test.values.astype(int)

class PPOEnv(gym.Env):
    def __init__(self, X, y, pos_reward=1, neg_reward=-5):
        super().__init__()
        self.X, self.y = X, y
        self.n = len(y); self.i = 0
        self.pos_reward, self.neg_reward = pos_reward, neg_reward
        self.observation_space = spaces.Box(low=0, high=1, shape=(X.shape[1],), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
    def reset(self, seed=None, options=None):
        self.i = 0
        return self.X[self.i], {}
    def step(self, action):
        r = self.pos_reward if action == self.y[self.i] else self.neg_reward
        self.i += 1
        done = self.i >= self.n
        obs = self.X[self.i] if not done else np.zeros(self.X.shape[1], dtype=np.float32)
        return obs, r, done, False, {}

env = make_vec_env(lambda: PPOEnv(stack, ppo_labels, POS_REWARD, NEG_REWARD), n_envs=1)
ppo = PPO("MlpPolicy", env, verbose=0, seed=RANDOM_STATE)

t0 = time.time()
ppo.learn(total_timesteps=PPO_STEPS)
ppo_time = round(time.time() - t0, 2)

# Evaluate sequentially
eval_env = PPOEnv(stack, ppo_labels, POS_REWARD, NEG_REWARD)
obs, _ = eval_env.reset()
ppo_preds = []
done = False
while not done:
    act, _ = ppo.predict(obs, deterministic=True)
    ppo_preds.append(int(act))
    obs, _, done, _, _ = eval_env.step(int(act))

# PPO metrics
ppo_metrics = {
    'Accuracy':  accuracy_score(ppo_labels, ppo_preds),
    'Precision': precision_score(ppo_labels, ppo_preds, average='weighted', zero_division=1),
    'Recall':    recall_score(ppo_labels, ppo_preds, average='weighted', zero_division=1),
    'F1':        f1_score(ppo_labels, ppo_preds, average='weighted', zero_division=1)
}
ppo_metrics.update({'Model':'PPO (Sequential Hybrid)','Time (s)': ppo_time})
results.append(ppo_metrics)

# -----------------------
# Report + Save CMs (300 DPI)
# -----------------------
out = pd.DataFrame(results)[['Model','Accuracy','Precision','Recall','F1','Time (s)']]
print("\n=== Results ===")
print(out.sort_values('Accuracy', ascending=False).to_string(index=False))

# CNN CM
plt.figure(figsize=(5,4))
sns.heatmap(confusion_matrix(y_test, np.round(dl_preds['CNN']).astype(int).clip(0,1)),
            annot=True, fmt='d', cmap='Blues')
plt.title("CNN Confusion Matrix"); plt.xlabel("Predicted"); plt.ylabel("Actual")
plt.tight_layout(); plt.savefig("cm_cnn_300dpi.png", dpi=300); plt.close()
print("Saved: cm_cnn_300dpi.png")

# PPO CM
plt.figure(figsize=(5,4))
sns.heatmap(confusion_matrix(ppo_labels, ppo_preds),
            annot=True, fmt='d', cmap='Greens')
plt.title("PPO Confusion Matrix"); plt.xlabel("Predicted"); plt.ylabel("Actual")
plt.tight_layout(); plt.savefig("cm_ppo_300dpi.png", dpi=300); plt.close()
print("Saved: cm_ppo_300dpi.png")



=== Results ===
                  Model  Accuracy  Precision   Recall       F1  Time (s)
PPO (Sequential Hybrid)  0.567460   0.754551 0.567460 0.410870     13.85
                XGBoost  0.555556   0.545849 0.555556 0.546485      0.02
                Bi-LSTM  0.440476   0.478741 0.440476 0.383044      1.46
                   LSTM  0.424603   0.449870 0.424603 0.388866      0.94
           Stacked LSTM  0.424603   0.373774 0.424603 0.271102      1.59
                    CNN  0.408730   0.426057 0.408730 0.366644      0.55
Saved: cm_cnn_300dpi.png
Saved: cm_ppo_300dpi.png


In [17]:
# ============================================================
# Improved SP500 Validation (Better Features + Time-Series Split)
# X base: ['Close','High','Low','Volume']  (+ engineered tech features)
# y: Direction (next-day Close up=1 else 0)
# Models: XGBoost + CNN/LSTM/Bi-LSTM/Stacked LSTM + Sequential PPO
# Metrics: Accuracy, Precision, Recall, F1
# SMOTE on training only; PPO steps=50k; Reward (+1 / -5)
# Saves CNN & PPO confusion matrices at 300 DPI
# ============================================================

import warnings, time
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from imblearn.over_sampling import SMOTE

from xgboost import XGBRegressor

import tensorflow as tf
tf.random.set_seed(42)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Flatten, Input
from tensorflow.keras.optimizers import Adam

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

import matplotlib.pyplot as plt
import seaborn as sns

# -----------------------
# Config
# -----------------------
FILE_PATH = "/Users/dhadel/Downloads/Validation_SP500_2017_2022.xlsx"   # change if local
RANDOM_STATE = 42
TEST_FRACTION = 0.20          # last 20% by time for testing
EPOCHS = 20                   # a bit more training for DL
PPO_STEPS = 50_000
POS_REWARD, NEG_REWARD = 1, -5
RESULTS_CSV = "sp500_improved_results.csv"

# Optional: band to ignore tiny moves (set to 0.0 to disable)
# If you set e.g. 0.003 (0.3%), neutral days are dropped from training/testing.
MIN_MOVE_BAND = 0.0

# -----------------------
# Load data
# -----------------------
df = pd.read_excel(FILE_PATH, engine="openpyxl")

# Keep only needed columns & ensure types
df = df[['Date','Open','High','Low','Close','Volume']].copy()
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
for c in ['Open','High','Low','Close','Volume']:
    df[c] = pd.to_numeric(df[c], errors='coerce')

df = df.dropna(subset=['Date','Open','High','Low','Close','Volume']).sort_values('Date').reset_index(drop=True)

# -----------------------
# Target: next-day direction
# -----------------------
ret1 = df['Close'].pct_change().shift(-1)  # next-day return relative to today
if MIN_MOVE_BAND > 0.0:
    # Label only if move exceeds band; drop neutral days
    df['Direction'] = np.where(ret1 > MIN_MOVE_BAND, 1, np.where(ret1 < -MIN_MOVE_BAND, 0, np.nan))
    df = df.dropna(subset=['Direction']).copy()
    df['Direction'] = df['Direction'].astype(int)
else:
    df['Direction'] = (df['Close'].shift(-1) > df['Close']).astype(int)

# Drop last row (no future label)
df = df.iloc[:-1, :].copy()

# -----------------------
# Lightweight technical features (all causal: lags/rolling only)
# -----------------------
df['ret'] = df['Close'].pct_change()      # daily return (t - 1)
# lagged returns
for k in [1,2,3,5]:
    df[f'lag_ret_{k}'] = df['ret'].shift(k)

# moving averages & ratios
for w in [5,10,20]:
    df[f'ma_{w}'] = df['Close'].rolling(w, min_periods=w).mean()
    df[f'ma_ratio_{w}'] = df['Close'] / (df[f'ma_{w}'] + 1e-12)

# rolling volatility of returns
for w in [5,10,20]:
    df[f'vol_{w}'] = df['ret'].rolling(w, min_periods=w).std()

# RSI(14)
delta = df['Close'].diff()
gain = delta.clip(lower=0).rolling(14, min_periods=14).mean()
loss = (-delta.clip(upper=0)).rolling(14, min_periods=14).mean()
rs = gain / (loss + 1e-12)
df['rsi_14'] = 100 - (100 / (1 + rs))

# MACD (12,26,9)
ema12 = df['Close'].ewm(span=12, adjust=False).mean()
ema26 = df['Close'].ewm(span=26, adjust=False).mean()
macd = ema12 - ema26
signal = macd.ewm(span=9, adjust=False).mean()
df['macd'] = macd
df['macd_signal'] = signal
df['macd_hist'] = macd - signal

# simple price ratios
df['hl_range'] = (df['High'] - df['Low']) / (df['Close'] + 1e-12)
df['co_ratio'] = df['Close'] / (df['Open'] + 1e-12)

# Base features + engineered
base_feats = ['Close','High','Low','Volume']
eng_feats  = [c for c in [
    'ret', 'lag_ret_1','lag_ret_2','lag_ret_3','lag_ret_5',
    'ma_ratio_5','ma_ratio_10','ma_ratio_20',
    'vol_5','vol_10','vol_20',
    'rsi_14','macd','macd_signal','macd_hist',
    'hl_range','co_ratio'
] if c in df.columns]

feature_cols = base_feats + eng_feats

# Drop rows with NA introduced by rolling/lag features
df = df.dropna(subset=feature_cols + ['Direction']).copy()

X_all = df[feature_cols].replace([np.inf,-np.inf], np.nan).fillna(method='ffill').fillna(method='bfill')
y_all = df['Direction'].astype(int).values
dates = df['Date'].values

# -----------------------
# Time-series split (chronological 80/20)
# -----------------------
n = len(df)
test_start = int(np.floor((1.0 - TEST_FRACTION) * n))
X_train, X_test = X_all.iloc[:test_start], X_all.iloc[test_start:]
y_train, y_test = y_all[:test_start], y_all[test_start:]
dates_train, dates_test = dates[:test_start], dates[test_start:]

# -----------------------
# SMOTE on training only + scaling for DL
# -----------------------
sm = SMOTE(random_state=RANDOM_STATE)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_res)
X_test_scaled  = scaler.transform(X_test)

# -----------------------
# Helpers
# -----------------------
def metrics_from_continuous(y_true, y_pred_cont):
    yb = np.round(np.nan_to_num(y_pred_cont)).astype(int).clip(0,1)
    return {
        'Accuracy':  accuracy_score(y_true, yb),
        'Precision': precision_score(y_true, yb, average='weighted', zero_division=1),
        'Recall':    recall_score(y_true, yb, average='weighted', zero_division=1),
        'F1':        f1_score(y_true, yb, average='weighted', zero_division=1)
    }

results = []

# -----------------------
# 1) XGBoost (regressor head to mirror baseline behavior)
# -----------------------
xgb = XGBRegressor(
    n_estimators=200, max_depth=4, learning_rate=0.05, subsample=0.8,
    colsample_bytree=0.9, n_jobs=-1, random_state=RANDOM_STATE
)
t0 = time.time()
xgb.fit(X_train, y_train)
xgb_time = round(time.time() - t0, 2)
xgb_pred = xgb.predict(X_test)
res = metrics_from_continuous(y_test, xgb_pred); res.update({'Model':'XGBoost','Time (s)':xgb_time}); results.append(res)

# -----------------------
# 2) DL models
# -----------------------
X_train_dl = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
X_test_dl  = X_test_scaled.reshape(-1,  X_test_scaled.shape[1], 1)

def make_dl(kind, input_len):
    m = Sequential([Input(shape=(input_len,1))])
    if kind == 'CNN':
        m.add(Conv1D(64, 3, activation='relu'))
        m.add(Flatten())
    elif kind == 'LSTM':
        m.add(LSTM(64, activation='relu'))
    elif kind == 'Bi-LSTM':
        m.add(Bidirectional(LSTM(64, activation='relu')))
    elif kind == 'Stacked LSTM':
        m.add(LSTM(64, activation='relu', return_sequences=True))
        m.add(LSTM(64, activation='relu'))
    m.add(Dense(1, activation='linear'))
    m.compile(optimizer=Adam(1e-3, clipnorm=1.0), loss='mse')
    return m

dl_preds = {}
for name in ['CNN','LSTM','Bi-LSTM','Stacked LSTM']:
    mdl = make_dl(name, X_train_dl.shape[1])
    t0 = time.time()
    mdl.fit(X_train_dl, y_train_res, epochs=EPOCHS, verbose=0)
    elapsed = round(time.time() - t0, 2)
    pred = mdl.predict(X_test_dl, verbose=0).ravel()
    dl_preds[name] = pred
    res = metrics_from_continuous(y_test, pred); res.update({'Model':name,'Time (s)':elapsed}); results.append(res)

# -----------------------
# 3) PPO Sequential Hybrid (meta over model outputs)
# -----------------------
stack = np.column_stack([
    xgb_pred,
    dl_preds['CNN'],
    dl_preds['LSTM'],
    dl_preds['Bi-LSTM'],
    dl_preds['Stacked LSTM']
]).astype(np.float32)

stack = MinMaxScaler().fit_transform(stack).astype(np.float32)
ppo_labels = y_test.astype(int)

class PPOEnv(gym.Env):
    def __init__(self, X, y, pos_reward=1, neg_reward=-5):
        super().__init__()
        self.X, self.y = X, y
        self.n = len(y); self.i = 0
        self.pos_reward, self.neg_reward = pos_reward, neg_reward
        self.observation_space = spaces.Box(low=0, high=1, shape=(X.shape[1],), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
    def reset(self, seed=None, options=None):
        self.i = 0
        return self.X[self.i], {}
    def step(self, action):
        r = self.pos_reward if action == self.y[self.i] else self.neg_reward
        self.i += 1
        done = self.i >= self.n
        obs = self.X[self.i] if not done else np.zeros(self.X.shape[1], dtype=np.float32)
        return obs, r, done, False, {}

env = make_vec_env(lambda: PPOEnv(stack, ppo_labels, POS_REWARD, NEG_REWARD), n_envs=1)
ppo = PPO("MlpPolicy", env, verbose=0, seed=RANDOM_STATE)

t0 = time.time()
ppo.learn(total_timesteps=PPO_STEPS)
ppo_time = round(time.time() - t0, 2)

# Evaluate sequentially
eval_env = PPOEnv(stack, ppo_labels, POS_REWARD, NEG_REWARD)
obs, _ = eval_env.reset()
ppo_preds = []
done = False
while not done:
    act, _ = ppo.predict(obs, deterministic=True)
    ppo_preds.append(int(act))
    obs, _, done, _, _ = eval_env.step(int(act))

# PPO metrics
ppo_metrics = {
    'Accuracy':  accuracy_score(ppo_labels, ppo_preds),
    'Precision': precision_score(ppo_labels, ppo_preds, average='weighted', zero_division=1),
    'Recall':    recall_score(ppo_labels, ppo_preds, average='weighted', zero_division=1),
    'F1':        f1_score(ppo_labels, ppo_preds, average='weighted', zero_division=1)
}
ppo_metrics.update({'Model':'PPO (Sequential Hybrid)','Time (s)': ppo_time})
results.append(ppo_metrics)

# -----------------------
# Report + Save (300 DPI CMs)
# -----------------------
out = pd.DataFrame(results)[['Model','Accuracy','Precision','Recall','F1','Time (s)']]
print("\n=== Results (Improved) ===")
print(out.sort_values('Accuracy', ascending=False).to_string(index=False))
out.to_csv(RESULTS_CSV, index=False)
print(f"\nSaved metrics to: {RESULTS_CSV}")

# Confusion matrices
plt.figure(figsize=(5,4))
sns.heatmap(confusion_matrix(y_test, np.round(dl_preds['CNN']).astype(int).clip(0,1)),
            annot=True, fmt='d', cmap='Blues')
plt.title("CNN Confusion Matrix"); plt.xlabel("Predicted"); plt.ylabel("Actual")
plt.tight_layout(); plt.savefig("cm_cnn_improved_300dpi.png", dpi=300); plt.close()
print("Saved: cm_cnn_improved_300dpi.png")

plt.figure(figsize=(5,4))
sns.heatmap(confusion_matrix(ppo_labels, ppo_preds),
            annot=True, fmt='d', cmap='Greens')
plt.title("PPO Confusion Matrix"); plt.xlabel("Predicted"); plt.ylabel("Actual")
plt.tight_layout(); plt.savefig("cm_ppo_improved_300dpi.png", dpi=300); plt.close()
print("Saved: cm_ppo_improved_300dpi.png")



=== Results (Improved) ===
                  Model  Accuracy  Precision   Recall       F1  Time (s)
                    CNN  0.564516   0.541432 0.564516 0.488886      0.88
PPO (Sequential Hybrid)  0.564516   0.754162 0.564516 0.407383     12.73
                Bi-LSTM  0.560484   0.517125 0.560484 0.437896      4.91
                   LSTM  0.556452   0.519657 0.556452 0.466501      2.98
                XGBoost  0.500000   0.500000 0.500000 0.500000      0.11
           Stacked LSTM  0.495968   0.611082 0.495968 0.423582      5.92

Saved metrics to: sp500_improved_results.csv
Saved: cm_cnn_improved_300dpi.png
Saved: cm_ppo_improved_300dpi.png


In [19]:
# ============================================================
# SP500 — Multi-Split Comparison (20%, 30%, 40% test)
# Models: XGBoost + CNN + LSTM + Bi-LSTM + Stacked LSTM + Sequential PPO
# Metrics: Accuracy, Precision, Recall, F1
# SMOTE on training only; PPO steps=50k; Reward (+1/-5); Chronological splits
# Outputs: results_20.csv, results_30.csv, results_40.csv, results_summary.csv
# ============================================================

import warnings, time
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE

from xgboost import XGBRegressor

import tensorflow as tf
tf.random.set_seed(42)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional, Flatten, Input
from tensorflow.keras.optimizers import Adam

import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# -----------------------
# Config
# -----------------------
FILE_PATH = "/Users/dhadel/Downloads/Validation_SP500_2017_2022.xlsx"   # change if needed
RANDOM_STATE = 42
EPOCHS = 20
PPO_STEPS = 50_000
POS_REWARD, NEG_REWARD = 1, -5
TEST_FRACTIONS = [0.20, 0.30, 0.40]  # 20%, 30%, 40%
MIN_MOVE_BAND = 0.0  # set >0.0 (e.g., 0.003) to ignore tiny next-day moves

np.random.seed(RANDOM_STATE)

# -----------------------
# Helpers
# -----------------------
def metrics_from_continuous(y_true, y_pred_cont):
    yb = np.round(np.nan_to_num(y_pred_cont)).astype(int).clip(0,1)
    return {
        'Accuracy':  accuracy_score(y_true, yb),
        'Precision': precision_score(y_true, yb, average='weighted', zero_division=1),
        'Recall':    recall_score(y_true, yb, average='weighted', zero_division=1),
        'F1':        f1_score(y_true, yb, average='weighted', zero_division=1)
    }

def build_features(df):
    """Causal, lightweight technical features for better signal."""
    df = df[['Date','Open','High','Low','Close','Volume']].copy()
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    for c in ['Open','High','Low','Close','Volume']:
        df[c] = pd.to_numeric(df[c], errors='coerce')
    df = df.dropna().sort_values('Date').reset_index(drop=True)

    # Target: next-day direction (optionally with a neutral band)
    next_ret = df['Close'].pct_change().shift(-1)
    if MIN_MOVE_BAND > 0.0:
        y = np.where(next_ret > MIN_MOVE_BAND, 1, np.where(next_ret < -MIN_MOVE_BAND, 0, np.nan))
        df['Direction'] = y
        df = df.dropna(subset=['Direction']).copy()
        df['Direction'] = df['Direction'].astype(int)
    else:
        df['Direction'] = (df['Close'].shift(-1) > df['Close']).astype(int)

    # Drop last row (no future label)
    df = df.iloc[:-1, :].copy()

    # Base features
    df['ret'] = df['Close'].pct_change()
    for k in [1,2,3,5]:
        df[f'lag_ret_{k}'] = df['ret'].shift(k)

    for w in [5,10,20]:
        df[f'ma_{w}'] = df['Close'].rolling(w, min_periods=w).mean()
        df[f'ma_ratio_{w}'] = df['Close'] / (df[f'ma_{w}'] + 1e-12)
        df[f'vol_{w}'] = df['ret'].rolling(w, min_periods=w).std()

    # RSI(14)
    delta = df['Close'].diff()
    gain = delta.clip(lower=0).rolling(14, min_periods=14).mean()
    loss = (-delta.clip(upper=0)).rolling(14, min_periods=14).mean()
    rs = gain / (loss + 1e-12)
    df['rsi_14'] = 100 - (100 / (1 + rs))

    # MACD (12,26,9)
    ema12 = df['Close'].ewm(span=12, adjust=False).mean()
    ema26 = df['Close'].ewm(span=26, adjust=False).mean()
    macd = ema12 - ema26
    signal = macd.ewm(span=9, adjust=False).mean()
    df['macd'] = macd
    df['macd_signal'] = signal
    df['macd_hist'] = macd - signal

    # Simple price ratios
    df['hl_range'] = (df['High'] - df['Low']) / (df['Close'] + 1e-12)
    df['co_ratio'] = df['Close'] / (df['Open'] + 1e-12)

    feature_cols = ['Close','High','Low','Volume',
                    'ret','lag_ret_1','lag_ret_2','lag_ret_3','lag_ret_5',
                    'ma_ratio_5','ma_ratio_10','ma_ratio_20',
                    'vol_5','vol_10','vol_20',
                    'rsi_14','macd','macd_signal','macd_hist',
                    'hl_range','co_ratio']

    df = df.dropna(subset=feature_cols + ['Direction']).copy()

    X_all = df[feature_cols].replace([np.inf, -np.inf], np.nan).fillna(method='ffill').fillna(method='bfill')
    y_all = df['Direction'].astype(int).values
    dates = df['Date'].values
    return X_all, y_all, dates

def make_dl(kind, input_len):
    m = Sequential([Input(shape=(input_len,1))])
    if kind == 'CNN':
        m.add(Conv1D(64, 3, activation='relu')); m.add(Flatten())
    elif kind == 'LSTM':
        m.add(LSTM(64, activation='relu'))
    elif kind == 'Bi-LSTM':
        m.add(Bidirectional(LSTM(64, activation='relu')))
    elif kind == 'Stacked LSTM':
        m.add(LSTM(64, activation='relu', return_sequences=True)); m.add(LSTM(64, activation='relu'))
    m.add(Dense(1, activation='linear'))
    m.compile(optimizer=Adam(1e-3, clipnorm=1.0), loss='mse')
    return m

def run_split(X_all, y_all, dates, test_fraction, seed=RANDOM_STATE):
    """Chronological split → SMOTE on train → scale → train all models → return results DataFrame."""
    n = len(X_all)
    test_start = int(np.floor((1.0 - test_fraction) * n))
    X_train, X_test = X_all.iloc[:test_start], X_all.iloc[test_start:]
    y_train, y_test = y_all[:test_start], y_all[test_start:]

    # SMOTE on training only
    sm = SMOTE(random_state=seed)
    X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

    # Scale for DL inputs
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_res)
    X_test_scaled  = scaler.transform(X_test)

    results = []

    # XGBoost (regressor head)
    xgb = XGBRegressor(
        n_estimators=200, max_depth=4, learning_rate=0.05, subsample=0.8,
        colsample_bytree=0.9, n_jobs=-1, random_state=seed
    )
    t0 = time.time(); xgb.fit(X_train, y_train); xgb_time = round(time.time() - t0, 2)
    xgb_pred = xgb.predict(X_test)
    r = metrics_from_continuous(y_test, xgb_pred); r.update({'Model':'XGBoost','Time (s)': xgb_time}); results.append(r)

    # DL models
    X_train_dl = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
    X_test_dl  = X_test_scaled.reshape(-1,  X_test_scaled.shape[1], 1)

    dl_preds = {}
    for name in ['CNN','LSTM','Bi-LSTM','Stacked LSTM']:
        mdl = make_dl(name, X_train_dl.shape[1])
        t0 = time.time(); mdl.fit(X_train_dl, y_train_res, epochs=EPOCHS, verbose=0); t_dl = round(time.time() - t0, 2)
        pred = mdl.predict(X_test_dl, verbose=0).ravel()
        dl_preds[name] = pred
        r = metrics_from_continuous(y_test, pred); r.update({'Model': name, 'Time (s)': t_dl}); results.append(r)

    # PPO sequential hybrid (meta over model outputs)
    stack = np.column_stack([
        xgb_pred,
        dl_preds['CNN'],
        dl_preds['LSTM'],
        dl_preds['Bi-LSTM'],
        dl_preds['Stacked LSTM']
    ]).astype(np.float32)

    stack = MinMaxScaler().fit_transform(stack).astype(np.float32)
    ppo_labels = y_test.astype(int)

    class PPOEnv(gym.Env):
        def __init__(self, X, y, pos_reward=1, neg_reward=-5):
            super().__init__()
            self.X, self.y = X, y
            self.n = len(y); self.i = 0
            self.pos_reward, self.neg_reward = pos_reward, neg_reward
            self.observation_space = spaces.Box(low=0, high=1, shape=(X.shape[1],), dtype=np.float32)
            self.action_space = spaces.Discrete(2)
        def reset(self, seed=None, options=None):
            self.i = 0
            return self.X[self.i], {}
        def step(self, action):
            r = self.pos_reward if action == self.y[self.i] else self.neg_reward
            self.i += 1
            done = self.i >= self.n
            obs = self.X[self.i] if not done else np.zeros(self.X.shape[1], dtype=np.float32)
            return obs, r, done, False, {}

    env = make_vec_env(lambda: PPOEnv(stack, ppo_labels, POS_REWARD, NEG_REWARD), n_envs=1)
    ppo = PPO("MlpPolicy", env, verbose=0, seed=seed)
    t0 = time.time(); ppo.learn(total_timesteps=PPO_STEPS); ppo_time = round(time.time() - t0, 2)

    # Evaluate sequentially
    eval_env = PPOEnv(stack, ppo_labels, POS_REWARD, NEG_REWARD)
    obs, _ = eval_env.reset(); preds = []; done = False
    while not done:
        act, _ = ppo.predict(obs, deterministic=True)
        preds.append(int(act))
        obs, _, done, _, _ = eval_env.step(int(act))

    # PPO metrics
    ppo_metrics = {
        'Accuracy':  accuracy_score(ppo_labels, preds),
        'Precision': precision_score(ppo_labels, preds, average='weighted', zero_division=1),
        'Recall':    recall_score(ppo_labels, preds, average='weighted', zero_division=1),
        'F1':        f1_score(ppo_labels, preds, average='weighted', zero_division=1)
    }
    ppo_metrics.update({'Model':'PPO (Sequential Hybrid)','Time (s)': ppo_time})
    results.append(ppo_metrics)

    df_res = pd.DataFrame(results)[['Model','Accuracy','Precision','Recall','F1','Time (s)']]
    return df_res

# -----------------------
# Run once (compute features), then evaluate all splits
# -----------------------
raw = pd.read_excel(FILE_PATH, engine="openpyxl")
X_all, y_all, dates = build_features(raw)

summary_rows = []
for frac in TEST_FRACTIONS:
    df_split = run_split(X_all, y_all, dates, test_fraction=frac)
    fname = f"results_{int(frac*100)}.csv"
    df_split.to_csv(fname, index=False)
    print(f"\n=== Split {int(frac*100)}% test ===")
    print(df_split.sort_values('Accuracy', ascending=False).to_string(index=False))
    # collect for summary
    tmp = df_split.copy()
    tmp.insert(0, 'Split', f"{int(frac*100)}%")
    summary_rows.append(tmp)

summary = pd.concat(summary_rows, ignore_index=True)
summary.to_csv("results_summary.csv", index=False)
print("\nSaved: results_20.csv, results_30.csv, results_40.csv, results_summary.csv")



=== Split 20% test ===
                  Model  Accuracy  Precision   Recall       F1  Time (s)
                    CNN  0.564516   0.541432 0.564516 0.488886      0.92
PPO (Sequential Hybrid)  0.564516   0.754162 0.564516 0.407383     14.45
                Bi-LSTM  0.560484   0.517125 0.560484 0.437896      4.91
                   LSTM  0.556452   0.519657 0.556452 0.466501      2.78
                XGBoost  0.500000   0.500000 0.500000 0.500000      0.12
           Stacked LSTM  0.495968   0.611082 0.495968 0.423582      5.94

=== Split 30% test ===
                  Model  Accuracy  Precision   Recall       F1  Time (s)
                Bi-LSTM  0.583333   0.758356 0.583333 0.438153      4.62
PPO (Sequential Hybrid)  0.575269   0.755665 0.575269 0.420162     14.06
                   LSTM  0.559140   0.552889 0.559140 0.554721      2.56
           Stacked LSTM  0.532258   0.580715 0.532258 0.520958      5.50
                    CNN  0.529570   0.467048 0.529570 0.458857      0.89
   